Adding rocprofilerv2
Change-Id: Ic0cc280ba207d2b8f6ccae1cd4ac3184152fc1ad
[ROCm/rocprofiler commit: 8032adb64f]
This commit is contained in:
@@ -0,0 +1,60 @@
|
||||
---
|
||||
Language: Cpp
|
||||
BasedOnStyle: Google
|
||||
AccessModifierOffset: -1
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
AlignEscapedNewlinesLeft: false
|
||||
AlignTrailingComments: true
|
||||
AlignConsecutiveAssignments: false
|
||||
AlignOperands: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: false
|
||||
AllowShortIfStatementsOnASingleLine: true
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AlwaysBreakAfterDefinitionReturnType: false
|
||||
AlwaysBreakTemplateDeclarations: false
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
BreakBeforeBinaryOperators: false
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
BinPackParameters: true
|
||||
ColumnLimit: 100
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
IndentCaseLabels: true
|
||||
IndentWrappedFunctionNames: false
|
||||
IndentFunctionDeclarationAfterType: false
|
||||
MaxEmptyLinesToKeep: 2
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
NamespaceIndentation: None
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: false
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
DerivePointerAlignment: false
|
||||
PointerAlignment: Left
|
||||
SpacesBeforeTrailingComments: 2
|
||||
Cpp11BracedListStyle: true
|
||||
Standard: Auto
|
||||
IndentWidth: 2
|
||||
TabWidth: 8
|
||||
UseTab: Never
|
||||
BreakBeforeBraces: Attach
|
||||
SpacesInParentheses: false
|
||||
SpacesInAngles: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
ContinuationIndentWidth: 4
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
|
||||
SpaceBeforeParens: ControlStatements
|
||||
DisableFormat: false
|
||||
SortIncludes: false
|
||||
...
|
||||
@@ -0,0 +1,4 @@
|
||||
build
|
||||
compile_commands.json
|
||||
.cache
|
||||
.DS_Store
|
||||
@@ -20,18 +20,32 @@
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
cmake_minimum_required ( VERSION 2.8.12 )
|
||||
|
||||
## Verbose output.
|
||||
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )
|
||||
cmake_minimum_required ( VERSION 3.18.0 )
|
||||
|
||||
## Set module name and project name.
|
||||
set ( ROCPROFILER_NAME "rocprofiler" )
|
||||
set ( ROCPROFILER_TARGET "${ROCPROFILER_NAME}64" )
|
||||
set ( ROCPROFILER_LIBRARY "lib${ROCPROFILER_TARGET}" )
|
||||
project ( ${ROCPROFILER_NAME} )
|
||||
project(${ROCPROFILER_NAME} VERSION 2.0.0)
|
||||
|
||||
include(GNUInstallDirs)
|
||||
|
||||
# set default ROCM_PATH
|
||||
if(NOT DEFINED ROCM_PATH)
|
||||
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory")
|
||||
endif()
|
||||
|
||||
## Build is not supported on the Windows platform
|
||||
if(WIN32)
|
||||
message(FATAL_ERROR "Windows build is not supported.")
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
add_compile_options(-Wall)
|
||||
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
|
||||
## Adding default path cmake modules
|
||||
list ( APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules" )
|
||||
## Include common cmake modules
|
||||
@@ -40,7 +54,7 @@ include ( utils )
|
||||
include ( env )
|
||||
|
||||
## Setup the package version.
|
||||
get_version ( "1.0.0" )
|
||||
get_version ( "2.0.0" )
|
||||
message ( "-- LIB-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" )
|
||||
|
||||
set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
|
||||
@@ -68,11 +82,39 @@ set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}" )
|
||||
set ( LIB_DIR "${ROOT_DIR}/src" )
|
||||
set ( TEST_DIR "${ROOT_DIR}/test" )
|
||||
|
||||
find_package(amd_comgr REQUIRED CONFIG HINTS ${CMAKE_INSTALL_PREFIX} PATHS ${ROCM_PATH} PATH_SUFFIXES lib/cmake/amd_comgr)
|
||||
MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.")
|
||||
link_libraries(amd_comgr)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
find_package(hsa-runtime64 REQUIRED CONFIG HINTS ${CMAKE_INSTALL_PREFIX} PATHS ${ROCM_PATH})
|
||||
find_package(HIP REQUIRED CONFIG HINTS ${CMAKE_INSTALL_PREFIX} PATHS ${ROCM_PATH})
|
||||
|
||||
get_property(HSA_RUNTIME_INCLUDE_DIRECTORIES TARGET hsa-runtime64::hsa-runtime64 PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
find_file(HSA_H hsa.h
|
||||
PATHS ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
|
||||
PATH_SUFFIXES hsa
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY)
|
||||
include_directories(${HSA_RUNTIME_INC_PATH})
|
||||
|
||||
if(NOT DEFINED LIBRARY_TYPE)
|
||||
set(LIBRARY_TYPE SHARED)
|
||||
endif()
|
||||
|
||||
## Enable tracing API
|
||||
if (NOT USE_PROF_API)
|
||||
set(USE_PROF_API 1)
|
||||
endif()
|
||||
|
||||
configure_file(rocprofv2 ${PROJECT_BINARY_DIR} COPYONLY)
|
||||
install(FILES
|
||||
${PROJECT_SOURCE_DIR}/rocprofv2
|
||||
DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
|
||||
COMPONENT runtime)
|
||||
|
||||
# Protocol header lookup
|
||||
set(PROF_API_HEADER_NAME prof_protocol.h)
|
||||
if(USE_PROF_API EQUAL 1)
|
||||
@@ -87,26 +129,27 @@ if(USE_PROF_API EQUAL 1)
|
||||
if(NOT PROF_API_HEADER_DIR)
|
||||
MESSAGE(FATAL_ERROR "Profiling API header not found. Tracer integration disabled. Use -DPROF_API_HEADER_PATH=<path to ${PROF_API_HEADER_NAME} header>")
|
||||
else()
|
||||
add_definitions(-DUSE_PROF_API=1)
|
||||
include_directories(${PROF_API_HEADER_DIR})
|
||||
MESSAGE(STATUS "Profiling API: ${PROF_API_HEADER_DIR}/${PROF_API_HEADER_NAME}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
## Build library
|
||||
include ( ${LIB_DIR}/CMakeLists.txt )
|
||||
## Build libraries
|
||||
add_subdirectory(src)
|
||||
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" )
|
||||
if(${LIBRARY_TYPE} STREQUAL SHARED)
|
||||
## Build samples
|
||||
add_subdirectory(samples)
|
||||
|
||||
## If the library is a release, strip the target library
|
||||
if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
|
||||
add_custom_command ( TARGET ${ROCPROFILER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so )
|
||||
endif ()
|
||||
## Build tests
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
|
||||
## Build Plugins
|
||||
add_subdirectory(plugin)
|
||||
|
||||
## Build tests
|
||||
add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test )
|
||||
add_subdirectory(${TEST_DIR} ${PROJECT_BINARY_DIR}/test)
|
||||
|
||||
## Installation and packaging
|
||||
set ( DEST_NAME ${ROCPROFILER_NAME} )
|
||||
@@ -130,18 +173,8 @@ message ( "CMake-install-prefix: ${CMAKE_INSTALL_PREFIX}" )
|
||||
message ( "CPack-install-prefix: ${CPACK_PACKAGING_INSTALL_PREFIX}" )
|
||||
message ( "-----------Dest-name: ${DEST_NAME}" )
|
||||
|
||||
## set components
|
||||
set ( CPACK_COMPONENTS_ALL runtime dev )
|
||||
## Enable Component Install
|
||||
set(CPACK_RPM_COMPONENT_INSTALL ON)
|
||||
set(CPACK_DEB_COMPONENT_INSTALL ON)
|
||||
|
||||
## Install libraries: Non versioned lib file in dev package
|
||||
install ( TARGETS ${ROCPROFILER_TARGET} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT dev NAMELINK_ONLY )
|
||||
install ( TARGETS ${ROCPROFILER_TARGET} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT runtime NAMELINK_SKIP )
|
||||
## Install headers
|
||||
install ( FILES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/src/core/activity.h
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${DEST_NAME}
|
||||
COMPONENT dev )
|
||||
@@ -178,77 +211,114 @@ install ( FILES ${PROJECT_BINARY_DIR}/test/rocprof-ctrl DESTINATION ${CMAKE_INST
|
||||
COMPONENT runtime )
|
||||
|
||||
# File reorg Backward compatibility
|
||||
option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON)
|
||||
if(FILE_REORG_BACKWARD_COMPATIBILITY)
|
||||
include (rocprofiler-backward-compat.cmake)
|
||||
endif()
|
||||
# option(FILE_REORG_BACKWARD_COMPATIBILITY "Enable File Reorg with backward compatibility" ON)
|
||||
# if(FILE_REORG_BACKWARD_COMPATIBILITY)
|
||||
# include (rocprofiler-backward-compat.cmake)
|
||||
# endif()
|
||||
|
||||
## Packaging directives
|
||||
set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" CACHE STRING "CPACK GENERATOR e.g. DEB;RPM" )
|
||||
set ( ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")
|
||||
set ( CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc." )
|
||||
set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
|
||||
set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} )
|
||||
set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} )
|
||||
set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}" )
|
||||
set ( CPACK_PACKAGE_CONTACT "ROCm Profiler Support <dl.ROCm-Profiler.support@amd.com>" )
|
||||
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "ROCPROFILER library for AMD HSA runtime API extension support" )
|
||||
set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" )
|
||||
if(${LIBRARY_TYPE} STREQUAL SHARED)
|
||||
# # Installation and packaging
|
||||
if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX)
|
||||
get_filename_component(DEST_NAME ${CPACK_PACKAGING_INSTALL_PREFIX} NAME)
|
||||
get_filename_component(DEST_DIR ${CPACK_PACKAGING_INSTALL_PREFIX} DIRECTORY)
|
||||
set(CPACK_PACKAGING_INSTALL_PREFIX ${DEST_DIR})
|
||||
endif()
|
||||
|
||||
# Install license file
|
||||
install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR} COMPONENT runtime)
|
||||
message("-----------Dest-name: ${DEST_NAME}")
|
||||
message("------Install-prefix: ${CMAKE_INSTALL_PREFIX}")
|
||||
message("-----------CPACK-dir: ${CPACK_PACKAGING_INSTALL_PREFIX}")
|
||||
|
||||
if ( DEFINED ENV{ROCM_LIBPATCH_VERSION} )
|
||||
set ( CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}" )
|
||||
message ( "Using CPACK_PACKAGE_VERSION ${CPACK_PACKAGE_VERSION}" )
|
||||
endif()
|
||||
## Packaging directives
|
||||
set(CPACK_GENERATOR "DEB" "RPM" "TGZ" CACHE STRING "CPACK GENERATOR DEB;RPM")
|
||||
set(ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")
|
||||
set(CPACK_PACKAGE_NAME "${PROJECT_NAME}")
|
||||
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
|
||||
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
|
||||
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
|
||||
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
|
||||
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
|
||||
set(CPACK_PACKAGE_CONTACT "ROCm Profiler Support <dl.ROCm-Profiler.support@amd.com>")
|
||||
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD ROCMTOOLS library")
|
||||
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
|
||||
|
||||
## Debian package specific variables
|
||||
if ( DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
|
||||
set ( CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE} )
|
||||
else()
|
||||
set ( CPACK_DEBIAN_PACKAGE_RELEASE "local" )
|
||||
endif()
|
||||
message ( "Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}" )
|
||||
set ( CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT" )
|
||||
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
|
||||
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}")
|
||||
message("Using CPACK_PACKAGE_VERSION ${CPACK_PACKAGE_VERSION}")
|
||||
endif()
|
||||
|
||||
# # Install license file
|
||||
install(FILES ${CPACK_RESOURCE_FILE_LICENSE}
|
||||
DESTINATION ${CMAKE_INSTALL_DOCDIR}
|
||||
COMPONENT runtime)
|
||||
|
||||
# # Debian package specific variables
|
||||
if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
|
||||
set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
|
||||
else()
|
||||
set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
|
||||
endif()
|
||||
|
||||
message("Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}")
|
||||
set(CPACK_DEB_COMPONENT_INSTALL ON)
|
||||
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
|
||||
set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
|
||||
set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "hsa-rocr-dev, rocm-core")
|
||||
set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${PROJECT_NAME}-dev" )
|
||||
set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "${PROJECT_NAME}, hsa-rocr-dev, rocm-core" )
|
||||
set(CPACK_DEBIAN_TESTS_PACKAGE_NAME "${PROJECT_NAME}-tests")
|
||||
set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "${PROJECT_NAME}-dev, hsa-rocr-dev, rocm-core")
|
||||
set(CPACK_DEBIAN_SAMPLES_PACKAGE_NAME "${PROJECT_NAME}-samples")
|
||||
set(CPACK_DEBIAN_SAMPLES_PACKAGE_DEPENDS "${PROJECT_NAME}-dev, hsa-rocr-dev, rocm-core")
|
||||
set(CPACK_DEBIAN_DOCS_PACKAGE_NAME "${PROJECT_NAME}-docs")
|
||||
set(CPACK_DEBIAN_DOCS_PACKAGE_DEPENDS "${PROJECT_NAME}-dev, hsa-rocr-dev, rocm-core")
|
||||
set(CPACK_DEBIAN_PLUGINS_PACKAGE_NAME "${PROJECT_NAME}-plugins")
|
||||
set(CPACK_DEBIAN_PLUGINS_PACKAGE_DEPENDS "${PROJECT_NAME}, hsa-rocr-dev, rocm-core")
|
||||
|
||||
set ( CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${ROCPROFILER_NAME}" )
|
||||
set ( CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "hsa-rocr-dev, rocm-core" )
|
||||
set ( CPACK_DEBIAN_DEV_PACKAGE_NAME "${ROCPROFILER_NAME}-dev" )
|
||||
set ( CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "${ROCPROFILER_NAME}, hsa-rocr-dev, rocm-core" )
|
||||
## Process the Debian install/remove scripts to update the CPACK variables
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst.in DEBIAN/postinst @ONLY )
|
||||
configure_file ( ${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm.in DEBIAN/prerm @ONLY )
|
||||
set ( CPACK_DEBIAN_RUNTIME_PACKAGE_CONTROL_EXTRA "DEBIAN/postinst;DEBIAN/prerm" )
|
||||
|
||||
## RPM package specific variables
|
||||
if ( DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE} )
|
||||
set ( CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE} )
|
||||
else()
|
||||
set ( CPACK_RPM_PACKAGE_RELEASE "local" )
|
||||
endif()
|
||||
message ( "Using CPACK_RPM_PACKAGE_RELEASE ${CPACK_RPM_PACKAGE_RELEASE}" )
|
||||
# # RPM package specific variables
|
||||
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
|
||||
set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
|
||||
else()
|
||||
set(CPACK_RPM_PACKAGE_RELEASE "local")
|
||||
endif()
|
||||
|
||||
set( CPACK_RPM_PACKAGE_LICENSE "MIT" )
|
||||
message("Using CPACK_RPM_PACKAGE_RELEASE ${CPACK_RPM_PACKAGE_RELEASE}")
|
||||
|
||||
## 'dist' breaks manual builds on debian systems due to empty Provides
|
||||
execute_process( COMMAND rpm --eval %{?dist}
|
||||
RESULT_VARIABLE PROC_RESULT
|
||||
OUTPUT_VARIABLE EVAL_RESULT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE )
|
||||
message("RESULT_VARIABLE ${PROC_RESULT} OUTPUT_VARIABLE: ${EVAL_RESULT}")
|
||||
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
|
||||
|
||||
if ( PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "" )
|
||||
string ( APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}" )
|
||||
endif()
|
||||
set ( CPACK_RPM_FILE_NAME "RPM-DEFAULT" )
|
||||
# # 'dist' breaks manual builds on debian systems due to empty Provides
|
||||
execute_process(COMMAND rpm --eval %{?dist}
|
||||
RESULT_VARIABLE PROC_RESULT
|
||||
OUTPUT_VARIABLE EVAL_RESULT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
message("RESULT_VARIABLE ${PROC_RESULT} OUTPUT_VARIABLE: ${EVAL_RESULT}")
|
||||
|
||||
if(PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "")
|
||||
string(APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}")
|
||||
endif()
|
||||
|
||||
set(CPACK_RPM_COMPONENT_INSTALL ON)
|
||||
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "hsa-rocr-dev, rocm-core")
|
||||
set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel")
|
||||
set(CPACK_RPM_DEV_PACKAGE_REQUIRES "${PROJECT_NAME}, hsa-rocr-dev, rocm-core")
|
||||
set(CPACK_RPM_DEV_PACKAGE_PROVIDES "${PROJECT_NAME}-dev")
|
||||
set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "${PROJECT_NAME}-dev")
|
||||
set(CPACK_RPM_TESTS_PACKAGE_NAME "${PROJECT_NAME}-tests")
|
||||
set(CPACK_RPM_TESTS_PACKAGE_REQUIRES "${PROJECT_NAME}-devel, hsa-rocr-dev, rocm-core")
|
||||
set(CPACK_RPM_SAMPLES_PACKAGE_NAME "${PROJECT_NAME}-samples")
|
||||
set(CPACK_RPM_SAMPLES_PACKAGE_REQUIRES "${PROJECT_NAME}-devel, hsa-rocr-dev, rocm-core")
|
||||
set(CPACK_RPM_DOCS_PACKAGE_NAME "${PROJECT_NAME}-docs")
|
||||
set(CPACK_RPM_DOCS_PACKAGE_REQUIRES "${PROJECT_NAME}-devel, hsa-rocr-dev, rocm-core")
|
||||
set(CPACK_RPM_PLUGINS_PACKAGE_NAME "${PROJECT_NAME}-plugins")
|
||||
set(CPACK_RPM_PLUGINS_PACKAGE_REQUIRES "${PROJECT_NAME}, hsa-rocr-dev, rocm-core")
|
||||
message("CPACK_RPM_PACKAGE_RELEASE: ${CPACK_RPM_PACKAGE_RELEASE}")
|
||||
|
||||
set ( CPACK_RPM_RUNTIME_PACKAGE_NAME "${ROCPROFILER_NAME}" )
|
||||
set ( CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "hsa-rocr-devel, rocm-core" )
|
||||
set ( CPACK_RPM_DEV_PACKAGE_NAME "${ROCPROFILER_NAME}-devel" )
|
||||
set ( CPACK_RPM_DEV_PACKAGE_REQUIRES "${ROCPROFILER_NAME}, hsa-rocr-devel, rocm-core" )
|
||||
set ( CPACK_RPM_DEV_PACKAGE_PROVIDES "${ROCPROFILER_NAME}-dev" )
|
||||
set ( CPACK_RPM_DEV_PACKAGE_OBSOLETES "${ROCPROFILER_NAME}-dev" )
|
||||
## Process the Rpm install/remove scripts to update the CPACK variables
|
||||
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/post.in" RPM/post @ONLY )
|
||||
configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY )
|
||||
@@ -256,11 +326,85 @@ configure_file ( "${CMAKE_CURRENT_SOURCE_DIR}/RPM/postun.in" RPM/postun @ONLY )
|
||||
set ( CPACK_RPM_RUNTIME_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/post" )
|
||||
set ( CPACK_RPM_RUNTIME_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_BINARY_DIR}/RPM/postun" )
|
||||
# Remove dependency on rocm-core if -DROCM_DEP_ROCMCORE=ON not given to cmake
|
||||
if(NOT ROCM_DEP_ROCMCORE)
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_DEV_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS})
|
||||
if(NOT ROCM_DEP_ROCMCORE)
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_DEV_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_TESTS_PACKAGE_REQUIRES ${CPACK_RPM_TESTS_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_SAMPLES_PACKAGE_REQUIRES ${CPACK_RPM_SAMPLES_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_DOCS_PACKAGE_REQUIRES ${CPACK_RPM_DOCS_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_PLUGINS_PACKAGE_REQUIRES ${CPACK_RPM_PLUGINS_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS ${CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_SAMPLES_PACKAGE_DEPENDS ${CPACK_DEBIAN_SAMPLES_PACKAGE_DEPENDS})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DOCS_PACKAGE_DEPENDS ${CPACK_DEBIAN_DOCS_PACKAGE_DEPENDS})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_PLUGINS_PACKAGE_DEPENDS ${CPACK_DEBIAN_PLUGINS_PACKAGE_DEPENDS})
|
||||
endif()
|
||||
|
||||
set(CPACK_COMPONENTS_ALL runtime dev tests docs samples plugins)
|
||||
include(CPack)
|
||||
|
||||
cpack_add_component(runtime
|
||||
DISPLAY_NAME "Runtime"
|
||||
DESCRIPTION "Dynamic libraries for the ROCProfiler")
|
||||
|
||||
cpack_add_component(dev
|
||||
DISPLAY_NAME "Development"
|
||||
DESCRIPTION "Development needed header files for ROCProfiler"
|
||||
DEPENDS runtime)
|
||||
|
||||
cpack_add_component(plugins
|
||||
DISPLAY_NAME "ROCProfile Plugins"
|
||||
DESCRIPTION "Plugins for handling ROCProfiler data output"
|
||||
DEPENDS runtime)
|
||||
|
||||
cpack_add_component(tests
|
||||
DISPLAY_NAME "Tests"
|
||||
DESCRIPTION "Tests for the ROCProfiler"
|
||||
DEPENDS dev)
|
||||
|
||||
cpack_add_component(samples
|
||||
DISPLAY_NAME "Samples"
|
||||
DESCRIPTION "Samples for the ROCProfiler"
|
||||
DEPENDS dev)
|
||||
|
||||
cpack_add_component(docs
|
||||
DISPLAY_NAME "Documentation"
|
||||
DESCRIPTION "Documentation for the ROCProfiler API"
|
||||
DEPENDS dev)
|
||||
endif()
|
||||
|
||||
include ( CPack )
|
||||
find_package(Doxygen)
|
||||
|
||||
if(DOXYGEN_FOUND)
|
||||
# # Set input and output files
|
||||
set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/doc/Doxyfile.in)
|
||||
set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile)
|
||||
|
||||
# # Request to configure the file
|
||||
configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/doc/html/index.html ${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf
|
||||
COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT}
|
||||
COMMAND make -C ${CMAKE_CURRENT_BINARY_DIR}/doc/latex pdf
|
||||
MAIN_DEPENDENCY ${DOXYGEN_OUT} ${DOXYGEN_IN}
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler_plugin.h
|
||||
COMMENT "Generating documentation")
|
||||
|
||||
add_custom_target(doc DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/doc/html/index.html
|
||||
${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf)
|
||||
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf"
|
||||
DESTINATION ${CMAKE_INSTALL_DOCDIR}
|
||||
RENAME "${PROJECT_NAME}.pdf"
|
||||
OPTIONAL
|
||||
COMPONENT docs)
|
||||
|
||||
install(DIRECTORY
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/doc/html/"
|
||||
DESTINATION ${CMAKE_INSTALL_DATADIR}/html/${PROJECT_NAME}
|
||||
OPTIONAL
|
||||
COMPONENT docs)
|
||||
endif()
|
||||
|
||||
@@ -22,19 +22,54 @@
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
SRC_DIR=`dirname $0`
|
||||
TO_CLEAN=yes
|
||||
SRC_DIR=$(dirname "$0")
|
||||
COMPONENT="rocprofiler"
|
||||
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
|
||||
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
|
||||
|
||||
# Print the supported command-line options and terminate the script.
# Always exits with status 1, including when reached through -h/--help.
usage() {
  echo -e "ROCProfiler Build Script Usage:"
  echo -e "\nTo run ./run.sh PARAMs, PARAMs can be the following:"
  echo -e "-h | --help For showing this message"
  echo -e "-b | --build For compiling"
  echo -e "-cb | --clean-build For full clean build"
  # The short flag recognised by the option parser is "-acb"; the help text
  # must advertise the same spelling or users get "Wrong option".
  echo -e "-acb | --asan-clean-build For compiling with ASAN library attached"
  exit 1
}
|
||||
|
||||
# Consume leading option flags one at a time; stop at the first argument
# (or absence of arguments) that does not start with a dash.
while true; do
  case "$1" in
    -h|--help)
      usage
      exit 1
      ;;
    -b|--build)
      # Incremental build: keep the existing build directory.
      TO_CLEAN=no
      shift
      ;;
    -acb|--asan-clean-build)
      ASAN=True
      TO_CLEAN=yes
      shift
      ;;
    -cb|--clean-build)
      TO_CLEAN=yes
      shift
      ;;
    -*)
      # Any other dash-prefixed argument is an unknown option.
      echo -e "Wrong option \"$1\", Please use the following options:\n"
      usage
      exit 1
      ;;
    *)
      break
      ;;
  esac
done
|
||||
|
||||
umask 022
|
||||
|
||||
if [ -z "$ROCPROFILER_ROOT" ]; then ROCPROFILER_ROOT=$SRC_DIR; fi
|
||||
if [ -z "$BUILD_DIR" ] ; then BUILD_DIR=$SRC_DIR/build; fi
|
||||
if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="release"; fi
|
||||
if [ -z "$BUILD_DIR" ] ; then BUILD_DIR=build; fi
|
||||
if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="RelWithDebInfo"; fi
|
||||
if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi
|
||||
if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="${ROCM_PATH}/${COMPONENT}"; fi
|
||||
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH="${ROCM_PATH}/include/hsa:${ROCM_PATH}"; fi
|
||||
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
|
||||
if [ -z "$HIP_VDI" ] ; then HIP_VDI=0; fi
|
||||
if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
|
||||
if [ -z "$TO_CLEAN" ] ; then TO_CLEAN=yes; fi
|
||||
if [ -z "$ASAN" ] ; then ASAN=False; fi
|
||||
|
||||
ROCPROFILER_ROOT=$(cd $ROCPROFILER_ROOT && echo $PWD)
|
||||
|
||||
@@ -43,15 +78,14 @@ mkdir -p $BUILD_DIR
|
||||
pushd $BUILD_DIR
|
||||
|
||||
cmake \
|
||||
-DCMAKE_MODULE_PATH=$ROCPROFILER_ROOT/cmake_modules \
|
||||
-DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE \
|
||||
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
|
||||
-DCMAKE_PREFIX_PATH="${PREFIX_PATH}" \
|
||||
-DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \
|
||||
-DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \
|
||||
-DCPACK_GENERATOR="${CPACKGEN:-"DEB;RPM"}" \
|
||||
-DCMAKE_MODULE_PATH=$ROCM_PATH/hip/cmake \
|
||||
-DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
|
||||
-DCMAKE_INSTALL_PREFIX="$PACKAGE_ROOT" \
|
||||
-DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
|
||||
$ROCPROFILER_ROOT
|
||||
|
||||
make -j
|
||||
make mytest
|
||||
make package
|
||||
|
||||
exit 0
|
||||
@@ -0,0 +1,60 @@
|
||||
---
|
||||
Language: Cpp
|
||||
BasedOnStyle: Google
|
||||
AccessModifierOffset: -1
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
AlignEscapedNewlinesLeft: false
|
||||
AlignTrailingComments: true
|
||||
AlignConsecutiveAssignments: false
|
||||
AlignOperands: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: false
|
||||
AllowShortIfStatementsOnASingleLine: true
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AlwaysBreakAfterDefinitionReturnType: false
|
||||
AlwaysBreakTemplateDeclarations: false
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
BreakBeforeBinaryOperators: false
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
BinPackParameters: true
|
||||
ColumnLimit: 100
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
IndentCaseLabels: true
|
||||
IndentWrappedFunctionNames: false
|
||||
IndentFunctionDeclarationAfterType: false
|
||||
MaxEmptyLinesToKeep: 2
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
NamespaceIndentation: None
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: false
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
DerivePointerAlignment: false
|
||||
PointerAlignment: Left
|
||||
SpacesBeforeTrailingComments: 2
|
||||
Cpp11BracedListStyle: true
|
||||
Standard: Auto
|
||||
IndentWidth: 2
|
||||
TabWidth: 8
|
||||
UseTab: Never
|
||||
BreakBeforeBraces: Attach
|
||||
SpacesInParentheses: false
|
||||
SpacesInAngles: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
ContinuationIndentWidth: 4
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
|
||||
SpaceBeforeParens: ControlStatements
|
||||
DisableFormat: false
|
||||
SortIncludes: false
|
||||
...
|
||||
Executable
+91
@@ -0,0 +1,91 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
SRC_DIR=$(dirname "$0")
|
||||
COMPONENT="rocmtools"
|
||||
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
|
||||
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
|
||||
|
||||
# Print the supported command-line options and terminate the script.
# Always exits with status 1, including when reached through -h/--help.
usage() {
  echo -e "ROCMTools Build Script Usage:"
  echo -e "\nTo run ./run.sh PARAMs, PARAMs can be the following:"
  echo -e "-h | --help For showing this message"
  echo -e "-b | --build For compiling"
  echo -e "-cb | --clean-build For full clean build"
  # The short flag recognised by the option parser is "-acb"; the help text
  # must advertise the same spelling or users get "Wrong option".
  echo -e "-acb | --asan-clean-build For compiling with ASAN library attached"
  exit 1
}
|
||||
|
||||
# Consume leading option flags one at a time; stop at the first argument
# (or absence of arguments) that does not start with a dash.
while true; do
  case "$1" in
    -h|--help)
      usage
      exit 1
      ;;
    -b|--build)
      # Incremental build: keep the existing build directory.
      TO_CLEAN=no
      shift
      ;;
    -acb|--asan-clean-build)
      ASAN=True
      TO_CLEAN=yes
      shift
      ;;
    -cb|--clean-build)
      TO_CLEAN=yes
      shift
      ;;
    -*)
      # Any other dash-prefixed argument is an unknown option.
      echo -e "Wrong option \"$1\", Please use the following options:\n"
      usage
      exit 1
      ;;
    *)
      break
      ;;
  esac
done
|
||||
|
||||
umask 022
|
||||
|
||||
# Fill in a default for every setting that was not supplied through the
# environment or set by the option parser above.
if [ -z "$ROCPROFILER_ROOT" ]; then ROCPROFILER_ROOT=$SRC_DIR; fi
if [ -z "$BUILD_DIR" ]; then BUILD_DIR=build; fi
if [ -z "$BUILD_TYPE" ]; then BUILD_TYPE="RelWithDebInfo"; fi
if [ -z "$PACKAGE_ROOT" ]; then PACKAGE_ROOT=$ROCM_PATH; fi
if [ -z "$PREFIX_PATH" ]; then PREFIX_PATH=$PACKAGE_ROOT; fi
if [ -z "$HIP_VDI" ]; then HIP_VDI=0; fi
# An explicit ROCM_RPATH replaces the default runpath derived from ROCM_PATH.
if [ -n "$ROCM_RPATH" ]; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
if [ -z "$TO_CLEAN" ]; then TO_CLEAN=yes; fi
if [ -z "$ASAN" ]; then ASAN=False; fi

# Resolve the source root to an absolute path before changing directory.
# Expansions are quoted so paths containing spaces or glob characters survive.
ROCPROFILER_ROOT=$(cd "$ROCPROFILER_ROOT" && pwd)

# A clean build removes the whole build directory first.
if [ "$TO_CLEAN" = "yes" ]; then rm -rf "$BUILD_DIR"; fi
mkdir -p "$BUILD_DIR"
pushd "$BUILD_DIR"

# Configure in the build directory, pointing CMake at the source root.
cmake \
  -DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE \
  -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
  -DCMAKE_MODULE_PATH="$ROCM_PATH/hip/cmake" \
  -DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
  -DCMAKE_INSTALL_PREFIX="$PACKAGE_ROOT" \
  -DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
  "$ROCPROFILER_ROOT"

make -j

exit 0
|
||||
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
@@ -0,0 +1,92 @@
|
||||
# rocmtools
|
||||
|
||||
|
||||
|
||||
## Getting started
|
||||
|
||||
To make it easy for you to get started with GitLab, here's a list of recommended next steps.
|
||||
|
||||
Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!
|
||||
|
||||
## Add your files
|
||||
|
||||
- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files
|
||||
- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command:
|
||||
|
||||
```
|
||||
cd existing_repo
|
||||
git remote add origin http://gitlab1.amd.com/vinodtipparaju/rocmtools.git
|
||||
git branch -M main
|
||||
git push -uf origin main
|
||||
```
|
||||
|
||||
## Integrate with your tools
|
||||
|
||||
- [ ] [Set up project integrations](http://gitlab1.amd.com/vinodtipparaju/rocmtools/-/settings/integrations)
|
||||
|
||||
## Collaborate with your team
|
||||
|
||||
- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/)
|
||||
- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html)
|
||||
- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically)
|
||||
- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/)
|
||||
- [ ] [Automatically merge when pipeline succeeds](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html)
|
||||
|
||||
## Test and Deploy
|
||||
|
||||
Use the built-in continuous integration in GitLab.
|
||||
|
||||
- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html)
|
||||
- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing (SAST)](https://docs.gitlab.com/ee/user/application_security/sast/)
|
||||
- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html)
|
||||
- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/)
|
||||
- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html)
|
||||
|
||||
***
|
||||
|
||||
# Editing this README
|
||||
|
||||
When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thank you to [makeareadme.com](https://www.makeareadme.com/) for this template.
|
||||
|
||||
## Suggestions for a good README
|
||||
Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information.
|
||||
|
||||
## Name
|
||||
Choose a self-explaining name for your project.
|
||||
|
||||
## Description
|
||||
Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors.
|
||||
|
||||
## Badges
|
||||
On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge.
|
||||
|
||||
## Visuals
|
||||
Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method.
|
||||
|
||||
## Installation
|
||||
Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection.
|
||||
|
||||
## Usage
|
||||
Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README.
|
||||
|
||||
## Support
|
||||
Tell people where they can go for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
|
||||
|
||||
## Roadmap
|
||||
If you have ideas for releases in the future, it is a good idea to list them in the README.
|
||||
|
||||
## Contributing
|
||||
State if you are open to contributions and what your requirements are for accepting them.
|
||||
|
||||
For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self.
|
||||
|
||||
You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser.
|
||||
|
||||
## Authors and acknowledgment
|
||||
Show your appreciation to those who have contributed to the project.
|
||||
|
||||
## License
|
||||
For open source projects, say how it is licensed.
|
||||
|
||||
## Project status
|
||||
If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
|
||||
Binary file not shown.
@@ -0,0 +1,28 @@
|
||||
# Try to find libdw (header `elfutils/libdw.h`, library `dw`).
#
# Once done, this defines:
#   - LibDw_FOUND / LIBDW_FOUND - set by find_package_handle_standard_args()
#                                 when both the header and the library exist
#   - LIBDW_INCLUDES            - directory containing elfutils/libdw.h
#   - LIBDW_INCLUDE_DIRS        - same value as LIBDW_INCLUDES
#   - LIBDW_LIBRARIES           - link these to use libdw
find_path(FIND_LIBDW_INCLUDES
  NAMES
    elfutils/libdw.h
  PATHS
    /usr/include
    /usr/local/include)

# The keyword for extra search directories is `PATHS`; any other word placed
# after NAMES is taken as one more candidate library name.
find_library(FIND_LIBDW_LIBRARIES
  NAMES
    dw
  PATHS
    /usr/lib
    /usr/local/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LibDw DEFAULT_MSG
  FIND_LIBDW_INCLUDES FIND_LIBDW_LIBRARIES)
mark_as_advanced(FIND_LIBDW_INCLUDES FIND_LIBDW_LIBRARIES)

# Result variables for consumers.
set(LIBDW_INCLUDES ${FIND_LIBDW_INCLUDES})
set(LIBDW_INCLUDE_DIRS ${FIND_LIBDW_INCLUDES})
set(LIBDW_LIBRARIES ${FIND_LIBDW_LIBRARIES})
|
||||
@@ -0,0 +1,30 @@
|
||||
# Try to find libelf (header `libelf.h`, library `elf`).
#
# Once done, this defines:
#   - LibElf_FOUND / LIBELF_FOUND - set by find_package_handle_standard_args()
#                                   when both the header and the library exist
#   - LIBELF_INCLUDES             - directory containing libelf.h
#   - LIBELF_INCLUDE_DIRS         - same value as LIBELF_INCLUDES
#   - LIBELF_LIBRARIES            - link these to use libelf
find_path(FIND_LIBELF_INCLUDES
  NAMES
    libelf.h
  PATHS
    /usr/include
    /usr/include/libelf
    /usr/local/include
    /usr/local/include/libelf)

# The keyword for extra search directories is `PATHS`; any other word placed
# after NAMES is taken as one more candidate library name.
find_library(FIND_LIBELF_LIBRARIES
  NAMES
    elf
  PATHS
    /usr/lib
    /usr/local/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LibElf DEFAULT_MSG
  FIND_LIBELF_INCLUDES FIND_LIBELF_LIBRARIES)
mark_as_advanced(FIND_LIBELF_INCLUDES FIND_LIBELF_LIBRARIES)

# Result variables for consumers.
set(LIBELF_INCLUDES ${FIND_LIBELF_INCLUDES})
set(LIBELF_INCLUDE_DIRS ${FIND_LIBELF_INCLUDES})
set(LIBELF_LIBRARIES ${FIND_LIBELF_LIBRARIES})
|
||||
@@ -0,0 +1,25 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
add_subdirectory(file)
|
||||
add_subdirectory(perfetto)
|
||||
add_subdirectory(ctf)
|
||||
@@ -0,0 +1 @@
|
||||
README.html
|
||||
@@ -0,0 +1,161 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
# Plugin shared object.
|
||||
add_library(ctf_plugin SHARED
|
||||
ctf.cpp
|
||||
plugin.cpp
|
||||
barectf.c "${CMAKE_CURRENT_BINARY_DIR}/barectf.h"
|
||||
${PROJECT_SOURCE_DIR}/src/utils/helper.cpp
|
||||
hsa_begin.cpp.i hsa_end.cpp.i
|
||||
hip_begin.cpp.i hip_end.cpp.i)
|
||||
set_target_properties(ctf_plugin PROPERTIES
|
||||
CXX_VISIBILITY_PRESET hidden
|
||||
LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
|
||||
LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}")
|
||||
set(METADATA_STREAM_FILE_DIR "${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/plugin/ctf")
|
||||
target_compile_definitions(ctf_plugin PRIVATE
|
||||
HIP_PROF_HIP_API_STRING=1
|
||||
__HIP_PLATFORM_HCC__=1
|
||||
CTF_PLUGIN_METADATA_FILE_PATH="${CMAKE_INSTALL_PREFIX}/${METADATA_STREAM_FILE_DIR}/metadata")
|
||||
target_include_directories(ctf_plugin PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}/inc"
|
||||
"${PROJECT_SOURCE_DIR}"
|
||||
"${CMAKE_BINARY_DIR}/src/api"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}")
|
||||
target_link_options(ctf_plugin PRIVATE
|
||||
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
|
||||
-Wl,--no-undefined)
|
||||
target_link_libraries(ctf_plugin PRIVATE
|
||||
${ROCPROFILER_TARGET}
|
||||
hsa-runtime64::hsa-runtime64
|
||||
systemd
|
||||
stdc++fs
|
||||
dl)
|
||||
install(TARGETS ctf_plugin LIBRARY
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}"
|
||||
COMPONENT runtime)
|
||||
|
||||
# `gen_api_files.py` and `gen_env_yaml.py` require Python 3,
# CppHeaderParser, PyYAML, and barectf.
find_package(Python3 COMPONENTS Interpreter REQUIRED)

message(STATUS "Python: ${Python3_EXECUTABLE}")

# check_py3_pkg(<pkg_name>)
#
# Stops configuration with a fatal error unless the Python 3 interpreter
# found above can `import <pkg_name>`; prints a status line on success.
#
# Example: check_py3_pkg(yaml)
function(check_py3_pkg pkg_name)
  execute_process(COMMAND "${Python3_EXECUTABLE}" -c "import ${pkg_name}"
                  RESULT_VARIABLE PY3_IMPORT_RES
                  OUTPUT_QUIET)

  # The result is compared by variable name: when the interpreter cannot be
  # started, RESULT_VARIABLE holds an error string (possibly several words)
  # rather than a number, which must still lead to the error below.
  if(NOT PY3_IMPORT_RES EQUAL 0)
    message(FATAL_ERROR "Cannot find Python 3 package `${pkg_name}`")
  endif()

  message(STATUS "Found Python 3 package `${pkg_name}`")
endfunction()

check_py3_pkg(CppHeaderParser)
check_py3_pkg(yaml)
find_program(BARECTF_RES barectf REQUIRED)
|
||||
|
||||
# Generate barectf YAML and C++ files for HSA API.
|
||||
get_property(HSA_RUNTIME_INCLUDE_DIRS
|
||||
TARGET hsa-runtime64::hsa-runtime64
|
||||
PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
find_file(HSA_H hsa.h
|
||||
PATHS ${HSA_RUNTIME_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES hsa
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
get_filename_component(HSA_RUNTIME_INC_PATH "${HSA_H}" DIRECTORY)
|
||||
add_custom_command(
|
||||
OUTPUT hsa_erts.yaml hsa_begin.cpp.i hsa_end.cpp.i
|
||||
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa.h" -o hsa.h.i
|
||||
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
|
||||
-o hsa_ext_amd.h.i
|
||||
COMMAND ${CMAKE_COMMAND} -E cat hsa.h.i
|
||||
hsa_ext_amd.h.i
|
||||
"${CMAKE_BINARY_DIR}/src/api/hsa_prof_str.h"
|
||||
> hsa_input.h
|
||||
COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
|
||||
hsa hsa_input.h
|
||||
BYPRODUCTS hsa.h.i hsa_ext_amd.h.i hsa_input.h
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
|
||||
"${HSA_RUNTIME_INC_PATH}/hsa.h"
|
||||
"${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
|
||||
"${CMAKE_BINARY_DIR}/src/api/hsa_prof_str.h"
|
||||
COMMENT "Generating HSA API files for the `ctf` plugin...")
|
||||
|
||||
# Generate barectf YAML and C++ files for HIP API.
|
||||
get_property(HIP_INCLUDE_DIRS TARGET hip::amdhip64
|
||||
PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
find_file(HIP_RUNTIME_API_H hip_runtime_api.h
|
||||
PATHS ${HIP_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES hip
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
find_file(HIP_PROF_STR_H hip_prof_str.h
|
||||
PATHS ${HIP_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES hip hip/amd_detail
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
list(TRANSFORM HIP_INCLUDE_DIRS PREPEND -I)
|
||||
# Build rule producing the HIP barectf YAML and C++ include files:
#   1. preprocess the HIP runtime API header,
#   2. concatenate the result with `hip_prof_str.h` into `hip_input.h`,
#   3. run `gen_api_files.py` on that input.
add_custom_command(
  OUTPUT hip_erts.yaml hip_begin.cpp.i hip_end.cpp.i
  COMMAND ${CMAKE_C_COMPILER} ${HIP_INCLUDE_DIRS}
          -E "${HIP_RUNTIME_API_H}"
          -D__HIP_PLATFORM_HCC__=1
          -D__HIP_ROCclr__=1
          -o hip_runtime_api.h.i
  # Same concatenation tool as the HSA rule (CMake's own `cat`), so both
  # rules depend on the same thing.
  COMMAND ${CMAKE_COMMAND} -E cat hip_runtime_api.h.i
                                  "${HIP_PROF_STR_H}"
                                  > hip_input.h
  COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
          hip hip_input.h
  BYPRODUCTS hip_runtime_api.h.i hip_input.h
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
          "${HIP_RUNTIME_API_H}"
          "${HIP_PROF_STR_H}"
  COMMENT "Generating HIP API files for the `ctf` plugin...")
|
||||
|
||||
# Generate `env.yaml` (trace environment for barectf).
|
||||
add_custom_command(
|
||||
OUTPUT env.yaml
|
||||
COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_env_yaml.py"
|
||||
${PROJECT_VERSION}
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/gen_env_yaml.py"
|
||||
COMMENT "Generating `env.yaml`...")
|
||||
|
||||
# Generate raw CTF tracer with barectf.
|
||||
add_custom_command(
|
||||
OUTPUT barectf.c barectf.h barectf-bitfield.h metadata
|
||||
COMMAND "${BARECTF_RES}" gen "-I${CMAKE_CURRENT_BINARY_DIR}"
|
||||
"-I${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/config.yaml"
|
||||
DEPENDS hsa_erts.yaml
|
||||
hip_erts.yaml
|
||||
env.yaml
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/config.yaml"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/dst_base.yaml"
|
||||
COMMENT "Generating raw CTF tracer with barectf...")
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/metadata"
|
||||
DESTINATION "${METADATA_STREAM_FILE_DIR}")
|
||||
@@ -0,0 +1,260 @@
|
||||
= CTF plugin for ROCMTools
|
||||
13 December 2022
|
||||
Philippe Proulx
|
||||
|
||||
This plugin writes the received ROCMTools tracer and profiler records to
|
||||
a https://diamon.org/ctf/[CTF] trace.
|
||||
|
||||
== Build requirements
|
||||
|
||||
* Python ≥ 3.10
|
||||
* barectf ≥ 3.1.1 (`pip3 install barectf`)
|
||||
* PyYAML (`apt-get install python3-yaml`)
|
||||
* CppHeaderParser (`pip3 install CppHeaderParser`)
|
||||
|
||||
== Usage
|
||||
|
||||
Once installed, you may load this plugin with `rocprofv2` using
|
||||
the `--plugin ctf` command-line arguments.
|
||||
|
||||
This plugin honours the `OUTPUT_PATH` environment variable which
|
||||
`rocprofv2` sets with the `-d` option. If you pass `-d my-dir` to
|
||||
`rocprofv2`, then the plugin will write the CTF trace to the
|
||||
`my-dir/trace` directory.
|
||||
|
||||
IMPORTANT: This plugin performs important cleanup tasks at finalization
|
||||
time, so the resulting CTF trace could be corrupted if the plugin is
|
||||
never finalized.
|
||||
|
||||
Once the plugin is finalized, open the resulting trace directory with
|
||||
either https://babeltrace.org/[Babeltrace{nbsp}2] or
|
||||
https://www.eclipse.org/tracecompass/[Trace Compass] to view or analyze
|
||||
it.
|
||||
|
||||
=== Event record types
|
||||
|
||||
This plugin writes to different CTF data streams having different types.
|
||||
On the file system, the prefix of a data stream file name indicates the
|
||||
data stream type, that is:
|
||||
|
||||
`roctx_`::
|
||||
rocTX messages.
|
||||
+
|
||||
Each CTF event record is named `roctx` and corresponds to a rocTX
|
||||
tracer record.
|
||||
+
|
||||
The fields are:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`id`::
|
||||
rocTX ID.
|
||||
|
||||
`msg`::
|
||||
rocTX message.
|
||||
--
|
||||
|
||||
`hsa_api_`::
|
||||
HSA API beginning and end function calls.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`agent_id`::
|
||||
Agent ID.
|
||||
|
||||
`correlation_id`::
|
||||
Correlation ID.
|
||||
--
|
||||
+
|
||||
For each ROCMTools HSA API tracer record for the HSA function named
|
||||
`__name__`, this plugin writes two event records:
|
||||
+
|
||||
`__name___begin`:::
|
||||
Beginning of the function call.
|
||||
+
|
||||
The event record contains fields which correspond to most of the
|
||||
parameters of the HSA function.
|
||||
|
||||
`__name___end`:::
|
||||
End of the function call.
|
||||
|
||||
`hip_api_`::
|
||||
HIP API beginning and end function calls.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`agent_id`::
|
||||
Agent ID.
|
||||
|
||||
`correlation_id`::
|
||||
Correlation ID.
|
||||
|
||||
`kernel_name`::
|
||||
Kernel name (empty string if not available).
|
||||
--
|
||||
+
|
||||
For each ROCMTools HIP API tracer record for the HIP function named
|
||||
`__name__`, this plugin writes two event records:
|
||||
+
|
||||
`__name__Begin`:::
|
||||
Beginning of the function call.
|
||||
+
|
||||
The event record contains fields which correspond to most of the
|
||||
parameters of the HIP function.
|
||||
|
||||
`__name__End`:::
|
||||
End of the function call.
|
||||
|
||||
`api_ops_`::
|
||||
HSA/HIP API beginning and end operations.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`agent_id`::
|
||||
Agent ID.
|
||||
|
||||
`correlation_id`::
|
||||
Correlation ID.
|
||||
--
|
||||
+
|
||||
The possible CTF event records are:
|
||||
+
|
||||
`hsa_op_begin`:::
|
||||
HSA API operation beginning.
|
||||
|
||||
`hsa_op_end`:::
|
||||
HSA API operation end.
|
||||
|
||||
`hip_op_begin`:::
|
||||
HIP API operation beginning.
|
||||
+
|
||||
Such an event record also has the field `kernel_name` which is the
|
||||
kernel name (empty string if not available).
|
||||
|
||||
`hip_op_end`:::
|
||||
HIP API operation end.
|
||||
|
||||
`profiler_`::
|
||||
Profiler records.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`dispatch`::
|
||||
Dispatch ID.
|
||||
|
||||
`gpu_id`::
|
||||
GPU ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`queue_index`::
|
||||
Queue index.
|
||||
|
||||
`process_id`::
|
||||
Process ID.
|
||||
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`kernel_id`::
|
||||
Kernel ID.
|
||||
|
||||
`kernel_name`::
|
||||
Kernel name (empty string if not available).
|
||||
|
||||
`counter_names`::
|
||||
Array of counter names, each one having a corresponding integral
|
||||
value in the `counter_values` field.
|
||||
|
||||
`counter_values`::
|
||||
Array of integers, each one being the value of a counter of which
|
||||
the name is a corresponding string in the `counter_names` field.
|
||||
--
|
||||
+
|
||||
The possible CTF event records are:
|
||||
+
|
||||
`profiler_record`:::
|
||||
Profiler record.
|
||||
|
||||
`profiler_record_with_kernel_properties`:::
|
||||
Profiler record with kernel properties.
|
||||
+
|
||||
Such an event record also has the following fields:
|
||||
+
|
||||
--
|
||||
`grid_size`::
|
||||
Grid size.
|
||||
|
||||
`workgroup_size`::
|
||||
Workgroup size.
|
||||
|
||||
`lds_size`::
|
||||
Local memory size.
|
||||
|
||||
`scratch_size`::
|
||||
Scratch size.
|
||||
|
||||
`arch_vgpr_count`::
|
||||
Architecture vector general purpose register count.
|
||||
|
||||
`accum_vgpr_count`::
|
||||
Accumulation vector general purpose register count.
|
||||
|
||||
`sgpr_count`::
|
||||
Scalar general purpose register count.
|
||||
|
||||
`wave_size`::
|
||||
Wavefront size.
|
||||
|
||||
`signal_handle`::
|
||||
Signal handle.
|
||||
--
|
||||
|
||||
`hsa_handles_`::
|
||||
HSA handle type mappings.
|
||||
+
|
||||
Each CTF event record is named `hsa_handle_type` and maps an HSA handle
|
||||
to a processor unit type (CPU or GPU).
|
||||
+
|
||||
The clock value of those event records is irrelevant (always{nbsp}0).
|
||||
+
|
||||
The fields are:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`handle`::
|
||||
HSA handle.
|
||||
|
||||
`type`::
|
||||
Processor unit type (`CPU` or `GPU` enumeration label).
|
||||
--
|
||||
@@ -0,0 +1,67 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_EVENT_RECORD_H
|
||||
#define PLUGIN_CTF_BARECTF_EVENT_RECORD_H
|
||||
|
||||
#include <memory>
|
||||
#include <cstdint>
|
||||
|
||||
struct barectf_default_ctx;
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
// Abstract base class of any barectf event record.
|
||||
//
|
||||
// A concrete event record class must implement Write() which must call
|
||||
// a corresponding barectf tracing function.
|
||||
//
|
||||
// `CtxT` is the specific type of the barectf context which Write()
|
||||
// receives.
|
||||
template <typename CtxT> class BarectfEventRecord {
|
||||
protected:
|
||||
// Builds a barectf event record having the clock value `clock_val`.
|
||||
explicit BarectfEventRecord(const std::uint64_t clock_val) noexcept : clock_val_{clock_val} {}
|
||||
|
||||
public:
|
||||
// Shared pointer to const barectf event record.
|
||||
using SP = std::shared_ptr<const BarectfEventRecord>;
|
||||
|
||||
virtual ~BarectfEventRecord() = default;
|
||||
|
||||
// Disabled copy operations to make this class simpler.
|
||||
BarectfEventRecord(const BarectfEventRecord&) = delete;
|
||||
BarectfEventRecord& operator=(const BarectfEventRecord&) = delete;
|
||||
|
||||
// Clock value of this event record.
|
||||
std::uint64_t GetClockVal() const noexcept { return clock_val_; }
|
||||
|
||||
// Calls a corresponding barectf tracing function using the barectf
|
||||
// context `barectf_ctx`.
|
||||
virtual void Write(CtxT& barectf_ctx) const = 0;
|
||||
|
||||
private:
|
||||
// Clock value.
|
||||
std::uint64_t clock_val_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_EVENT_RECORD_H
|
||||
@@ -0,0 +1,192 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_PLATFORM_H
|
||||
#define PLUGIN_CTF_BARECTF_PLATFORM_H
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "barectf.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
template <typename> class BarectfWriter;
|
||||
|
||||
// A barectf platform for any barectf writer.
|
||||
//
|
||||
// The user doesn't deal directly with such an object: it's closely
|
||||
// coupled with a barectf writer.
|
||||
//
|
||||
// Each platform takes care of a single CTF data stream file.
|
||||
//
|
||||
// After building such a platform, get the raw barectf context with
|
||||
// GetCtx() to call tracing functions. The platform must still exist
|
||||
// when calling a tracing function.
|
||||
//
|
||||
// Such a platform opens the data stream file on construction and closes
|
||||
// it on destruction.
|
||||
//
|
||||
// `DescrT` is the specific barectf platform descriptor. It must be a
|
||||
// structure having:
|
||||
//
|
||||
// `Ctx`:
|
||||
// Specific barectf context type.
|
||||
//
|
||||
// `static void OpenPacket(Ctx&)`:
|
||||
// Packet opening function.
|
||||
//
|
||||
// `static void ClosePacket(Ctx&)`:
|
||||
// Packet closing function.
|
||||
template <typename DescrT> class BarectfPlatform final {
|
||||
friend class BarectfWriter<DescrT>;
|
||||
|
||||
private:
|
||||
// Builds a barectf platform.
|
||||
//
|
||||
// The platform writes CTF packets of size `packet_size` bytes to the
|
||||
// CTF data stream file `data_stream_file_path`.
|
||||
//
|
||||
// For each event record to write, the platform reads `clock_val` to
|
||||
// know the current timestamp.
|
||||
explicit BarectfPlatform(const std::size_t packet_size,
|
||||
const std::experimental::filesystem::path& data_stream_file_path,
|
||||
const std::uint64_t& clock_val)
|
||||
: clock_val_{&clock_val}, buffer_(packet_size) {
|
||||
// Initialize barectf callbacks.
|
||||
barectf_platform_callbacks callbacks;
|
||||
|
||||
callbacks.default_clock_get_value = GetClockCb;
|
||||
callbacks.is_backend_full = IsBackendFullCb;
|
||||
callbacks.open_packet = OpenPacketCb;
|
||||
callbacks.close_packet = ClosePacketCb;
|
||||
|
||||
// Configure exceptions so that stream operations throw instead of
|
||||
// just setting flags on error.
|
||||
output_.exceptions(std::ofstream::failbit | std::ofstream::badbit);
|
||||
|
||||
// Open CTF data stream output file in binary mode.
|
||||
output_.open(data_stream_file_path, std::ios_base::out | std::ios_base::binary);
|
||||
|
||||
// Initialize the raw barectf context.
|
||||
barectf_init(&ctx_, buffer_.data(), buffer_.size(), callbacks, this);
|
||||
|
||||
// Open the initial packet.
|
||||
OpenPacketCb();
|
||||
}
|
||||
|
||||
public:
|
||||
// Disabled copy operations to make this class simpler.
|
||||
BarectfPlatform(const BarectfPlatform&) = delete;
|
||||
BarectfPlatform& operator=(const BarectfPlatform&) = delete;
|
||||
|
||||
// Closes/writes any last CTF packet and closes the data stream file.
|
||||
~BarectfPlatform() {
  // The constructor configures `output_` to throw on failure, but a
  // destructor is implicitly noexcept: an exception escaping from here
  // would call std::terminate(). Contain any I/O error instead.
  try {
    if (barectf_packet_is_open(&ctx_) && !barectf_packet_is_empty(&ctx_)) {
      // Close and write last CTF packet (not empty).
      ClosePacketCb();
    }

    // Close data stream output file.
    output_.close();
  } catch (...) {
    // Best effort only: a destructor has no way to report the failure.
  }
}
|
||||
|
||||
// Returns the raw barectf context of this platform.
|
||||
const typename DescrT::Ctx& GetCtx() const noexcept { return ctx_; }
|
||||
typename DescrT::Ctx& GetCtx() noexcept { return ctx_; }
|
||||
|
||||
private:
|
||||
// Reinterprets the opaque callback user data `data` as the platform
// instance it points to.
static BarectfPlatform& AsPlatform(void* const data) noexcept {
  const auto platform = static_cast<BarectfPlatform*>(data);

  return *platform;
}
|
||||
|
||||
// Four callbacks for barectf.
|
||||
//
|
||||
// Those four functions receive an instance of this class as `data`.
|
||||
|
||||
static std::uint64_t GetClockCb(void* const data) noexcept {
|
||||
// Forward to instance method.
|
||||
return AsPlatform(data).GetClockCb();
|
||||
}
|
||||
|
||||
static int IsBackendFullCb(void* const data) noexcept {
|
||||
// Forward to instance method.
|
||||
return AsPlatform(data).IsBackendFullCb();
|
||||
}
|
||||
|
||||
static void OpenPacketCb(void* const data) {
|
||||
// Forward to instance method.
|
||||
AsPlatform(data).OpenPacketCb();
|
||||
}
|
||||
|
||||
static void ClosePacketCb(void* const data) {
|
||||
// Forward to instance method.
|
||||
AsPlatform(data).ClosePacketCb();
|
||||
}
|
||||
|
||||
// Instance version of the "get clock value" callback.
|
||||
std::uint64_t GetClockCb() noexcept { return *clock_val_; }
|
||||
|
||||
// Instance version of the "is the back end full?" callback.
//
// Always reports 0 ("not full").
int IsBackendFullCb() noexcept {
  constexpr int kNotFull = 0;

  return kNotFull;
}
|
||||
|
||||
// Instance version of the "open packet" callback.
|
||||
void OpenPacketCb() {
|
||||
// Forward to user (descriptor) function.
|
||||
DescrT::OpenPacket(ctx_);
|
||||
}
|
||||
|
||||
// Instance version of the "close packet" callback.
|
||||
void ClosePacketCb() {
|
||||
// Forward to user (descriptor) function to finalize the packet.
|
||||
DescrT::ClosePacket(ctx_);
|
||||
|
||||
// Write to the data stream file.
|
||||
WriteCurrentPacket();
|
||||
}
|
||||
|
||||
// Writes the current CTF packet (`buffer_`) to the data stream file.
|
||||
void WriteCurrentPacket() {
|
||||
output_.write(reinterpret_cast<const char*>(buffer_.data()), buffer_.size());
|
||||
}
|
||||
|
||||
// Clock value pointer.
|
||||
const std::uint64_t* clock_val_;
|
||||
|
||||
// CTF data stream output file stream.
|
||||
std::ofstream output_;
|
||||
|
||||
// Raw barectf context.
|
||||
typename DescrT::Ctx ctx_;
|
||||
|
||||
// CTF packet buffer.
|
||||
std::vector<std::uint8_t> buffer_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_PLATFORM_H
|
||||
@@ -0,0 +1,124 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_TRACER_H
|
||||
#define PLUGIN_CTF_BARECTF_TRACER_H
|
||||
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "barectf_event_record.h"
|
||||
#include "barectf_writer.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
// A barectf tracer offers the AddEventRecord() method to add an event
|
||||
// record which it will ultimately write to some CTF data stream file
|
||||
// within some specified CTF trace directory.
|
||||
//
|
||||
// One important feature of such a tracer is that you don't need to add
|
||||
// event records in order of time. A barectf tracer manages one or more
|
||||
// barectf writers, each one managing a single barectf platform/context
|
||||
// (CTF data stream file).
|
||||
//
|
||||
// All the CTF data stream files which a barectf tracer indirectly
|
||||
// manages share a common specified prefix. You must not use the same
|
||||
// prefix for two barectf tracers writing to the same CTF trace
|
||||
// directory.
|
||||
//
|
||||
// `PlatformDescrT` is the specific barectf platform descriptor (see the
|
||||
// documentation of the `BarectfPlatform` class template).
|
||||
template <typename PlatformDescrT> class BarectfTracer final {
|
||||
public:
|
||||
// Specific barectf event record type.
|
||||
using EventRecord = typename BarectfWriter<PlatformDescrT>::EventRecord;
|
||||
|
||||
// Builds a barectf tracer to write CTF packets of size `packet_size`
|
||||
// bytes to CTF data stream files having the prefix
|
||||
// `data_stream_file_name_prefix` within the CTF trace directory
|
||||
// `trace_dir`.
|
||||
//
|
||||
// The internal barectf writers manage event record queues having a
|
||||
// maximum size of `max_writer_queue_size`. Increasing
|
||||
// `max_writer_queue_size` increases the memory footprint of the
|
||||
// tracer, but may reduce the number of required CTF data stream files
|
||||
// to ensure time-ordered event records.
|
||||
explicit BarectfTracer(const std::size_t packet_size,
|
||||
std::experimental::filesystem::path trace_dir,
|
||||
const char* const data_stream_file_name_prefix,
|
||||
const std::size_t max_writer_queue_size = 200)
|
||||
: packet_size_{packet_size},
|
||||
trace_dir_{std::move(trace_dir)},
|
||||
data_stream_file_name_prefix_{data_stream_file_name_prefix},
|
||||
max_writer_queue_size_{max_writer_queue_size} {}
|
||||
|
||||
// Disabled copy operations to make this class simpler.
|
||||
BarectfTracer(const BarectfTracer&) = delete;
|
||||
BarectfTracer& operator=(const BarectfTracer&) = delete;
|
||||
|
||||
// Adds the event record `event_record` to this tracer.
|
||||
//
|
||||
// The clock value of `event_record` may be less than the clock value
|
||||
// of previously added event records.
|
||||
void AddEventRecord(typename EventRecord::SP event_record) {
|
||||
// Try to find a barectf writer to accept `event_record`.
|
||||
for (auto& writer : writers_) {
|
||||
if (writer->MayAddEventRecord(*event_record)) {
|
||||
// Found: add the event record to this writer and return.
|
||||
writer->AddEventRecord(std::move(event_record));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// No barectf writer found: create a new one.
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << data_stream_file_name_prefix_ << writers_.size();
|
||||
writers_.emplace_back(new BarectfWriter<PlatformDescrT>{packet_size_, trace_dir_ / ss.str(),
|
||||
max_writer_queue_size_});
|
||||
|
||||
// Add the event record to this new barectf writer.
|
||||
assert(writers_.back()->MayAddEventRecord(*event_record));
|
||||
writers_.back()->AddEventRecord(std::move(event_record));
|
||||
}
|
||||
|
||||
private:
|
||||
// CTF packet size.
|
||||
std::size_t packet_size_;
|
||||
|
||||
// CTF trace directory.
|
||||
std::experimental::filesystem::path trace_dir_;
|
||||
|
||||
// CTF data stream file name prefix.
|
||||
std::string data_stream_file_name_prefix_;
|
||||
|
||||
// Maximum event record queue size of a barectf writer.
|
||||
std::size_t max_writer_queue_size_;
|
||||
|
||||
// barectf writers.
|
||||
std::vector<std::unique_ptr<BarectfWriter<PlatformDescrT>>> writers_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_TRACER_H
|
||||
@@ -0,0 +1,178 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_WRITER_H
|
||||
#define PLUGIN_CTF_BARECTF_WRITER_H
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <cstdint>
|
||||
#include <cassert>
|
||||
#include <queue>
|
||||
#include <utility>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "barectf_platform.h"
|
||||
#include "barectf_event_record.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
template <typename> class BarectfTracer;
|
||||
|
||||
// A barectf writer manages a queue of event records, writing them
|
||||
// through barectf when needed.
|
||||
//
|
||||
// Such an object makes it possible to add some event record with a
|
||||
// clock value V and then some other event record of which the clock
|
||||
// value is less than V. The barectf writer ensures that actual barectf
|
||||
// tracing functions are called chronologically, a requirement of CTF.
|
||||
//
|
||||
// A barectf writer keeps event records in memory until its queue is
|
||||
// full (you provide the maximum queue size at construction time), in
|
||||
// which case it writes the oldest event record to some current CTF
|
||||
// packet through a barectf tracing function.
|
||||
//
|
||||
// Call MayAddEventRecord() to check whether or not you may add an event
|
||||
// record to the barectf writer, and then AddEventRecord() if you may.
|
||||
//
|
||||
// A barectf writer writes all its remaining event records on
|
||||
// destruction.
|
||||
//
|
||||
// `PlatformDescrT` is the specific barectf platform descriptor (see the
|
||||
// documentation of the `BarectfPlatform` class template).
|
||||
template <typename PlatformDescrT> class BarectfWriter final {
|
||||
friend class BarectfTracer<PlatformDescrT>;
|
||||
|
||||
public:
|
||||
// Specific barectf event record type.
|
||||
using EventRecord = BarectfEventRecord<typename PlatformDescrT::Ctx>;
|
||||
|
||||
private:
|
||||
// Builds a barectf writer to write CTF packets of size `packet_size`
|
||||
// bytes to the CTF data stream file `data_stream_file_path`.
|
||||
//
|
||||
// The built barectf writer manages an event record queue having a
|
||||
// maximum size of `max_queue_size`.
|
||||
explicit BarectfWriter(const std::size_t packet_size,
|
||||
const std::experimental::filesystem::path& data_stream_file_path,
|
||||
const std::size_t max_queue_size)
|
||||
: platform_{packet_size, data_stream_file_path, clock_val_},
|
||||
max_queue_size_{max_queue_size} {}
|
||||
|
||||
public:
|
||||
// Writes all its remaining event records.
|
||||
~BarectfWriter() {
|
||||
// Write all the remaining event records from the oldest to the
|
||||
// newest.
|
||||
while (!queue_.empty()) {
|
||||
WriteOldestEventRecord();
|
||||
}
|
||||
}
|
||||
|
||||
// Disabled copy operations to make this class simpler.
|
||||
BarectfWriter(const BarectfWriter&) = delete;
|
||||
BarectfWriter& operator=(const BarectfWriter&) = delete;
|
||||
|
||||
// Whether or not you may add the event record `event_record` to this
|
||||
// writer with AddEventRecord().
|
||||
bool MayAddEventRecord(const EventRecord& event_record) const noexcept {
|
||||
if (queue_.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// One may only add an event record if its clock value is greater
|
||||
// than or equal to the clock value of the most recently written
|
||||
// event record.
|
||||
return event_record.GetClockVal() >= clock_val_;
|
||||
}
|
||||
|
||||
// Adds the event record `event_record` to this writer.
|
||||
//
|
||||
// `MayAddEventRecord(*event_record)` must return `true`.
|
||||
void AddEventRecord(typename EventRecord::SP event_record) {
|
||||
assert(MayAddEventRecord(*event_record) && "May add event record");
|
||||
|
||||
// Add event record to queue.
|
||||
queue_.emplace(std::move(event_record));
|
||||
|
||||
if (queue_.size() > max_queue_size_) {
|
||||
// Queue is too large: write the oldest event record now to
|
||||
// satisfy the requirement.
|
||||
WriteOldestEventRecord();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Comparison type for `queue_`.
|
||||
struct EventRecordQueueCompare final {
|
||||
bool operator()(const typename EventRecord::SP& left,
|
||||
const typename EventRecord::SP& right) const noexcept {
|
||||
// "Greater than" so that the top element of the queue is the
|
||||
// oldest event record.
|
||||
return left->GetClockVal() > right->GetClockVal();
|
||||
}
|
||||
};
|
||||
|
||||
// Oldest event record within `queue_`.
|
||||
//
|
||||
// `queue_` must not be empty.
|
||||
const EventRecord& GetOldestEventRecord() const noexcept {
|
||||
assert(!queue_.empty() && "Queue isn't empty");
|
||||
return *queue_.top();
|
||||
}
|
||||
|
||||
// Writes the oldest event record through a barectf tracing function
|
||||
// and removes it from the event record queue.
|
||||
void WriteOldestEventRecord() {
|
||||
auto& oldest_event_record = GetOldestEventRecord();
|
||||
|
||||
// When calling a barectf tracing function, it calls the clock value
|
||||
// accessor callback of the platform, which itself reads from
|
||||
// `clock_val_`.
|
||||
clock_val_ = oldest_event_record.GetClockVal();
|
||||
|
||||
// Forward to a barectf tracing function.
|
||||
oldest_event_record.Write(platform_.GetCtx());
|
||||
|
||||
// Remove from queue.
|
||||
queue_.pop();
|
||||
}
|
||||
|
||||
// barectf platform (manages file I/O).
|
||||
BarectfPlatform<PlatformDescrT> platform_;
|
||||
|
||||
// Current clock value for `platform_`.
|
||||
//
|
||||
// This is also the clock value of the most recently written event
|
||||
// record, therefore that MayAddEventRecord() can rely on this.
|
||||
std::uint64_t clock_val_ = 0;
|
||||
|
||||
// Maximum size of `queue_` below.
|
||||
std::size_t max_queue_size_;
|
||||
|
||||
// Event record queue.
|
||||
std::priority_queue<typename EventRecord::SP, std::vector<typename EventRecord::SP>,
|
||||
EventRecordQueueCompare>
|
||||
queue_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_WRITER_H
|
||||
@@ -0,0 +1,165 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
%YAML 1.2
|
||||
--- !<tag:barectf.org,2020/3/config>
|
||||
trace:
|
||||
$include:
|
||||
# Environment (generated file).
|
||||
- env.yaml
|
||||
type:
|
||||
$include:
|
||||
- stdint.yaml
|
||||
- stdmisc.yaml
|
||||
native-byte-order: little-endian
|
||||
clock-types:
|
||||
default:
|
||||
origin-is-unix-epoch: true
|
||||
$c-type: uint64_t
|
||||
data-stream-types:
|
||||
hsa_api:
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
- _queue_id: uint32
|
||||
- _agent_id: uint32
|
||||
- _correlation_id: uint64
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
|
||||
# HSA API event record types (generated file).
|
||||
- hsa_erts.yaml
|
||||
hip_api:
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
- _queue_id: uint32
|
||||
- _agent_id: uint32
|
||||
- _correlation_id: uint64
|
||||
- _kernel_name: str
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
|
||||
# HIP API event record types (generated file).
|
||||
- hip_erts.yaml
|
||||
roctx:
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
event-record-types:
|
||||
roctx:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _id: sint64
|
||||
- _msg: str
|
||||
hsa_handles:
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
event-record-types:
|
||||
hsa_handle_type:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _handle: uint64
|
||||
- _type:
|
||||
field-type:
|
||||
class: uenum
|
||||
size: 8
|
||||
mappings:
|
||||
CPU: [0]
|
||||
GPU: [1]
|
||||
api_ops:
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
- _queue_id: uint32
|
||||
- _agent_id: uint32
|
||||
- _correlation_id: uint64
|
||||
event-record-types:
|
||||
hsa_op_begin:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
hsa_op_end:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
hip_op_begin:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _kernel_name: str
|
||||
hip_op_end:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
profiler:
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _dispatch: uint64
|
||||
- _gpu_id: uint64
|
||||
- _queue_id: uint64
|
||||
- _queue_index: uint64
|
||||
- _process_id: uint32
|
||||
- _thread_id: uint32
|
||||
- _kernel_id: uint64
|
||||
- _kernel_name: str
|
||||
- _counter_names:
|
||||
field-type:
|
||||
class: dynamic-array
|
||||
element-field-type: str
|
||||
- _counter_values:
|
||||
field-type:
|
||||
class: dynamic-array
|
||||
element-field-type: uint64
|
||||
event-record-types:
|
||||
profiler_record:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
profiler_record_with_kernel_properties:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _grid_size: uint64
|
||||
- _workgroup_size: uint64
|
||||
- _lds_size: uint64
|
||||
- _scratch_size: uint64
|
||||
- _arch_vgpr_count: uint64
|
||||
- _accum_vgpr_count: uint64
|
||||
- _sgpr_count: uint64
|
||||
- _wave_size: uint64
|
||||
- _signal_handle: uint64
|
||||
@@ -0,0 +1,107 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
|
||||
#include "plugin.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
// Global plugin instance
|
||||
rocm_ctf::Plugin* the_plugin = nullptr;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Plugin entry point: creates the global plugin instance.
//
// Returns 0 on success, or -1 if:
//
// * The rocprofiler version isn't compatible (different major version,
//   or minor version older than the one this plugin was built against).
// * The plugin is already initialized.
// * The `OUTPUT_PATH` environment variable isn't set.
// * Creating the plugin instance fails.
//
// No exception leaves this function, whatever its type, so that none
// crosses the exported plugin boundary.
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(const uint32_t rocprofiler_major_version,
                                                     const uint32_t rocprofiler_minor_version) {
  if (rocprofiler_major_version != ROCPROFILER_VERSION_MAJOR ||
      rocprofiler_minor_version < ROCPROFILER_VERSION_MINOR) {
    return -1;
  }

  if (the_plugin) {
    return -1;
  }

  const auto output_dir = getenv("OUTPUT_PATH");

  if (!output_dir) {
    std::cerr << "rocprofiler_plugin_initialize(): "
              << "`OUTPUT_PATH` environment variable isn't set" << std::endl;
    return -1;
  }

  // Create the plugin instance: 256 KiB CTF packets, trace written to
  // the `trace` subdirectory of the output directory.
  try {
    the_plugin = new rocm_ctf::Plugin{256 * 1024, fs::path{output_dir} / "trace",
                                      CTF_PLUGIN_METADATA_FILE_PATH};
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_initialize(): " << exc.what() << std::endl;
    return -1;
  } catch (...) {
    std::cerr << "rocprofiler_plugin_initialize(): unknown error" << std::endl;
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Plugin exit point: destroys the global plugin instance, if any, and
// resets the global pointer so that the plugin may be initialized again.
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
  if (the_plugin == nullptr) {
    // Not initialized (or already finalized): nothing to destroy.
    return;
  }

  delete the_plugin;
  the_plugin = nullptr;
}
|
||||
|
||||
// Hands the buffer records from `begin` to `end` to the global plugin
// instance.
//
// Returns 0 on success, or -1 if the plugin isn't initialized or if
// handling the records fails. No exception leaves this function,
// whatever its type, so that none crosses the exported plugin boundary.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* const begin, const rocprofiler_record_header_t* const end,
    const rocprofiler_session_id_t session_id, const rocprofiler_buffer_id_t buffer_id) {
  assert(the_plugin);

  if (!the_plugin) {
    // Reached when assertions are compiled out: report an error instead
    // of dereferencing a null pointer.
    std::cerr << "rocprofiler_plugin_write_buffer_records(): plugin isn't initialized"
              << std::endl;
    return -1;
  }

  try {
    the_plugin->HandleBufferRecords(begin, end, session_id, buffer_id);
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_write_buffer_records(): " << exc.what() << std::endl;
    return -1;
  } catch (...) {
    std::cerr << "rocprofiler_plugin_write_buffer_records(): unknown error" << std::endl;
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Hands the tracer record `record` to the global plugin instance.
//
// A record of which the ID handle is 0 is ignored (returns 0).
//
// Returns 0 on success, or -1 if the plugin isn't initialized or if
// handling the record fails. No exception leaves this function,
// whatever its type, so that none crosses the exported plugin boundary.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(const rocprofiler_record_tracer_t record,
                                                       const rocprofiler_session_id_t session_id) {
  assert(the_plugin);

  if (!the_plugin) {
    // Reached when assertions are compiled out: report an error instead
    // of dereferencing a null pointer.
    std::cerr << "rocprofiler_plugin_write_record(): plugin isn't initialized" << std::endl;
    return -1;
  }

  if (record.header.id.handle == 0) {
    return 0;
  }

  try {
    the_plugin->HandleTracerRecord(record, session_id);
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_write_record(): " << exc.what() << std::endl;
    return -1;
  } catch (...) {
    std::cerr << "rocprofiler_plugin_write_record(): unknown error" << std::endl;
    return -1;
  }

  return 0;
}
|
||||
@@ -0,0 +1,28 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
$default-clock-type-name: default
|
||||
$features:
|
||||
packet:
|
||||
beginning-timestamp-field-type: false
|
||||
discarded-event-records-counter-snapshot-field-type: false
|
||||
end-timestamp-field-type: false
|
||||
@@ -0,0 +1,645 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
import os
|
||||
import os.path
|
||||
import sys
|
||||
import re
|
||||
import yaml
|
||||
import CppHeaderParser
|
||||
|
||||
|
||||
# Numeric field type (abstract).
|
||||
class _NumericFt:
|
||||
# Returns the C++ expression to cast the expression `expr` to the C
|
||||
# type of this field type.
|
||||
def cast(self, expr):
|
||||
return f'static_cast<{self.c_type}>({expr})'
|
||||
|
||||
|
||||
# Integer field type (abstract base of the signed and unsigned ones).
class _IntFt(_NumericFt):
    def __init__(self, size, pref_disp_base='dec'):
        self._size, self._pref_disp_base = size, pref_disp_base

    # Width of the integer (bits).
    @property
    def size(self):
        return self._size

    # Base in which to preferably display the value (`dec` or `hex`).
    @property
    def pref_disp_base(self):
        return self._pref_disp_base

    # Equivalent barectf field type in YAML.
    #
    # A new dictionary on each access, without the `class` entry:
    # subclasses add it.
    @property
    def barectf_yaml(self):
        yaml_ft = {}
        yaml_ft['size'] = self._size
        yaml_ft['preferred-display-base'] = self._pref_disp_base
        return yaml_ft
|
||||
|
||||
|
||||
# Signed integer field type.
class _SIntFt(_IntFt):
    # Equivalent barectf field type in YAML: the integer properties of
    # the parent plus the `sint` class.
    @property
    def barectf_yaml(self):
        yaml_ft = super().barectf_yaml
        yaml_ft['class'] = 'sint'
        return yaml_ft

    # Equivalent C type: the fixed-width signed type of the same size.
    @property
    def c_type(self):
        bits = self._size
        return f'std::int{bits}_t'
|
||||
|
||||
|
||||
# Unsigned integer field type.
class _UIntFt(_IntFt):
    # Equivalent barectf field type in YAML: the integer properties of
    # the parent plus the `uint` class.
    @property
    def barectf_yaml(self):
        yaml_ft = super().barectf_yaml
        yaml_ft['class'] = 'uint'
        return yaml_ft

    # Equivalent C type: the fixed-width unsigned type of the same size.
    @property
    def c_type(self):
        bits = self._size
        return f'std::uint{bits}_t'
|
||||
|
||||
|
||||
# Pointer field type: a 64-bit unsigned integer preferably displayed in
# hexadecimal.
class _PointerFt(_UIntFt):
    def __init__(self):
        super().__init__(size=64, pref_disp_base='hex')

    # Builds the C++ expression which converts the pointer expression
    # `expr` to the C type of this field type: a `reinterpret_cast` to
    # `std::uintptr_t` first, then a `static_cast` to the C type.
    def cast(self, expr):
        as_uintptr = f'reinterpret_cast<std::uintptr_t>({expr})'
        return f'static_cast<{self.c_type}>({as_uintptr})'
|
||||
|
||||
|
||||
# Enumeration field type (abstract base of the signed and unsigned
# ones).
class _EnumFt(_IntFt):
    def __init__(self, size, mappings):
        super().__init__(size)

        # Keep a private copy so that later changes to the argument of
        # the caller don't affect this field type.
        self._mappings = mappings.copy()

    # Mappings (enumerator names to integers).
    @property
    def mappings(self):
        return self._mappings

    # Equivalent barectf field type in YAML: what the parent provides
    # plus the mappings, each integer being wrapped into a single-item
    # list.
    @property
    def barectf_yaml(self):
        yaml_ft = super().barectf_yaml
        yaml_ft['mappings'] = {name: [val] for name, val in self._mappings.items()}
        return yaml_ft
|
||||
|
||||
|
||||
# Unsigned enumeration field type.
class _UEnumFt(_EnumFt, _UIntFt):
    # Equivalent barectf field type in YAML: what the parents provide,
    # with the class replaced with `uenum`.
    @property
    def barectf_yaml(self):
        yaml_ft = super().barectf_yaml
        yaml_ft['class'] = 'uenum'
        return yaml_ft
|
||||
|
||||
|
||||
# Signed enumeration field type.
#
# Inherits `_SIntFt` (not `_UIntFt`) so that `c_type`, and therefore
# cast(), use the signed C type `std::int<size>_t`.
class _SEnumFt(_EnumFt, _SIntFt):
    # Equivalent barectf field type in YAML: what the parents provide,
    # with the class replaced with `senum`.
    @property
    def barectf_yaml(self):
        ret = super().barectf_yaml
        ret['class'] = 'senum'
        return ret
|
||||
|
||||
|
||||
# Optional string field type.
|
||||
class _OptStrFt:
|
||||
# Equivalent barectf field type in YAML.
|
||||
@property
|
||||
def barectf_yaml(self):
|
||||
return {
|
||||
'class': 'str',
|
||||
}
|
||||
|
||||
|
||||
# String field type.
#
# Adds nothing to `_OptStrFt` (same barectf field type): this is only a
# distinct type.
class _StrFt(_OptStrFt):
    ...
|
||||
|
||||
|
||||
# Floating-point number field type.
class _FloatFt(_NumericFt):
    def __init__(self, size):
        self._size = size

    # Size (bits): 32 or 64.
    @property
    def size(self):
        return self._size

    # Equivalent barectf field type in YAML (a new dictionary on each
    # access).
    @property
    def barectf_yaml(self):
        yaml_ft = {}
        yaml_ft['class'] = 'real'
        yaml_ft['size'] = self._size
        return yaml_ft

    # Equivalent C type: `float` for 32 bits, `double` for 64 bits.
    @property
    def c_type(self):
        if self._size != 32:
            # The only other supported size.
            assert self._size == 64
            return 'double'

        return 'float'
|
||||
|
||||
|
||||
# Event record type.
|
||||
class _Ert:
|
||||
def __init__(self, api_func_name, members):
|
||||
self._api_func_name = api_func_name
|
||||
self._members = members
|
||||
|
||||
# API function name
|
||||
@property
|
||||
def api_func_name(self):
|
||||
return self._api_func_name
|
||||
|
||||
# Parameters of function (list of `_ErtMember`).
|
||||
@property
|
||||
def members(self):
|
||||
return self._members
|
||||
|
||||
|
||||
# Beginning event record type.
class _BeginErt(_Ert):
    # Name of this event record type: the API function name followed by
    # `_begin` when the API prefix `api_prefix` is `hsa`, or by `Begin`
    # otherwise.
    def name(self, api_prefix):
        if api_prefix == 'hsa':
            suffix = '_begin'
        else:
            suffix = 'Begin'

        return self._api_func_name + suffix
|
||||
|
||||
|
||||
# End event record type.
class _EndErt(_Ert):
    # Returns the event record type name: the API function name followed
    # by `_end` for the `hsa` API prefix, or by `End` for any other
    # prefix.
    def name(self, api_prefix):
        if api_prefix == 'hsa':
            suffix = '_end'
        else:
            suffix = 'End'

        return f'{self._api_func_name}{suffix}'
|
||||
|
||||
|
||||
# Event record type member.
|
||||
class _ErtMember:
|
||||
def __init__(self, access, member_names, ft):
|
||||
self._access = access
|
||||
self._member_names = member_names.copy()
|
||||
self._ft = ft
|
||||
|
||||
# C++ access expression.
|
||||
@property
|
||||
def access(self):
|
||||
return self._access
|
||||
|
||||
# List of member names.
|
||||
@property
|
||||
def member_names(self):
|
||||
return self._member_names
|
||||
|
||||
# Equivalent field type.
|
||||
@property
|
||||
def ft(self):
|
||||
return self._ft
|
||||
|
||||
|
||||
# Makes sure some condition is satisfied, or prints the error message
|
||||
# `error_msg` and quits with exit status 1 otherwise.
|
||||
#
|
||||
# This is an unconditional assertion.
|
||||
def _make_sure(cond, error_msg):
|
||||
if not cond:
|
||||
print(f'Error: {error_msg}', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _enumerator_effective_val(enum_val):
|
||||
# Try the value, but this value may be a string (an
|
||||
# enumerator/definition).
|
||||
val = enum_val.get('value')
|
||||
|
||||
if type(val) is int:
|
||||
return val
|
||||
|
||||
# Try the raw value.
|
||||
val = enum_val.get('raw_value')
|
||||
|
||||
if val is not None:
|
||||
if type(val) is int:
|
||||
# Raw value is already an integer.
|
||||
return val
|
||||
else:
|
||||
# Try to parse the raw value string as an integer.
|
||||
try:
|
||||
return int(val, 0)
|
||||
except:
|
||||
pass
|
||||
|
||||
_make_sure(False,
|
||||
f'Cannot get the integral value of enumerator `{enum_val["name"]}`')
|
||||
|
||||
|
||||
# Returns the equivalent field type of the C type `c_type`.
#
# When the first word of `c_type` (after an optional `enum` keyword)
# names an enumeration of `cpp_header.enums`, returns a signed
# enumeration field type (or a plain 64-bit signed integer field type if
# the enumeration has no enumerators). Otherwise, picks an integer or
# floating-point field type from the keywords found in `c_type`.
#
# NOTE(review): a type name containing none of the keywords below (for
# example a fixed-width alias which the caller didn't resolve) falls
# through to the 32-bit integer default; confirm that this is intended
# for 64-bit aliases.
def _number_ft_from_c_type(cpp_header, c_type):
    # Check for known enumeration.
    m = re.match(r'(?:enum\s+)?(\w+)', c_type)

    if m:
        type_name = m.group(1)

        for enum_info in cpp_header.enums:
            if enum_info.get('name') != type_name:
                continue

            # Fill enumeration field type mappings.
            mappings = {
                str(v['name']): _enumerator_effective_val(v)
                for v in enum_info['values']
            }

            if not mappings:
                # Nothing to map: fall back to a plain integer.
                return _SIntFt(64)

            # Use 64 bits as soon as one value doesn't fit a signed
            # 32-bit integer.
            values = mappings.values()
            fits_32_bits = -2**31 <= min(values) and max(values) < 2**31

            # Create corresponding enumeration field type.
            return _SEnumFt(32 if fits_32_bits else 64, mappings)

    # Find corresponding basic field type: the keyword checks keep their
    # original precedence (`long`, `short`, `char`, `float`, `double`).
    int_ft_cls = _UIntFt if 'unsigned' in c_type else _SIntFt

    if 'long' in c_type:
        return int_ft_cls(64)

    if 'short' in c_type:
        return int_ft_cls(16)

    if 'char' in c_type:
        return int_ft_cls(8)

    if 'float' in c_type:
        return _FloatFt(32)

    if 'double' in c_type:
        return _FloatFt(64)

    # Assume `int` (often an unresolved C enumeration).
    return int_ft_cls(32)
|
||||
|
||||
|
||||
# Returns whether or not a property has a pointer type.
|
||||
def _prop_is_pointer(prop, c_type):
|
||||
if prop['pointer'] or prop['function_pointer']:
|
||||
return True
|
||||
|
||||
if prop['array'] and 'array_size' in prop:
|
||||
return True
|
||||
|
||||
if prop['unresolved']:
|
||||
# HSA API function pointers.
|
||||
if prop['name'] in ('callback', 'handler'):
|
||||
return True
|
||||
|
||||
# HIP API function pointers.
|
||||
if c_type.endswith('Fn_t'):
|
||||
return True
|
||||
|
||||
# Check the C type itself.
|
||||
if '*' in c_type or '*' in prop.get('raw_type', ''):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# Returns a list of event record type member objects for the structure
# `struct` considering the initial C++ access expression `access` and
# member names `member_names`.
#
# Public properties are visited in order. Properties without a type,
# without a name, or which are unions are skipped. C strings become
# optional string members, pointers become pointer members,
# substructures are flattened recursively, and everything else becomes a
# numeric member.
def _get_ert_members_for_struct(cpp_header, struct, access, member_names):
    members = []

    # Work on a copy with one more slot: the last item is overwritten
    # with the name of each visited property.
    member_names = member_names.copy()
    member_names.append(None)

    for prop in struct['properties']['public']:
        # Property name.
        name = prop['name']

        # Member names, access, and C type.
        member_names[-1] = str(name)
        this_access = f'{access}.{name}'
        c_type = prop['type']
        aliases = prop['aliases']

        # Skip no type.
        if c_type == '':
            continue

        # Skip unnamed or union.
        if name == '' or 'union' in name or re.match(r'\bunion\b', c_type):
            continue

        # Follow known C type aliases. Remember the visited names so
        # that a cyclic (or self-referencing) alias chain stops instead
        # of looping forever.
        seen_c_types = {c_type}

        while True:
            c_type_alias = cpp_header.typedefs.get(c_type)

            if c_type_alias is None or c_type_alias in seen_c_types:
                break

            seen_c_types.add(c_type_alias)
            c_type = c_type_alias

        # Check for C string.
        if re.match(r'^((const\s+char)|(char\s+const)|char)\s*\*$',
                    c_type.strip()):
            members.append(_ErtMember(this_access, member_names, _OptStrFt()))
            continue

        # Check for pointer.
        if _prop_is_pointer(prop, c_type):
            # Pointer: use numeric value.
            members.append(_ErtMember(this_access, member_names, _PointerFt()))
            continue

        # Check for substructure (by type name, then by its only alias).
        sub_struct = cpp_header.classes.get(c_type)

        if sub_struct is None and len(aliases) == 1:
            sub_struct = cpp_header.classes.get(aliases[0])

        if sub_struct is not None:
            members += _get_ert_members_for_struct(cpp_header, sub_struct,
                                                   this_access, member_names)
            continue

        # Use a basic field type.
        members.append(_ErtMember(this_access, member_names,
                                  _number_ft_from_c_type(cpp_header, c_type)))

    return members
|
||||
|
||||
|
||||
# Returns the beginning and end event record type objects for the
# callback data structure `struct`.
#
# When `retval_info` isn't `None`, the first nested class of `struct` is
# taken as the return value union and the second one as the `args`
# union; otherwise the first nested class is the `args` union.
#
# Returns a `(begin_erts, end_erts)` tuple of lists having one item per
# member of the `args` union, in the same order.
def _erts_from_cb_data_struct(api_prefix, cpp_header, retval_info, struct):
    all_nested_classes = struct['nested_classes']

    # The location of the `args` union within the nested structures of
    # `struct`.
    args_nested_cls_index = 0

    # Create return value members (to be used later).
    retval_members = {}

    if retval_info is not None:
        args_nested_cls_index = 1
        _make_sure(len(all_nested_classes) >= 1,
                   f"Return value union doesn't exist in `{struct['name']}`")
        retval_union = all_nested_classes[0]

        for prop in retval_union['properties']['public']:
            name = str(prop['name'])
            retval_members[name] = _ErtMember(f'GetApiData().{name}', ['retval'],
                                              _number_ft_from_c_type(cpp_header, prop['type']))

        # Make sure we have everything we need.
        for api_func_name, retval_name in retval_info.items():
            if retval_name is not None:
                _make_sure(retval_name in retval_members,
                           f"Return value union member `{retval_name}` doesn't exist (function {api_func_name}())")

    # Fail with a clear message rather than an `IndexError` if the
    # `args` union is missing.
    _make_sure(len(all_nested_classes) > args_nested_cls_index,
               f"Arguments union doesn't exist in `{struct['name']}`")
    args_union = all_nested_classes[args_nested_cls_index]
    nested_classes = args_union['nested_classes']
    props = args_union['properties']['public']
    _make_sure(len(nested_classes) == len(props),
               f'Mismatch between nested structure and member count in `{struct["name"]}`')

    # Create beginning/end event record type objects.
    begin_erts = []
    end_erts = []

    for prop, params_struct in zip(props, nested_classes):
        # API function name is the name of the member.
        api_func_name = str(prop['name'])

        # Get the parameters.
        members = _get_ert_members_for_struct(cpp_header,
                                              params_struct,
                                              f'GetApiData().args.{api_func_name}',
                                              [])

        # Append new beginning event record type object.
        begin_erts.append(_BeginErt(api_func_name, members))

        # Append new end event record type object, with the return
        # value member if there's one for this function.
        ret_members = []

        if retval_info is not None:
            retval_type = retval_info.get(api_func_name)

            if retval_type is not None:
                ret_members.append(retval_members[retval_type])

        end_erts.append(_EndErt(api_func_name, ret_members))

    return begin_erts, end_erts
|
||||
|
||||
|
||||
# Creates and returns the return value information dictionary.
|
||||
#
|
||||
# This dictionary maps API function names to the member to get within
|
||||
# the callback data structure.
|
||||
#
|
||||
# This only applies to the HSA API: for other APIs, this function
|
||||
# returns `None`.
|
||||
def _get_retval_info(path):
|
||||
if 'hsa' not in os.path.basename(path):
|
||||
return
|
||||
|
||||
retval_info = {}
|
||||
cur_api_func_name = None
|
||||
|
||||
with open(path) as f:
|
||||
for line in f:
|
||||
if 'out << ")' in line and cur_api_func_name is not None:
|
||||
m = re.search(r'api_data.(\w+_retval)', line)
|
||||
retval_info[cur_api_func_name] = m.group(1) if m else None
|
||||
else:
|
||||
m = re.search(r'out << "(hsa_\w+)\(";', line)
|
||||
|
||||
if m:
|
||||
cur_api_func_name = m.group(1)
|
||||
|
||||
return retval_info
|
||||
|
||||
|
||||
# Returns a partial barectf data stream type, as a YAML string, holding
# one event record type per item of `erts` (named with `api_prefix`).
def _yaml_dst_from_erts(api_prefix, erts):
    yaml_erts = {}

    for ert in erts:
        # One structure field type member per event record type member.
        #
        # barectf doesn't support nested CTF structures, so join
        # individual member names with `__` to flatten.
        yaml_members = [
            {
                '_' + '__'.join(member.member_names): {
                    'field-type': member.ft.barectf_yaml,
                },
            }
            for member in ert.members
        ]

        # Add event record type.
        yaml_erts[ert.name(api_prefix)] = {
            'payload-field-type': {
                'class': 'struct',
                'members': yaml_members,
            },
        }

    # Convert to YAML.
    return yaml.dump({'event-record-types': yaml_erts})
|
||||
|
||||
|
||||
# Returns the C++ switch statement which calls the correct barectf
|
||||
# tracing function depending on the API function operation ID.
|
||||
def _cpp_switch_statement_from_erts(api_prefix, erts):
|
||||
lines = []
|
||||
lines.append('switch (GetOp()) {')
|
||||
|
||||
for ert in erts:
|
||||
lines.append(f' case {api_prefix.upper()}_API_ID_{ert.api_func_name}:')
|
||||
lines.append(f' barectf_{api_prefix}_api_trace_{ert.name(api_prefix)}(')
|
||||
lines.append(f' &barectf_ctx,')
|
||||
lines.append(f' GetThreadId(),')
|
||||
lines.append(f' GetQueueId(),')
|
||||
lines.append(f' GetAgentId(),')
|
||||
lines.append(f' GetCorrelationId(),')
|
||||
|
||||
if api_prefix == 'hip':
|
||||
lines.append(f' GetKernelName().c_str(),')
|
||||
|
||||
if len(ert.members) == 0:
|
||||
# Remove last comma.
|
||||
lines[-1] = lines[-1].replace(',', '')
|
||||
|
||||
for index, member in enumerate(ert.members):
|
||||
if type(member.ft) is _OptStrFt:
|
||||
# Only dereference C string if not null, otherwise use
|
||||
# an empty string.
|
||||
lines.append(f' {member.access} ? {member.access} : ""')
|
||||
elif type(member.ft) is _StrFt:
|
||||
lines.append(f' {member.access}')
|
||||
else:
|
||||
lines.append(f' {member.ft.cast(member.access)}')
|
||||
|
||||
if index + 1 < len(ert.members):
|
||||
lines[-1] += ','
|
||||
|
||||
lines.append(' );')
|
||||
lines.append(' break;')
|
||||
|
||||
lines.append('}')
|
||||
return lines
|
||||
|
||||
|
||||
# Processes the complete API header file `path`.
#
# For each class of the header named `<api_prefix>_api_data...`, writes
# three files to the current working directory:
#
# * `<api_prefix>_erts.yaml`: barectf event record types.
# * `<api_prefix>_begin.cpp.i`: C++ `switch` statement (beginning).
# * `<api_prefix>_end.cpp.i`: C++ `switch` statement (end).
#
# Prints the parsing error and exits with status 1 if `path` cannot be
# parsed.
def _process_file(api_prefix, path):
    # Create `CppHeader` object.
    try:
        cpp_header = CppHeaderParser.CppHeader(path)
    except CppHeaderParser.CppParseError as exc:
        print(exc, file=sys.stderr)
        sys.exit(1)

    # Get return value information dictionary.
    retval_info = _get_retval_info(path)

    # Find callback data structure.
    for struct_name, struct in cpp_header.classes.items():
        if not re.match(r'^' + api_prefix + r'_api_data\w+$', struct_name):
            continue

        # Process callback data structure.
        begin_erts, end_erts = _erts_from_cb_data_struct(api_prefix, cpp_header,
                                                         retval_info, struct)

        # Write barectf YAML file.
        with open(f'{api_prefix}_erts.yaml', 'w') as yaml_file:
            yaml_text = _yaml_dst_from_erts(api_prefix, begin_erts + end_erts)
            yaml_file.write(yaml_text)

        # Write C++ code (beginning event record).
        with open(f'{api_prefix}_begin.cpp.i', 'w') as begin_file:
            begin_lines = _cpp_switch_statement_from_erts(api_prefix, begin_erts)
            begin_file.write('\n'.join(begin_lines))

        # Write C++ code (end event record).
        with open(f'{api_prefix}_end.cpp.i', 'w') as end_file:
            end_lines = _cpp_switch_statement_from_erts(api_prefix, end_erts)
            end_file.write('\n'.join(end_lines))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Disable `CppHeaderParser` printing to standard output.
    for flag_name in ('print_warnings', 'print_errors', 'debug', 'debug_trace'):
        setattr(CppHeaderParser.CppHeaderParser, flag_name, 0)

    # Process the complete API header file: the first command-line
    # argument is the API prefix and the second one is the header path.
    api_prefix, header_path = sys.argv[1], sys.argv[2]
    _process_file(api_prefix, header_path)
|
||||
@@ -0,0 +1,33 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
import sys
|
||||
import yaml
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Write `env.yaml` (current working directory) with the version
    # given as the first command-line argument.
    with open('env.yaml', 'w') as env_file:
        env = {
            'environment': {
                'rocprofiler_version': sys.argv[1],
            }
        }
        env_file.write(yaml.dump(env))
|
||||
@@ -0,0 +1,869 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <sstream>
#include <stdexcept>
#include <iostream>
#include <utility>
#include <string>
#include <memory>
#include <limits>
#include <fstream>
#include <experimental/filesystem>
#include <time.h>

#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include "hsa_prof_str.h"

#include <hip/hip_runtime.h>
#include <hip/amd_detail/hip_prof_str.h>

#include "rocprofiler.h"
#include "rocprofiler_plugin.h"
#include "../utils.h"

#include "barectf.h"
#include "barectf_event_record.h"
#include "barectf_tracer.h"
#include "plugin.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace rocm_ctf {
|
||||
namespace {
|
||||
|
||||
// Abstract tracer event record using the barectf context type `CtxT`.
|
||||
template <typename CtxT> class TracerEventRecord : public BarectfEventRecord<CtxT> {
|
||||
protected:
|
||||
explicit TracerEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
|
||||
: BarectfEventRecord<CtxT>{clock_val},
|
||||
op_{record.operation_id.id},
|
||||
thread_id_{record.thread_id.value},
|
||||
queue_id_{record.queue_id.handle},
|
||||
agent_id_{record.agent_id.handle},
|
||||
correlation_id_{record.correlation_id.value} {}
|
||||
|
||||
std::uint32_t GetOp() const noexcept { return op_; }
|
||||
std::uint32_t GetThreadId() const noexcept { return thread_id_; }
|
||||
std::uint64_t GetQueueId() const noexcept { return queue_id_; }
|
||||
std::uint64_t GetAgentId() const noexcept { return agent_id_; }
|
||||
std::uint64_t GetCorrelationId() const noexcept { return correlation_id_; }
|
||||
|
||||
private:
|
||||
std::uint32_t op_;
|
||||
std::uint32_t thread_id_;
|
||||
std::uint64_t queue_id_;
|
||||
std::uint64_t agent_id_;
|
||||
std::uint64_t correlation_id_;
|
||||
};
|
||||
|
||||
// Returns the beginning clock value of the tracer or profiler record
// `record`, that is, `record.timestamps.begin.value`.
template <typename RecordT>
std::uint64_t GetRecordBeginClockVal(const RecordT& record) {
  const auto& timestamps = record.timestamps;

  return timestamps.begin.value;
}
|
||||
|
||||
// Returns the end clock value of the tracer or profiler record
// `record`, that is, `record.timestamps.end.value`.
template <typename RecordT>
std::uint64_t GetRecordEndClockVal(const RecordT& record) {
  const auto& timestamps = record.timestamps;

  return timestamps.end.value;
}
|
||||
|
||||
// Queries allocated string data using the size query function
|
||||
// `query_size_func` and the data query function `query_data_func`,
|
||||
// returning the corresponding string and freeing temporary allocated
|
||||
// memory.
|
||||
//
|
||||
// Returns an empty string if anything goes wrong.
|
||||
template <typename QuerySizeFuncT, typename QueryDataFuncT>
|
||||
std::string QueryAllocStr(QuerySizeFuncT&& query_size_func, QueryDataFuncT&& query_data_func) {
|
||||
// Query size first.
|
||||
std::size_t size = 0;
|
||||
[[maybe_unused]] auto ret = query_size_func(&size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query size");
|
||||
|
||||
if (size == 0) {
|
||||
// No size: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
// Query data (allocated by query_data_func()).
|
||||
char* alloc_str = nullptr;
|
||||
|
||||
ret = query_data_func(&alloc_str);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query data");
|
||||
|
||||
if (!alloc_str) {
|
||||
// No data: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
// Allocate return value.
|
||||
std::string str_ret{alloc_str};
|
||||
|
||||
// Free allocated data.
|
||||
std::free(alloc_str);
|
||||
|
||||
// Return string object.
|
||||
return str_ret;
|
||||
}
|
||||
|
||||
// rocTX event record.
//
// Queries the rocTX ID and message of a tracer record at construction
// time, keeps copies of them, and writes them together with the thread
// ID using barectf_roctx_trace_roctx(). The clock value is the
// beginning timestamp of the record.
class RocTxEventRecord final : public TracerEventRecord<barectf_roctx_ctx> {
 public:
  // Builds a rocTX event record from the tracer record `record` of the
  // session `session_id`.
  explicit RocTxEventRecord(const rocprofiler_record_tracer_t& record,
                            const rocprofiler_session_id_t session_id)
      : TracerEventRecord<barectf_roctx_ctx>{record, GetRecordBeginClockVal(record)},
        id_{QueryId(record, session_id)},
        msg_{QueryMsg(record, session_id)} {}

  // Writes this event record to the barectf context `barectf_ctx`.
  void Write(barectf_roctx_ctx& barectf_ctx) const override {
    barectf_roctx_trace_roctx(&barectf_ctx, GetThreadId(), id_, msg_.c_str());
  }

 private:
  // Queries and returns the rocTX message of the record `record` and
  // session ID `session_id`.
  //
  // Returns an empty string if not available (reported size of zero or
  // null data pointer). The query statuses are only checked through
  // assertions.
  static std::string QueryMsg(const rocprofiler_record_tracer_t& record,
                              const rocprofiler_session_id_t session_id) {
    // Query size first.
    std::size_t msg_size = 0;
    [[maybe_unused]] auto ret = rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id,
        &msg_size);

    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message size");

    if (msg_size == 0) {
      // No size: return empty string.
      return {};
    }

    // Query data (borrowed from the record: no need to free).
    char* msg = nullptr;

    ret = rocprofiler_query_roctx_tracer_api_data_info(
        session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id, &msg);
    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message");

    if (!msg) {
      // No data: return empty string.
      return {};
    }

    // The message goes through the C++ demangler before being stored.
    return rocmtools::cxx_demangle(msg);
  }

  // Queries and returns the rocTX ID of the record `record` and the
  // session ID `session_id`.
  //
  // The ID is queried as an allocated string (see QueryAllocStr()) and
  // converted with std::stoull().
  //
  // Returns 0 if anything goes wrong (any exception, including the one
  // std::stoull() throws for an empty or non-numeric string).
  static std::uint64_t QueryId(const rocprofiler_record_tracer_t& record,
                               const rocprofiler_session_id_t session_id) {
    try {
      return std::stoull(QueryAllocStr(
          [&record, session_id](const auto size) {
            return rocprofiler_query_roctx_tracer_api_data_info_size(
                session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, size);
          },
          [&record, session_id](const auto str) {
            return rocprofiler_query_roctx_tracer_api_data_info(
                session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, str);
          }));
    } catch (...) {
      return 0;
    }
  }

  // rocTX ID (0 if it could not be queried).
  std::uint64_t id_;

  // rocTX message (empty if not available).
  std::string msg_;
};
|
||||
|
||||
// Abstract HSA API event record.
//
// Queries the HSA API data of a tracer record at construction time and
// keeps a copy of it for derived classes (see GetApiData()).
class HsaApiEventRecord : public TracerEventRecord<barectf_hsa_api_ctx> {
 protected:
  // Builds an HSA API event record having the clock value `clock_val`
  // from the tracer record `record` of the session `session_id`.
  explicit HsaApiEventRecord(const rocprofiler_record_tracer_t& record,
                             const rocprofiler_session_id_t session_id, const std::uint64_t clock_val)
      : TracerEventRecord<barectf_hsa_api_ctx>{record, clock_val},
        api_data_{QueryApiData(record, session_id)} {}

  // HSA API data copied from the record.
  const hsa_api_data_t& GetApiData() const noexcept { return api_data_; }

 private:
  // Queries and returns the API data of the record `record` and session
  // ID `session_id`.
  //
  // The returned reference designates the queried data itself; the
  // constructor copies it into `api_data_`.
  //
  // NOTE(review): the query statuses and the data pointer are only
  // checked through assertions, so a null data pointer is dereferenced
  // when assertions are compiled out; confirm that the query cannot
  // fail here.
  static const hsa_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
                                            const rocprofiler_session_id_t session_id) {
    // Query size first (only for assertions).
    [[maybe_unused]] std::size_t size = 0;
    [[maybe_unused]] auto ret = rocprofiler_query_hsa_tracer_api_data_info_size(
        session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &size);

    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data size");
    assert(size > 0);

    // Query data (borrowed from the record).
    char* data = nullptr;
    ret = rocprofiler_query_hsa_tracer_api_data_info(
        session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &data);

    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data");
    assert(data);

    // Reinterpret as an HSA API data pointer.
    return *reinterpret_cast<const hsa_api_data_t*>(data);
  }

  // Copy of the HSA API data of the record.
  hsa_api_data_t api_data_;
};
|
||||
|
||||
// HSA API event record (beginning).
//
// Uses the beginning timestamp of the tracer record as its clock value.
class HsaApiEventRecordBegin final : public HsaApiEventRecord {
 public:
  // Builds the event record from the tracer record `record` of the
  // session `session_id`.
  explicit HsaApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
                                  const rocprofiler_session_id_t session_id)
      : HsaApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}

  // Writes this event record to the barectf context `barectf_ctx`.
  //
  // The body is generated code which is textually included: it refers
  // to the names in scope here, so don't rename `barectf_ctx` or the
  // inherited getters without updating the generator.
  void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
    // Include generated switch statement.
#include "hsa_begin.cpp.i"
  }
};
|
||||
|
||||
// HSA API event record (end).
//
// Uses the end timestamp of the tracer record as its clock value.
class HsaApiEventRecordEnd final : public HsaApiEventRecord {
 public:
  // Builds the event record from the tracer record `record` of the
  // session `session_id`.
  explicit HsaApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
                                const rocprofiler_session_id_t session_id)
      : HsaApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}

  // Writes this event record to the barectf context `barectf_ctx`.
  //
  // The body is generated code which is textually included: it refers
  // to the names in scope here, so don't rename `barectf_ctx` or the
  // inherited getters without updating the generator.
  void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
    // Include generated switch statement.
#include "hsa_end.cpp.i"
  }
};
|
||||
|
||||
// Abstract HIP API event record.
//
// Queries the HIP API data and the kernel name of a tracer record at
// construction time and keeps copies of them for derived classes (see
// GetApiData() and GetKernelName()).
class HipApiEventRecord : public TracerEventRecord<barectf_hip_api_ctx> {
 protected:
  // Builds a HIP API event record having the clock value `clock_val`
  // from the tracer record `record` of the session `session_id`.
  explicit HipApiEventRecord(const rocprofiler_record_tracer_t& record,
                             const rocprofiler_session_id_t session_id, const std::uint64_t clock_val)
      : TracerEventRecord<barectf_hip_api_ctx>{record, clock_val},
        api_data_{QueryApiData(record, session_id)},
        kernel_name_{QueryKernelName(record, session_id)} {}

  // HIP API data copied from the record.
  const hip_api_data_t& GetApiData() const noexcept { return api_data_; }

  // Kernel name (empty if not available).
  const std::string& GetKernelName() const noexcept { return kernel_name_; }

 private:
  // Queries and returns the API data of the record `record` and session
  // ID `session_id`.
  //
  // The returned reference designates the queried data itself; the
  // constructor copies it into `api_data_`.
  //
  // NOTE(review): the query statuses and the data pointer are only
  // checked through assertions, so a null data pointer is dereferenced
  // when assertions are compiled out; confirm that the query cannot
  // fail here.
  static const hip_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
                                            const rocprofiler_session_id_t session_id) {
    // Query size first (only for assertions).
    [[maybe_unused]] std::size_t size = 0;
    [[maybe_unused]] auto ret = rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &size);

    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data size");
    assert(size > 0);

    // Query data (borrowed from the record).
    char* data = nullptr;

    ret = rocprofiler_query_hip_tracer_api_data_info(
        session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &data);
    assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data");
    assert(data);

    // Reinterpret as a HIP API data pointer.
    return *reinterpret_cast<const hip_api_data_t*>(data);
  }

  // Queries and returns the kernel name of the record `record` and
  // session ID `session_id`.
  //
  // The name is queried as an allocated string (see QueryAllocStr())
  // and demangled when it's longer than one character; a shorter result
  // is returned as is.
  //
  // Returns an empty string if not available.
  static std::string QueryKernelName(const rocprofiler_record_tracer_t& record,
                                     const rocprofiler_session_id_t session_id) {
    const auto kernel_name = QueryAllocStr(
        [&record, session_id](const auto size) {
          return rocprofiler_query_hip_tracer_api_data_info_size(
              session_id, ROCPROFILER_HIP_KERNEL_NAME, record.api_data_handle, record.operation_id,
              size);
        },
        [&record, session_id](const auto str) {
          return rocprofiler_query_hip_tracer_api_data_info(session_id, ROCPROFILER_HIP_KERNEL_NAME,
                                                            record.api_data_handle,
                                                            record.operation_id, str);
        });

    if (kernel_name.size() > 1) {
      // Return demangled version.
      return rocmtools::cxx_demangle(kernel_name);
    }

    return kernel_name;
  }

  // Copy of the HIP API data of the record.
  hip_api_data_t api_data_;

  // Kernel name (empty if not available).
  std::string kernel_name_;
};
|
||||
|
||||
// HIP API event record (beginning).
//
// Uses the beginning timestamp of the tracer record as its clock value.
class HipApiEventRecordBegin final : public HipApiEventRecord {
 public:
  // Builds the event record from the tracer record `record` of the
  // session `session_id`.
  explicit HipApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
                                  const rocprofiler_session_id_t session_id)
      : HipApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}

  // Writes this event record to the barectf context `barectf_ctx`.
  //
  // The body is generated code which is textually included: it refers
  // to the names in scope here, so don't rename `barectf_ctx` or the
  // inherited getters without updating the generator.
  void Write(barectf_hip_api_ctx& barectf_ctx) const override {
    // Include generated switch statement.
#include "hip_begin.cpp.i"
  }
};
|
||||
|
||||
// HIP API event record (end).
//
// Uses the end timestamp of the tracer record as its clock value.
class HipApiEventRecordEnd final : public HipApiEventRecord {
 public:
  // Builds the event record from the tracer record `record` of the
  // session `session_id`.
  explicit HipApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
                                const rocprofiler_session_id_t session_id)
      : HipApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}

  // Writes this event record to the barectf context `barectf_ctx`.
  //
  // The body is generated code which is textually included: it refers
  // to the names in scope here, so don't rename `barectf_ctx` or the
  // inherited getters without updating the generator.
  void Write(barectf_hip_api_ctx& barectf_ctx) const override {
    // Include generated switch statement.
#include "hip_end.cpp.i"
  }
};
|
||||
|
||||
// HSA API handle type event record.
|
||||
class HsaHandleTypeEventRecord final : public BarectfEventRecord<barectf_hsa_handles_ctx> {
|
||||
public:
|
||||
enum class Type {
|
||||
CPU = 0,
|
||||
GPU = 1,
|
||||
};
|
||||
|
||||
explicit HsaHandleTypeEventRecord(const std::uint64_t handle, const Type type)
|
||||
: BarectfEventRecord<barectf_hsa_handles_ctx>{0}, handle_{handle}, type_{type} {}
|
||||
|
||||
void Write(barectf_hsa_handles_ctx& barectf_ctx) const override {
|
||||
barectf_hsa_handles_trace_hsa_handle_type(&barectf_ctx, handle_,
|
||||
static_cast<std::uint8_t>(type_));
|
||||
}
|
||||
|
||||
private:
|
||||
std::uint64_t handle_;
|
||||
Type type_;
|
||||
};
|
||||
|
||||
// Abstract API operation event record.
|
||||
class ApiOpEventRecord : public TracerEventRecord<barectf_api_ops_ctx> {
|
||||
protected:
|
||||
explicit ApiOpEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
|
||||
: TracerEventRecord<barectf_api_ops_ctx>{record, clock_val} {}
|
||||
};
|
||||
|
||||
// HSA API operation event record (beginning).
|
||||
class HsaOpEventRecordBegin final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HsaOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordBeginClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hsa_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// HSA API operation event record (end).
|
||||
class HsaOpEventRecordEnd final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HsaOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hsa_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// HIP API operation event record (beginning).
|
||||
class HipOpEventRecordBegin final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HipOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordBeginClockVal(record)},
|
||||
kernel_name_{QueryKernelName(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hip_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId(), kernel_name_.c_str());
|
||||
}
|
||||
|
||||
private:
|
||||
// Queries and returns the kernel name of the record `record`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryKernelName(const rocprofiler_record_tracer_t& record) {
|
||||
if (record.operation_id.id == 0) {
|
||||
if (const auto api_handle = record.api_data_handle.handle) {
|
||||
const auto str = reinterpret_cast<const char*>(api_handle);
|
||||
|
||||
if (std::strlen(str) > 1) {
|
||||
// Return demangled version.
|
||||
return rocmtools::cxx_demangle(str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string kernel_name_;
|
||||
};
|
||||
|
||||
// HIP API operation event record (end).
|
||||
class HipOpEventRecordEnd final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HipOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hip_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// Profiler record base.
|
||||
class ProfilerEventRecord : public BarectfEventRecord<barectf_profiler_ctx> {
|
||||
public:
|
||||
explicit ProfilerEventRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id)
|
||||
: BarectfEventRecord<barectf_profiler_ctx>{GetRecordBeginClockVal(record)},
|
||||
dispatch_{record.header.id.handle},
|
||||
gpu_id_{record.gpu_id.handle},
|
||||
queue_id_{record.queue_id.handle},
|
||||
queue_index_{record.queue_idx.value},
|
||||
process_id_{GetPid()},
|
||||
thread_id_{record.thread_id.value},
|
||||
kernel_id_{record.kernel_id.handle},
|
||||
kernel_name_{QueryKernelName(record)},
|
||||
counter_infos_{QueryCounterInfos(record, session_id)} {}
|
||||
|
||||
void Write(barectf_profiler_ctx& barectf_ctx) const override {
|
||||
barectf_profiler_trace_profiler_record(
|
||||
&barectf_ctx, dispatch_, gpu_id_, queue_id_, queue_index_, process_id_, thread_id_,
|
||||
kernel_id_, kernel_name_.c_str(), counter_infos_.names.size(), counter_infos_.names.data(),
|
||||
counter_infos_.values.size(), counter_infos_.values.data());
|
||||
}
|
||||
|
||||
protected:
|
||||
// Counter infos.
|
||||
//
|
||||
// `names[i]` names the counter value `values[i]`.
|
||||
struct CounterInfos final {
|
||||
// `names_storage` owns the strings while the elements of `names`
|
||||
// point to the internal C strings of `names_storage`.
|
||||
//
|
||||
// This is needed because barectf expects an array of contiguous
|
||||
// C string pointers.
|
||||
std::vector<std::string> names_storage;
|
||||
std::vector<const char*> names;
|
||||
|
||||
// Counter values.
|
||||
std::vector<std::uint64_t> values;
|
||||
};
|
||||
|
||||
std::uint64_t GetDispatch() const noexcept { return dispatch_; }
|
||||
std::uint64_t GetGpuId() const noexcept { return gpu_id_; }
|
||||
std::uint64_t GetQueueId() const noexcept { return queue_id_; }
|
||||
std::uint64_t GetQueueIndex() const noexcept { return queue_index_; }
|
||||
std::uint32_t GetProcessId() const noexcept { return process_id_; }
|
||||
std::uint32_t GetThreadId() const noexcept { return thread_id_; }
|
||||
std::uint64_t GetKernelId() const noexcept { return kernel_id_; }
|
||||
const std::string& GetKernelName() const noexcept { return kernel_name_; }
|
||||
const CounterInfos& GetCounterInfos() const noexcept { return counter_infos_; }
|
||||
|
||||
private:
|
||||
// Queries and returns the kernel name of the record `record`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryKernelName(const rocprofiler_record_profiler_t& record) {
|
||||
const auto kernel_name = QueryAllocStr(
|
||||
[&record](const auto size) {
|
||||
return rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, record.kernel_id, size);
|
||||
},
|
||||
[&record](const auto str) {
|
||||
return rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, record.kernel_id,
|
||||
const_cast<const char**>(str));
|
||||
});
|
||||
|
||||
if (kernel_name.size() <= 1) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Return truncated and demangled version.
|
||||
return rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name));
|
||||
}
|
||||
|
||||
// Queries and returns the counter infos of the record `record` and
|
||||
// session ID `session_id`.
|
||||
static CounterInfos QueryCounterInfos(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
if (!record.counters) {
|
||||
// No counters.
|
||||
return {};
|
||||
}
|
||||
|
||||
CounterInfos infos;
|
||||
|
||||
for (std::size_t i = 0; i < record.counters_count.value; ++i) {
|
||||
auto& counter = record.counters[i];
|
||||
|
||||
if (counter.counter_handler.handle == 0) {
|
||||
// Not available: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Query counter name size first
|
||||
std::size_t counter_name_size = 0;
|
||||
[[maybe_unused]] auto ret = rocprofiler_query_counter_info_size(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, counter.counter_handler, &counter_name_size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name size");
|
||||
|
||||
if (counter_name_size == 0) {
|
||||
// No size: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Query counter name (borrowed from `record`: no need to free).
|
||||
const char* counter_name = nullptr;
|
||||
|
||||
ret = rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
|
||||
counter.counter_handler, &counter_name);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name");
|
||||
|
||||
if (!counter_name) {
|
||||
// Not available: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Push back infos.
|
||||
infos.names_storage.emplace_back(counter_name);
|
||||
infos.names.push_back(infos.names_storage.back().c_str());
|
||||
infos.values.push_back(counter.value.value);
|
||||
}
|
||||
|
||||
return infos;
|
||||
}
|
||||
|
||||
std::uint64_t dispatch_;
|
||||
std::uint64_t gpu_id_;
|
||||
std::uint64_t queue_id_;
|
||||
std::uint64_t queue_index_;
|
||||
std::uint32_t process_id_;
|
||||
std::uint32_t thread_id_;
|
||||
std::uint64_t kernel_id_;
|
||||
std::string kernel_name_;
|
||||
CounterInfos counter_infos_;
|
||||
};
|
||||
|
||||
// Profiler record base.
|
||||
class ProfilerWithKernelPropsEventRecord final : public ProfilerEventRecord {
|
||||
private:
|
||||
// According to `plugin/file/file.cpp`:
|
||||
//
|
||||
// > Taken from rocprofiler: The size hasn't changed in recent past
|
||||
static constexpr std::uint32_t lds_block_size_ = 128 * 4;
|
||||
|
||||
public:
|
||||
explicit ProfilerWithKernelPropsEventRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id)
|
||||
: ProfilerEventRecord{record, session_id},
|
||||
grid_size_{record.kernel_properties.grid_size},
|
||||
workgroup_size_{record.kernel_properties.workgroup_size},
|
||||
lds_size_{
|
||||
((record.kernel_properties.lds_size + (lds_block_size_ - 1)) & ~(lds_block_size_ - 1))},
|
||||
scratch_size_{record.kernel_properties.scratch_size},
|
||||
arch_vgpr_count_{record.kernel_properties.arch_vgpr_count},
|
||||
accum_vgpr_count_{record.kernel_properties.accum_vgpr_count},
|
||||
sgpr_count_{record.kernel_properties.sgpr_count},
|
||||
wave_size_{record.kernel_properties.wave_size},
|
||||
signal_handle_{record.kernel_properties.signal_handle} {}
|
||||
|
||||
void Write(barectf_profiler_ctx& barectf_ctx) const override {
|
||||
barectf_profiler_trace_profiler_record_with_kernel_properties(
|
||||
&barectf_ctx, GetDispatch(), GetGpuId(), GetQueueId(), GetQueueIndex(), GetProcessId(),
|
||||
GetThreadId(), GetKernelId(), GetKernelName().c_str(), GetCounterInfos().names.size(),
|
||||
GetCounterInfos().names.data(), GetCounterInfos().values.size(),
|
||||
GetCounterInfos().values.data(), grid_size_, workgroup_size_, lds_size_, scratch_size_,
|
||||
arch_vgpr_count_, accum_vgpr_count_, sgpr_count_, wave_size_, signal_handle_);
|
||||
}
|
||||
|
||||
private:
|
||||
std::uint64_t grid_size_;
|
||||
std::uint64_t workgroup_size_;
|
||||
std::uint64_t lds_size_;
|
||||
std::uint64_t scratch_size_;
|
||||
std::uint64_t arch_vgpr_count_;
|
||||
std::uint64_t accum_vgpr_count_;
|
||||
std::uint64_t sgpr_count_;
|
||||
std::uint64_t wave_size_;
|
||||
std::uint64_t signal_handle_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
Plugin::Plugin(const std::size_t packet_size, const fs::path& trace_dir,
|
||||
const fs::path& metadata_stream_path)
|
||||
: roctx_tracer_{packet_size, trace_dir, "roctx_"},
|
||||
hsa_api_tracer_{packet_size, trace_dir, "hsa_api_"},
|
||||
hip_api_tracer_{packet_size, trace_dir, "hip_api_"},
|
||||
api_ops_tracer_{packet_size, trace_dir, "api_ops_"},
|
||||
hsa_handles_tracer_{packet_size, trace_dir, "hsa_handles_"},
|
||||
profiler_tracer_{packet_size, trace_dir, "profiler_"} {
|
||||
// Make sure the trace directory doesn't exist.
|
||||
if (fs::exists(trace_dir)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "CTF trace directory `" << trace_dir.string() << "` already exists";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Make sure the metadata stream file exists.
|
||||
if (!fs::exists(metadata_stream_path)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "CTF metadata stream file `" << metadata_stream_path.string() << "` doesn't exist";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Create trace directory.
|
||||
if (!fs::create_directory(trace_dir)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "Cannot create the CTF trace directory `" << trace_dir.string() << "`";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Copy adjusted metadata stream file to trace directory.
|
||||
try {
|
||||
CopyAdjustedMetadataStreamFile(metadata_stream_path, trace_dir);
|
||||
} catch (const std::exception& exc) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "Cannot adjust and copy metadata stream file `" << metadata_stream_path.string()
|
||||
<< "` to the CTF trace directory `" << trace_dir.string() << "`: " << exc.what();
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Write HSA handle type event records.
|
||||
WriteHsaHandleTypes();
|
||||
}
|
||||
|
||||
void Plugin::HandleTracerRecord(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
std::lock_guard<std::mutex> lock{lock_};
|
||||
|
||||
// Depending on the domain, create and add an event record to the
|
||||
// corresponding tracer.
|
||||
switch (record.domain) {
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
roctx_tracer_.AddEventRecord(std::make_shared<const RocTxEventRecord>(record, session_id));
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
hsa_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HsaApiEventRecordBegin>(record, session_id));
|
||||
hsa_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HsaApiEventRecordEnd>(record, session_id));
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
hip_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HipApiEventRecordBegin>(record, session_id));
|
||||
hip_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HipApiEventRecordEnd>(record, session_id));
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordBegin>(record));
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordEnd>(record));
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordBegin>(record));
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordEnd>(record));
|
||||
break;
|
||||
default:
|
||||
// Warn
|
||||
std::cerr << "rocm_ctf::Plugin::HandleTracerRecord(): "
|
||||
<< "ignoring record for unknown domain #" << record.domain << std::endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Plugin::HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
std::lock_guard<std::mutex> lock{lock_};
|
||||
profiler_tracer_.AddEventRecord(
|
||||
std::make_shared<const ProfilerWithKernelPropsEventRecord>(record, session_id));
|
||||
}
|
||||
|
||||
void Plugin::HandleBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* const end,
|
||||
const rocprofiler_session_id_t session_id,
|
||||
const rocprofiler_buffer_id_t buffer_id) {
|
||||
while (begin && begin < end) {
|
||||
if (begin->kind == ROCPROFILER_TRACER_RECORD) {
|
||||
HandleTracerRecord(*reinterpret_cast<const rocprofiler_record_tracer_t*>(begin), session_id);
|
||||
} else {
|
||||
assert(begin->kind == ROCPROFILER_PROFILER_RECORD);
|
||||
HandleProfilerRecord(*reinterpret_cast<const rocprofiler_record_profiler_t*>(begin),
|
||||
session_id);
|
||||
}
|
||||
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
}
|
||||
|
||||
void Plugin::WriteHsaHandleTypes() {
|
||||
[[maybe_unused]] const auto status = hsa_iterate_agents(
|
||||
[](const auto agent, const auto user_data) {
|
||||
auto& tracer = *static_cast<HsaHandlesTracer*>(user_data);
|
||||
hsa_device_type_t type;
|
||||
|
||||
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
using Type = HsaHandleTypeEventRecord::Type;
|
||||
|
||||
auto event_record = std::make_shared<HsaHandleTypeEventRecord>(
|
||||
agent.handle, type == HSA_DEVICE_TYPE_CPU ? Type::CPU : Type::GPU);
|
||||
|
||||
tracer.AddEventRecord(std::move(event_record));
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&hsa_handles_tracer_);
|
||||
|
||||
assert(status == HSA_STATUS_SUCCESS && "Iterate HSA agents");
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::uint64_t ns_per_s = 1'000'000'000ULL;
|
||||
|
||||
// Samples the ROCMTools clock and returns the value.
|
||||
std::uint64_t GetClkVal() {
|
||||
rocprofiler_timestamp_t ts;
|
||||
[[maybe_unused]] const auto ret = rocprofiler_get_timestamp(&ts);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Get timestamp");
|
||||
return ts.value;
|
||||
}
|
||||
|
||||
// Updates `offset` and `delta`, if needed, to a more accurate clock
|
||||
// class offset and a smaller ROCMTools clock value delta.
|
||||
//
|
||||
// This function samples the ROCMTools clock twice, also sampling the
|
||||
// real-time clock in between, and uses the average ROCMTools clock
|
||||
// value to approximate the actual clock class offset.
|
||||
//
|
||||
// This strategy is based on the measure_single_clock_offset() function
|
||||
// of the LTTng-tools project <https://lttng.org/>.
|
||||
void UpdateClkClsOffsetAndDelta(std::uint64_t& offset, std::uint64_t& delta) {
|
||||
// Sample ROCMTools clock (first time).
|
||||
const auto rocm_clk_val1 = GetClkVal();
|
||||
|
||||
// Sample real-time clock.
|
||||
timespec realtime_spec = {0, 0};
|
||||
[[maybe_unused]] const auto ret = clock_gettime(CLOCK_REALTIME, &realtime_spec);
|
||||
|
||||
assert(ret == 0);
|
||||
|
||||
// Sample ROCMTools clock (second time).
|
||||
const auto rocm_clk_val2 = GetClkVal();
|
||||
|
||||
// Compute the current ROCMTools clock value delta.
|
||||
const auto this_delta = rocm_clk_val2 - rocm_clk_val1;
|
||||
|
||||
if (this_delta > delta) {
|
||||
// Discard larger delta.
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute the average ROCMTools clock value.
|
||||
const auto rocm_clk_val_avg = (rocm_clk_val1 + rocm_clk_val2) >> 1;
|
||||
|
||||
// Compute the real-time clock value in nanoseconds.
|
||||
const auto realtime_ns =
|
||||
(static_cast<std::uint64_t>(realtime_spec.tv_sec) * ns_per_s) + realtime_spec.tv_nsec;
|
||||
|
||||
// Update clock class offset and delta.
|
||||
assert(rocm_clk_val_avg < realtime_ns);
|
||||
offset = realtime_ns - rocm_clk_val_avg;
|
||||
delta = this_delta;
|
||||
}
|
||||
|
||||
// Computes and returns the most possible accurate clock class offset.
|
||||
std::uint64_t GetMetadataClkClsOffset() {
|
||||
std::uint64_t offset = 0;
|
||||
std::uint64_t delta = std::numeric_limits<std::uint64_t>::max();
|
||||
|
||||
// Best effort to find the most accurate offset.
|
||||
for (auto i = 0U; i < 50U; ++i) {
|
||||
UpdateClkClsOffsetAndDelta(offset, delta);
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Loads the metadata stream file `metadata_stream_path`, replaces the
// first `offset = 0;` with the measured clock class offset, and writes
// the result to the `metadata` file within the `trace_dir` directory.
//
// Throws `std::runtime_error` when the input file cannot be opened,
// when it contains no `offset = 0;`, or when the output file cannot be
// written.
void Plugin::CopyAdjustedMetadataStreamFile(const fs::path& metadata_stream_path,
                                            const fs::path& trace_dir) {
  // Load installed metadata stream file contents (everything up to the
  // first null character, or the whole file if there's none).
  std::string metadata;

  {
    std::ifstream input{metadata_stream_path};

    if (!input) {
      throw std::runtime_error{"cannot open the file for reading"};
    }

    std::getline(input, metadata, '\0');
  }

  // Replace the original `offset` property.
  {
    static constexpr auto offset_term = "offset = 0;";
    const auto offset_pos = metadata.find(offset_term);

    if (offset_pos == std::string::npos) {
      // Without this check, replace() below would throw an
      // `std::out_of_range` with an unhelpful message.
      throw std::runtime_error{std::string{"cannot find `"} + offset_term + "`"};
    }

    // std::to_string() doesn't depend on the global locale, so the
    // number is always written as plain decimal digits.
    metadata.replace(offset_pos, std::strlen(offset_term),
                     "offset = " + std::to_string(GetMetadataClkClsOffset()) + ';');
  }

  // Write adjusted metadata stream to trace directory.
  {
    std::ofstream output{trace_dir / "metadata"};

    output.write(metadata.data(), static_cast<std::streamsize>(metadata.size()));
    output.flush();

    if (!output) {
      throw std::runtime_error{"cannot write the `metadata` file"};
    }
  }
}
|
||||
|
||||
} // namespace rocm_ctf
|
||||
@@ -0,0 +1,146 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_PLUGIN_H
|
||||
#define PLUGIN_CTF_PLUGIN_H
|
||||
|
||||
#include <mutex>
|
||||
#include <cstdlib>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
|
||||
#include "barectf.h"
|
||||
#include "barectf_tracer.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
// CTF plugin.
|
||||
//
|
||||
// Build a plugin instance, and then call HandleTracerRecord(),
|
||||
// HandleProfilerRecord(), and HandleBufferRecords() to add event
|
||||
// records.
|
||||
//
|
||||
// A plugin instance performs important tasks at destruction time.
|
||||
class Plugin final {
|
||||
public:
|
||||
// Builds a plugin instance to write a CTF trace in the `trace_dir`
|
||||
// directory with packets of size `packet_size` bytes.
|
||||
//
|
||||
// `trace_dir` must not exist.
|
||||
//
|
||||
// This constructor immediately adjusts and copies the metadata stream
|
||||
// file `metadata_stream_path` to the trace directory (`trace_dir`).
|
||||
explicit Plugin(std::size_t packet_size, const std::experimental::filesystem::path& trace_dir,
|
||||
const std::experimental::filesystem::path& metadata_stream_path);
|
||||
|
||||
// Handles a tracer record.
|
||||
void HandleTracerRecord(const rocprofiler_record_tracer_t& record,
|
||||
rocprofiler_session_id_t session_id);
|
||||
|
||||
// Handles a profiler record.
|
||||
void HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
|
||||
rocprofiler_session_id_t session_id);
|
||||
|
||||
// Handles tracer or profiler records from `begin` to `end`
|
||||
// (excluded).
|
||||
void HandleBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id);
|
||||
|
||||
private:
|
||||
// rocTX barectf platform descriptor.
|
||||
struct RocTxPlatformDescr final {
|
||||
using Ctx = barectf_roctx_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_roctx_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_roctx_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// HSA API barectf platform descriptor.
|
||||
struct HsaApiPlatformDescr final {
|
||||
using Ctx = barectf_hsa_api_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_hsa_api_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_hsa_api_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// HIP API barectf platform descriptor.
|
||||
struct HipApiPlatformDescr final {
|
||||
using Ctx = barectf_hip_api_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_hip_api_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_hip_api_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// HSA handles barectf platform descriptor.
|
||||
struct HsaHandlesPlatformDescr final {
|
||||
using Ctx = barectf_hsa_handles_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_hsa_handles_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_hsa_handles_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// API operations barectf platform descriptor.
|
||||
struct ApiOpsPlatformDescr final {
|
||||
using Ctx = barectf_api_ops_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_api_ops_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_api_ops_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// Profiler barectf platform descriptor.
|
||||
struct ProfilerPlatformDescr final {
|
||||
using Ctx = barectf_profiler_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_profiler_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_profiler_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// barectf tracer for HSA handle mappings.
|
||||
using HsaHandlesTracer = BarectfTracer<HsaHandlesPlatformDescr>;
|
||||
|
||||
// Writes the HSA handle type mappings to a dedicated data stream
|
||||
// file.
|
||||
void WriteHsaHandleTypes();
|
||||
|
||||
// Loads the existing metadata stream file `metadata_stream_path`,
|
||||
// adjusts the `offset` property of its single clock class, and writes
|
||||
// the result to the `metadata` file within the `trace_dir` directory.
|
||||
void CopyAdjustedMetadataStreamFile(
|
||||
const std::experimental::filesystem::path& metadata_stream_path,
|
||||
const std::experimental::filesystem::path& trace_dir);
|
||||
|
||||
// Dedicated tracers.
|
||||
BarectfTracer<RocTxPlatformDescr> roctx_tracer_;
|
||||
BarectfTracer<HsaApiPlatformDescr> hsa_api_tracer_;
|
||||
BarectfTracer<HipApiPlatformDescr> hip_api_tracer_;
|
||||
BarectfTracer<ApiOpsPlatformDescr> api_ops_tracer_;
|
||||
HsaHandlesTracer hsa_handles_tracer_;
|
||||
BarectfTracer<ProfilerPlatformDescr> profiler_tracer_;
|
||||
|
||||
// Locks any operation performed on the data of this.
|
||||
std::mutex lock_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_PLUGIN_H
|
||||
@@ -0,0 +1,7 @@
|
||||
/* Anonymous version node listing the symbols a plugin exports. */
{
  /* Exported plugin entry points. */
  global:
    rocprofiler_plugin_initialize;
    rocprofiler_plugin_finalize;
    rocprofiler_plugin_write_buffer_records;
    rocprofiler_plugin_write_record;

  /* Everything else stays local to the shared object. */
  local:
    *;
};
|
||||
@@ -0,0 +1,44 @@
|
||||
# ###############################################################################
|
||||
# # Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
# #
|
||||
# # Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# # of this software and associated documentation files (the "Software"), to
|
||||
# # deal in the Software without restriction, including without limitation the
|
||||
# # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# # sell copies of the Software, and to permit persons to whom the Software is
|
||||
# # furnished to do so, subject to the following conditions:
|
||||
# #
|
||||
# # The above copyright notice and this permission notice shall be included in
|
||||
# # all copies or substantial portions of the Software.
|
||||
# #
|
||||
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# # IN THE SOFTWARE.
|
||||
# ###############################################################################
|
||||
|
||||
# Builds and installs the `file_plugin` shared library.

# Shared helper source compiled directly into the plugin. It is named
# explicitly rather than globbed: a glob silently yields an empty list
# when the file is missing, whereas an explicit path fails at configure
# time.
set(ROCPROFILER_UTIL_SRC_FILES "${PROJECT_SOURCE_DIR}/src/utils/helper.cpp")

# Plugin sources. CONFIGURE_DEPENDS makes the build system re-check the
# glob, so that added or removed sources trigger a reconfiguration.
file(GLOB FILE_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")

add_library(file_plugin SHARED ${FILE_SOURCES} ${ROCPROFILER_UTIL_SRC_FILES})

# Hide symbols by default, relink when the export map changes, and put
# the library at the root of the build tree.
set_target_properties(
  file_plugin
  PROPERTIES CXX_VISIBILITY_PRESET hidden
             LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
             LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}")

target_compile_definitions(
  file_plugin
  PRIVATE HIP_PROF_HIP_API_STRING=1
          __HIP_PLATFORM_HCC__=1)

target_include_directories(
  file_plugin
  PRIVATE "${PROJECT_SOURCE_DIR}/inc"
          "${PROJECT_SOURCE_DIR}")

# Export only the symbols listed in the export map and refuse undefined
# symbols at link time.
target_link_options(
  file_plugin
  PRIVATE "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
          "-Wl,--no-undefined")

# CMAKE_DL_LIBS is the portable spelling of the library providing
# dlopen()/dlclose().
target_link_libraries(
  file_plugin
  PRIVATE ${ROCPROFILER_TARGET}
          hsa-runtime64::hsa-runtime64
          systemd
          stdc++fs
          amd_comgr
          ${CMAKE_DL_LIBS})

install(
  TARGETS file_plugin
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
          COMPONENT runtime)
|
||||
@@ -0,0 +1,472 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <experimental/filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <hsa/hsa.h>
|
||||
#include <mutex>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
#include "../utils.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
static std::string output_file_name;
|
||||
class file_plugin_t {
|
||||
private:
|
||||
// Kind of output selected with get_output_file().
enum class output_type_t { COUNTER, TRACER, PC_SAMPLING };
|
||||
|
||||
class output_file_t {
|
||||
public:
|
||||
output_file_t(std::string name) : name_(std::move(name)) {}
|
||||
|
||||
std::string name() const { return name_; }
|
||||
|
||||
template <typename T> std::ostream& operator<<(T&& value) {
|
||||
if (!is_open()) open();
|
||||
return stream_ << std::forward<T>(value);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& (*func)(std::ostream&)) {
|
||||
if (!is_open()) open();
|
||||
return stream_ << func;
|
||||
}
|
||||
|
||||
void open() {
|
||||
// If the stream is already in the failed state, there's no need to try
|
||||
// to open the file.
|
||||
if (fail()) return;
|
||||
|
||||
const char* output_dir = getenv("OUTPUT_PATH");
|
||||
output_file_name = getenv("OUT_FILE_NAME") ? std::string(getenv("OUT_FILE_NAME")) + "_" : "";
|
||||
|
||||
if (output_dir == nullptr) {
|
||||
stream_.copyfmt(std::cout);
|
||||
stream_.clear(std::cout.rdstate());
|
||||
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
|
||||
return;
|
||||
}
|
||||
|
||||
fs::path output_prefix(output_dir);
|
||||
if (!fs::is_directory(fs::status(output_prefix))) {
|
||||
if (!stream_.fail()) rocmtools::warning("Cannot open output directory '%s'", output_dir);
|
||||
stream_.setstate(std::ios_base::failbit);
|
||||
return;
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
ss << output_file_name << GetPid() << "_" << name_;
|
||||
stream_.open(output_prefix / ss.str());
|
||||
}
|
||||
|
||||
bool is_open() const { return stream_.is_open(); }
|
||||
bool fail() const { return stream_.fail(); }
|
||||
|
||||
private:
|
||||
const std::string name_;
|
||||
std::ofstream stream_;
|
||||
};
|
||||
|
||||
// Returns the output file matching `output_type`.
//
// `domain` is only considered for tracer outputs, where it selects the
// per-domain file. Returns a null pointer for a tracer domain without a
// dedicated file (after tripping an assertion in debug builds) and for
// an unknown output type.
output_file_t* get_output_file(output_type_t output_type, uint32_t domain = 0) {
  if (output_type == output_type_t::COUNTER) return &output_file_;
  if (output_type == output_type_t::PC_SAMPLING) return &pc_sample_file_;
  if (output_type != output_type_t::TRACER) return nullptr;

  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      return &roctx_file_;
    case ACTIVITY_DOMAIN_HSA_API:
      return &hsa_api_file_;
    case ACTIVITY_DOMAIN_HIP_API:
      return &hip_api_file_;
    case ACTIVITY_DOMAIN_HIP_OPS:
      return &hip_activity_file_;
    case ACTIVITY_DOMAIN_HSA_OPS:
      return &hsa_async_copy_file_;
    default:
      break;
  }

  assert(!"domain/op not supported!");
  return nullptr;
}
|
||||
|
||||
public:
|
||||
file_plugin_t() {
|
||||
output_file_t hsa_handles("hsa_handles.txt");
|
||||
|
||||
[[maybe_unused]] hsa_status_t status = hsa_iterate_agents(
|
||||
[](hsa_agent_t agent, void* user_data) {
|
||||
auto* file = static_cast<decltype(hsa_handles)*>(user_data);
|
||||
hsa_device_type_t type;
|
||||
|
||||
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS)
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
*file << std::hex << std::showbase << agent.handle << " agent "
|
||||
<< ((type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu") << std::endl;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&hsa_handles);
|
||||
assert(status == HSA_STATUS_SUCCESS && "failed to iterate HSA agents");
|
||||
if (hsa_handles.fail()) {
|
||||
rocmtools::warning("Cannot write to '%s'", hsa_handles.name().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
// App begin timestamp begin_ts_file.txt
|
||||
output_file_t begin_ts("begin_ts_file.txt");
|
||||
|
||||
[[maybe_unused]] rocprofiler_timestamp_t app_begin_timestamp = {};
|
||||
CHECK_ROCMTOOLS(rocprofiler_get_timestamp(&app_begin_timestamp));
|
||||
|
||||
begin_ts << std::dec << app_begin_timestamp.value << std::endl;
|
||||
if (begin_ts.fail()) {
|
||||
rocmtools::warning("Cannot write to '%s'", begin_ts.name().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
valid_ = true;
|
||||
}
|
||||
|
||||
// Held by FlushTracerRecord() and FlushProfilerRecord() while they format and
// write a record.
std::mutex writing_lock;
|
||||
|
||||
// Returns the label written into trace lines for an activity domain, or an
// empty string for a domain without a label.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      return "ROCTX_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_API:
      return "HIP_API_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_OPS:
      return "HIP_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_API:
      return "HSA_API_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_OPS:
      return "HSA_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_EVT:
      return "HSA_EVT_DOMAIN";
    default:
      return "";
  }
}
|
||||
|
||||
// Formats one tracer record as a single text line and appends it to the trace
// file of the record's activity domain.
//
// Besides the fields stored in the record itself (id, domain, begin/end
// timestamps, correlation id), the line carries whichever of these could be
// queried for the record: ROCTX id, ROCTX message, API function name, kernel
// name. Records whose domain has no output file are dropped.
//
// tracer_record: the record to write (taken by value).
// session_id:    session passed to the rocprofiler_query_* lookups.
// buffer_id:     not used by this method.
void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record, rocprofiler_session_id_t session_id,
                       rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) {
  std::lock_guard<std::mutex> lock(writing_lock);
  std::string kernel_name;
  std::string function_name;
  std::string roctx_message;
  // The id is only meaningful when has_roctx_id is set. An unsigned value
  // cannot signal "absent" on its own, so presence is tracked explicitly.
  uint64_t roctx_id = 0;
  bool has_roctx_id = false;

  // For HIP ops with operation id 0 the data handle itself is read as the
  // kernel name string.
  if (tracer_record.operation_id.id == 0 && tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS) {
    const char* handle_name = reinterpret_cast<const char*>(tracer_record.api_data_handle.handle);
    if (handle_name && strlen(handle_name) > 1) kernel_name = rocmtools::cxx_demangle(handle_name);
  }

  // NOTE(review): the query functions below receive the address of the buffer
  // pointer. Whether they fill the malloc'ed buffer or replace the pointer is
  // not visible here, so buffer ownership is left exactly as it was -- confirm
  // against the API and release the buffers accordingly.
  if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) {
    size_t function_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
        session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = static_cast<char*>(malloc(function_name_size));
      CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
          session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) {
    size_t function_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = static_cast<char*>(malloc(function_name_size));
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }

    size_t kernel_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &kernel_name_size));
    if (kernel_name_size > 1) {
      char* kernel_name_str = static_cast<char*>(malloc(kernel_name_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &kernel_name_str));
      if (kernel_name_str) kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
    size_t roctx_message_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
        tracer_record.operation_id, &roctx_message_size));
    if (roctx_message_size > 1) {
      char* roctx_message_str = static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_message_str));
      // std::string copies the characters; an extra strdup() copy would only leak.
      if (roctx_message_str)
        roctx_message = rocmtools::cxx_demangle(std::string(roctx_message_str));
    }

    size_t roctx_id_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, tracer_record.operation_id,
        &roctx_id_size));
    if (roctx_id_size > 1) {
      char* roctx_id_str = static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_id_str));
      if (roctx_id_str) {
        roctx_id = std::stoll(std::string(roctx_id_str));
        has_roctx_id = true;
        free(roctx_id_str);
      }
    }
  }

  output_file_t* output_file = get_output_file(output_type_t::TRACER, tracer_record.domain);
  // get_output_file() returns nullptr for domains without a trace file (it
  // asserts in debug builds); never dereference that in release builds.
  if (output_file == nullptr) return;

  *output_file << "Record [" << tracer_record.header.id.handle << "], Domain("
               << GetDomainName(tracer_record.domain) << "), Begin("
               << tracer_record.timestamps.begin.value << "), End("
               << tracer_record.timestamps.end.value << "), Correlation ID( "
               << tracer_record.correlation_id.value << ")";
  if (has_roctx_id) *output_file << ", ROCTX ID(" << roctx_id << ")";
  if (roctx_message.size() > 1) *output_file << ", ROCTX Message(" << roctx_message << ")";
  if (function_name.size() > 1) *output_file << ", Function(" << function_name << ")";
  if (kernel_name.size() > 1) *output_file << ", Kernel Name(" << kernel_name.c_str() << ")";
  *output_file << std::endl;
}
|
||||
|
||||
void FlushProfilerRecord(const rocprofiler_record_profiler_t* profiler_record,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
std::lock_guard<std::mutex> lock(writing_lock);
|
||||
size_t name_length = 0;
|
||||
output_file_t* output_file{nullptr};
|
||||
output_file = get_output_file(output_type_t::COUNTER);
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
|
||||
profiler_record->kernel_id, &name_length));
|
||||
// Taken from rocprofiler: The size hasn't changed in recent past
|
||||
static const uint32_t lds_block_size = 128 * 4;
|
||||
const char* kernel_name_c;
|
||||
if (name_length > 1) {
|
||||
kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record->kernel_id,
|
||||
&kernel_name_c));
|
||||
}
|
||||
*output_file << std::string("dispatch[") << std::to_string(profiler_record->header.id.handle)
|
||||
<< "], " << std::string("gpu_id(")
|
||||
<< std::to_string(profiler_record->gpu_id.handle) << "), "
|
||||
<< std::string("queue_id(") << std::to_string(profiler_record->queue_id.handle)
|
||||
<< "), " << std::string("queue_index(")
|
||||
<< std::to_string(profiler_record->queue_idx.value) << "), " << std::string("pid(")
|
||||
<< std::to_string(GetPid()) << "), " << std::string("tid(")
|
||||
<< std::to_string(profiler_record->thread_id.value) << ")";
|
||||
*output_file << ", " << std::string("grd(")
|
||||
<< std::to_string(profiler_record->kernel_properties.grid_size) << "), "
|
||||
<< std::string("wgr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.workgroup_size) << "), "
|
||||
<< std::string("lds(")
|
||||
<< std::to_string(
|
||||
((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) &
|
||||
~(lds_block_size - 1)))
|
||||
<< "), " << std::string("scr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.scratch_size) << "), "
|
||||
<< std::string("arch_vgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << "), "
|
||||
<< std::string("accum_vgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << "), "
|
||||
<< std::string("sgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.sgpr_count) << "), "
|
||||
<< std::string("wave_size(")
|
||||
<< std::to_string(profiler_record->kernel_properties.wave_size) << "), "
|
||||
<< std::string("sig(")
|
||||
<< std::to_string(profiler_record->kernel_properties.signal_handle);
|
||||
std::string kernel_name = "";
|
||||
if (name_length > 1) {
|
||||
kernel_name = rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name_c));
|
||||
}
|
||||
*output_file << "), " << std::string("obj(")
|
||||
<< std::to_string(profiler_record->kernel_id.handle) << "), "
|
||||
<< std::string("kernel-name(\"") << kernel_name << "\")"
|
||||
<< std::string(", start_time(")
|
||||
<< std::to_string(profiler_record->timestamps.begin.value) << ")"
|
||||
<< std::string(", end_time(")
|
||||
<< std::to_string(profiler_record->timestamps.end.value) << ")";
|
||||
|
||||
// For Counters
|
||||
*output_file << std::endl;
|
||||
if (profiler_record->counters) {
|
||||
for (uint64_t i = 0; i < profiler_record->counters_count.value; i++) {
|
||||
if (profiler_record->counters[i].counter_handler.handle > 0) {
|
||||
size_t counter_name_length = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
|
||||
&counter_name_length));
|
||||
if (counter_name_length > 1) {
|
||||
const char* name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_counter_info(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
|
||||
&name_c));
|
||||
*output_file << ", " << name_c << " ("
|
||||
<< std::to_string(profiler_record->counters[i].value.value) << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FlushPCSamplingRecord(
|
||||
const rocprofiler_record_pc_sample_t *pc_sampling_record) {
|
||||
output_file_t* output_file{nullptr};
|
||||
output_file = get_output_file(output_type_t::PC_SAMPLING);
|
||||
const auto &sample = pc_sampling_record->pc_sample;
|
||||
*output_file << "dispatch[" << sample.dispatch_id.value << "], "
|
||||
<< "timestamp(" << sample.timestamp.value << "), "
|
||||
<< "gpu_id(" << sample.gpu_id.handle << "), "
|
||||
<< "pc-sample(" << std::hex << std::showbase << sample.pc << "), "
|
||||
<< "se(" << sample.se << ')'
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id) {
|
||||
while (begin < end) {
|
||||
if (!begin) return 0;
|
||||
switch (begin->kind) {
|
||||
case ROCPROFILER_PROFILER_RECORD: {
|
||||
const rocprofiler_record_profiler_t* profiler_record =
|
||||
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin);
|
||||
FlushProfilerRecord(profiler_record, session_id, buffer_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_TRACER_RECORD: {
|
||||
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
|
||||
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
|
||||
FlushTracerRecord(*tracer_record, session_id, buffer_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PC_SAMPLING_RECORD: {
|
||||
const rocprofiler_record_pc_sample_t *pc_sampling_record =
|
||||
reinterpret_cast<const rocprofiler_record_pc_sample_t *>(begin);
|
||||
FlushPCSamplingRecord(pc_sampling_record);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Reports valid_, which the constructor sets to true only after both of its
// metadata files were written without a stream failure.
bool is_valid() const { return valid_; }
|
||||
|
||||
private:
|
||||
bool valid_{false};
|
||||
|
||||
output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"},
|
||||
hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"},
|
||||
hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"},
|
||||
output_file_{"results.txt"};
|
||||
};
|
||||
|
||||
// The plugin instance: created by rocprofiler_plugin_initialize() and
// destroyed by rocprofiler_plugin_finalize(); nullptr while no plugin exists.
file_plugin_t* file_plugin = nullptr;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Creates the file plugin instance.
//
// Returns 0 on success and -1 when the rocprofiler version is incompatible
// (different major version, or a minor version older than the one this plugin
// was built against), when a plugin instance already exists, or when the new
// instance failed to initialize.
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
                                                     uint32_t rocprofiler_minor_version) {
  const bool version_ok = rocprofiler_major_version == ROCPROFILER_VERSION_MAJOR &&
                          rocprofiler_minor_version >= ROCPROFILER_VERSION_MINOR;
  if (!version_ok) return -1;

  if (file_plugin != nullptr) return -1;

  file_plugin = new file_plugin_t();
  if (!file_plugin->is_valid()) {
    // The plugin failed to initialize: destroy it and report an error.
    delete file_plugin;
    file_plugin = nullptr;
    return -1;
  }
  return 0;
}
|
||||
|
||||
// Destroys the plugin instance, if there is one, and clears the global
// pointer so the plugin can be initialized again.
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
  if (file_plugin != nullptr) {
    delete file_plugin;
    file_plugin = nullptr;
  }
}
|
||||
|
||||
// Plugin entry point for a buffer of records: forwards [begin, end) to the
// plugin instance. Returns -1 when there is no valid plugin instance,
// otherwise the result of WriteBufferRecords().
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  const bool usable = file_plugin && file_plugin->is_valid();
  return usable ? file_plugin->WriteBufferRecords(begin, end, session_id, buffer_id) : -1;
}
|
||||
|
||||
// Plugin entry point for a single tracer record. Returns -1 when there is no
// valid plugin instance and 0 otherwise; records whose header id handle is 0
// are accepted but not written.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
                                                       rocprofiler_session_id_t session_id) {
  if (file_plugin == nullptr || !file_plugin->is_valid()) return -1;

  if (record.header.id.handle != 0) file_plugin->FlushTracerRecord(record, session_id);
  return 0;
}
|
||||
@@ -0,0 +1,27 @@
|
||||
# Build script for the Perfetto output plugin (perfetto_plugin).

# Shared helper source compiled into the plugin. It is named explicitly:
# globbing for a single literal path adds nothing and silently produces an
# empty list when the file is missing, which only shows up later as
# unresolved symbols at link time.
set(ROCPROFILER_UTIL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp)

add_library(perfetto_plugin
  ${LIBRARY_TYPE}
  ${ROCPROFILER_UTIL_SRC_FILES}
  perfetto.cpp
  perfetto_sdk/sdk/perfetto.cc)

# Hide symbols by default; the version script passed to the linker below
# selects what is exported. LINK_DEPENDS relinks when that script changes.
set_target_properties(perfetto_plugin PROPERTIES
  CXX_VISIBILITY_PRESET hidden
  LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
  LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})

target_compile_definitions(perfetto_plugin
  PRIVATE
    HIP_PROF_HIP_API_STRING=1
    __HIP_PLATFORM_HCC__=1)

target_include_directories(perfetto_plugin
  PRIVATE
    ${PROJECT_SOURCE_DIR}/inc
    ${PROJECT_SOURCE_DIR}
    ${PROJECT_SOURCE_DIR}/plugin/perfetto/perfetto_sdk/sdk)

target_link_options(perfetto_plugin
  PRIVATE
    -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
    -Wl,--no-undefined)

target_link_libraries(perfetto_plugin
  PRIVATE
    ${ROCPROFILER_TARGET}
    Threads::Threads
    systemd
    stdc++fs
    amd_comgr)

install(TARGETS perfetto_plugin
  LIBRARY
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
    COMPONENT runtime)
|
||||
@@ -0,0 +1,804 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "rocprofiler.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <condition_variable>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <experimental/filesystem>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <systemd/sd-id128.h>
|
||||
|
||||
#include "perfetto_sdk/sdk/perfetto.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
#include "../utils.h"
|
||||
|
||||
// NOTE(review): not referenced in the visible part of this file; presumably a
// multiplier for stream track ids, like QUEUE_CONSTANT is for queues -- confirm.
#define STREAM_CONSTANT 98736677
|
||||
// Multiplier mixed into the uuid of the per-queue tracks created in
// FlushProfilerRecord().
#define QUEUE_CONSTANT 18746479
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
// Track-event categories used by this plugin. The perfetto_plugin_t
// constructor disables all categories ("*") and then enables each of the
// names listed here.
PERFETTO_DEFINE_CATEGORIES(
|
||||
perfetto::Category("GENERIC").SetDescription("GENERAL_CATEGORY"),
|
||||
perfetto::Category("ROCTX_API").SetDescription("ACTIVITY_DOMAIN_ROCTX_API"),
|
||||
perfetto::Category("HSA_API").SetDescription("ACTIVITY_DOMAIN_HSA_API"),
|
||||
perfetto::Category("HIP_API").SetDescription("ACTIVITY_DOMAIN_HIP_API"),
|
||||
perfetto::Category("External_API").SetDescription("ACTIVITY_DOMAIN_EXT_API"),
|
||||
perfetto::Category("HIP_OPS").SetDescription("ACTIVITY_DOMAIN_HIP_OPS"),
|
||||
perfetto::Category("HSA_OPS").SetDescription("ACTIVITY_DOMAIN_HSA_OPS"),
|
||||
perfetto::Category("KERNELS").SetDescription("KERNEL_DISPATCHES"),
|
||||
perfetto::Category("COUNTERS").SetDescription("PERFORMANCE_COUNTERS"));
|
||||
|
||||
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
|
||||
|
||||
namespace {
|
||||
|
||||
// Assigned in the perfetto_plugin_t constructor from the serialized
// descriptor of the current process track.
std::string process_name;
|
||||
// Trace file name prefix: the OUT_FILE_NAME environment variable followed by
// "_", or empty when that variable is not set (see the constructor).
static std::string output_file_name;
|
||||
|
||||
// Returns the demangled name of the kernel a profiler record refers to, or an
// empty string when the reported name length is at most 1 or the queried
// name is null or shorter than two characters.
std::string get_kernel_name(rocprofiler_record_profiler_t& profiler_record) {
  std::string kernel_name = "";
  size_t name_length = 1;
  CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
                                                     profiler_record.kernel_id, &name_length));
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#pragma GCC diagnostic ignored "-Wstringop-overread"
  if (name_length > 1) {
    // NOTE(review): the query receives the address of the buffer pointer.
    // Whether it fills this buffer or replaces the pointer is not visible
    // here, so ownership of the buffer is left as it was -- confirm against
    // the API.
    const char* kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
    CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME,
                                                  profiler_record.kernel_id, &kernel_name_c));
    // Demangle straight from the queried string: an intermediate strdup()
    // copy is never released and leaks on every call.
    if (kernel_name_c && strlen(kernel_name_c) > 1)
      kernel_name = rocmtools::cxx_demangle(kernel_name_c);
  }
#pragma GCC diagnostic pop
  return kernel_name;
}
|
||||
|
||||
|
||||
class perfetto_plugin_t {
|
||||
public:
|
||||
perfetto_plugin_t() {
|
||||
const char* output_dir = getenv("OUTPUT_PATH");
|
||||
const char* temp_file_name = getenv("OUT_FILE_NAME");
|
||||
output_file_name = temp_file_name ? std::string(temp_file_name) + "_" : "";
|
||||
|
||||
if (output_dir == nullptr) {
|
||||
stream_.copyfmt(std::cout);
|
||||
stream_.clear(std::cout.rdstate());
|
||||
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
|
||||
return;
|
||||
}
|
||||
|
||||
output_prefix_ = output_dir;
|
||||
if (!fs::is_directory(fs::status(output_prefix_))) {
|
||||
if (!stream_.fail()) rocmtools::warning("Cannot open output directory '%s'", output_dir);
|
||||
stream_.setstate(std::ios_base::failbit);
|
||||
return;
|
||||
}
|
||||
|
||||
perfetto::TracingInitArgs args;
|
||||
args.backends |= perfetto::kInProcessBackend;
|
||||
|
||||
perfetto::Tracing::Initialize(args);
|
||||
perfetto::TrackEvent::Register();
|
||||
|
||||
perfetto::protos::gen::TrackEventConfig track_event_cfg;
|
||||
track_event_cfg.add_disabled_categories("*");
|
||||
track_event_cfg.add_enabled_categories("GENERIC");
|
||||
track_event_cfg.add_enabled_categories("ROCTX_API");
|
||||
track_event_cfg.add_enabled_categories("HSA_API");
|
||||
track_event_cfg.add_enabled_categories("HIP_API");
|
||||
track_event_cfg.add_enabled_categories("External_API");
|
||||
track_event_cfg.add_enabled_categories("HIP_OPS");
|
||||
track_event_cfg.add_enabled_categories("HSA_OPS");
|
||||
track_event_cfg.add_enabled_categories("KERNELS");
|
||||
track_event_cfg.add_enabled_categories("COUNTERS");
|
||||
|
||||
perfetto::TraceConfig trace_cfg;
|
||||
|
||||
auto buffer_cfg = trace_cfg.add_buffers();
|
||||
uint32_t max_buffer_size = 10 * 1024 * 1024; // Default max buffer size is 10 GB
|
||||
const char* max_buffer_size_str = getenv("rocprofiler_PERFETTO_MAX_BUFFER_SIZE_KIB");
|
||||
if (max_buffer_size_str && std::atol(max_buffer_size_str) > 0)
|
||||
max_buffer_size = std::atol(max_buffer_size_str);
|
||||
// Record up to max buffer size determined by user or the 10 GB (default value)
|
||||
buffer_cfg->set_size_kb(max_buffer_size);
|
||||
|
||||
auto* data_source_cfg = trace_cfg.add_data_sources()->mutable_config();
|
||||
data_source_cfg->set_name("track_event");
|
||||
data_source_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
|
||||
output_prefix_.append(output_file_name + std::to_string(GetPid()) + "_output.pftrace");
|
||||
file_descriptor_ = open(output_prefix_.string().c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
if (file_descriptor_ == -1) rocmtools::warning("Can't open output file\n");
|
||||
|
||||
tracing_session_ = perfetto::Tracing::NewTrace();
|
||||
tracing_session_->Setup(trace_cfg, file_descriptor_);
|
||||
tracing_session_->StartBlocking();
|
||||
|
||||
|
||||
hostname_[1023] = '\0';
|
||||
gethostname(hostname_, 1023);
|
||||
sd_id128_t ret;
|
||||
char machine_id[SD_ID128_STRING_MAX];
|
||||
[[maybe_unused]] int status = sd_id128_get_machine(&ret);
|
||||
assert(status == 0 && "Error: Couldn't get machine id!");
|
||||
if (sd_id128_to_string(ret, machine_id)) machine_id_ = std::hash<std::string>{}(machine_id);
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(thread_tracks_lock_);
|
||||
process_name =
|
||||
perfetto::ProcessTrack::Current().Serialize().mutable_process()->process_name();
|
||||
auto process_track_desc = perfetto::ProcessTrack::Current().Serialize();
|
||||
uint64_t track_id =
|
||||
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
|
||||
for (uint64_t tid : track_ids_used_) {
|
||||
while (track_id == tid) {
|
||||
track_id =
|
||||
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
std::string thread_track_str =
|
||||
rocmtools::string_printf("Node: %s Process ID: %lu Thread ID:", hostname_, GetPid());
|
||||
process_track_desc.mutable_process()->set_process_name(thread_track_str);
|
||||
perfetto::TrackEvent::SetTrackDescriptor(perfetto::ProcessTrack::Current(),
|
||||
process_track_desc);
|
||||
perfetto::ProcessTrack::Current().Serialize().set_uuid(track_id);
|
||||
thread_tracks_.emplace(GetPid(), perfetto::ProcessTrack::Current());
|
||||
}
|
||||
|
||||
is_valid_ = true;
|
||||
}
|
||||
|
||||
// Stops the tracing session and closes the trace file, but only when the
// constructor completed successfully.
~perfetto_plugin_t() {
  if (!is_valid_) return;
  tracing_session_->StopBlocking();
  close(file_descriptor_);
}
|
||||
|
||||
// Translates an activity domain into its display label; domains without a
// label yield an empty string.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      return "ROCTX_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_API:
      return "HIP_API_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_OPS:
      return "HIP_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_API:
      return "HSA_API_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_OPS:
      return "HSA_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_EVT:
      return "HSA_EVT_DOMAIN";
    default:
      return "";
  }
}
|
||||
|
||||
// Taken at the top of FlushProfilerRecord() and FlushTracerRecord() and held
// until they return.
std::mutex writing_lock;
|
||||
|
||||
// Emits one kernel dispatch into the Perfetto trace: a begin/end slice on the
// track of the dispatching queue (a child of the device track), annotated
// with the kernel properties, plus two samples per collected counter on that
// counter's track. Device, queue and counter tracks are created on first use.
//
// profiler_record: the dispatch record to emit (taken by value).
// session_id:      session passed to the counter-name lookups.
// Always returns 0.
int FlushProfilerRecord(rocprofiler_record_profiler_t profiler_record,
                        rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> lock(writing_lock);
  if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");

  int device_id = profiler_record.gpu_id.handle;
  std::unordered_map<int, perfetto::Track>::iterator device_track_it;
  {
    std::lock_guard<std::mutex> device_lock(device_tracks_lock_);
    device_track_it = device_tracks_.find(device_id);
    if (device_track_it == device_tracks_.end()) {
      /* Create a new perfetto::Track (Sub-Track) */
      device_track_it =
          device_tracks_
              .emplace(device_id, perfetto::ProcessTrack::Global(((device_id + 1) * machine_id_)))
              .first;
      auto gpu_desc = device_track_it->second.Serialize();
      gpu_desc.mutable_process()->set_pid(device_id);
      std::string gpu_str = rocmtools::string_printf("Node: %s Device:", hostname_);
      gpu_desc.mutable_process()->set_process_name(gpu_str);
      perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
      track_ids_used_.emplace_back(device_id + 1 + machine_id_);
    }
  }
  auto& gpu_track = device_track_it->second;

  // Kept as uint64_t because it is passed through a printf-style format below.
  const uint64_t queue_id = profiler_record.queue_id.handle;
  // queue_tracks_ is only touched while queue_tracks_lock_ is held.
  // NOTE(review): queue tracks are looked up by device id alone, so all
  // queues of one device end up on the track of the first queue seen --
  // confirm whether the key should include the queue id.
  decltype(queue_tracks_)::iterator queue_track_it;
  {
    std::lock_guard<std::mutex> queue_lock(queue_tracks_lock_);
    queue_track_it = queue_tracks_.find(device_id);
    if (queue_track_it == queue_tracks_.end()) {
      /* Create a new perfetto::Track */
      queue_track_it = queue_tracks_
                           .emplace(device_id,
                                    perfetto::Track((profiler_record.queue_id.handle + 1 +
                                                     profiler_record.gpu_id.handle) *
                                                        QUEUE_CONSTANT * machine_id_ * GetPid(),
                                                    gpu_track))
                           .first;

      auto queue_desc = queue_track_it->second.Serialize();
      std::string queue_str =
          rocmtools::string_printf("Process ID: %lu Queue %ld", GetPid(), queue_id);
      queue_desc.set_name(queue_str);
      perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
      // Record the id once, when the track is created (as for device
      // tracks), instead of appending an entry for every dispatch.
      track_ids_used_.emplace_back(profiler_record.queue_id.handle + machine_id_ + 1 +
                                   profiler_record.gpu_id.handle);
    }
  }
  auto& queue_track = queue_track_it->second;

  // Taken from rocprofiler: The size hasn't changed in recent past
  static const uint32_t lds_block_size = 128 * 4;

  std::string full_kernel_name = get_kernel_name(profiler_record);
  // NOTE(review): perfetto::StaticString is handed a pointer into a local
  // std::string here; confirm this is safe for the SDK version in use.
  TRACE_EVENT_BEGIN("KERNELS", perfetto::StaticString(full_kernel_name.c_str()), queue_track,
                    profiler_record.timestamps.begin.value, "Full Kernel Name",
                    full_kernel_name.c_str(), "Agent ID", device_id, "Queue ID",
                    profiler_record.queue_id.handle, "GRD",
                    profiler_record.kernel_properties.grid_size, "WGR",
                    profiler_record.kernel_properties.workgroup_size, "LDS",
                    (((profiler_record.kernel_properties.lds_size + (lds_block_size - 1)) &
                      ~(lds_block_size - 1))),
                    "SCR", profiler_record.kernel_properties.scratch_size, "Arch. VGPR",
                    profiler_record.kernel_properties.arch_vgpr_count, "Accumilative Vgpr",
                    profiler_record.kernel_properties.accum_vgpr_count, "SGPR",
                    profiler_record.kernel_properties.sgpr_count, "Wave Size",
                    profiler_record.kernel_properties.wave_size, "Signal",
                    profiler_record.kernel_properties.signal_handle);

  TRACE_EVENT_END("KERNELS", queue_track, profiler_record.timestamps.end.value);

  // Returns the counter track for counter_name, creating and describing it
  // (as a child of the device track) the first time the name is seen.
  auto get_counter_track_fn = [&](std::string counter_name) {
    std::string counter_track_id =
        std::to_string(machine_id_) + std::to_string(GetPid()) + counter_name;
    std::unordered_map<std::string, perfetto::CounterTrack>::iterator counters_track_it;
    {
      std::lock_guard<std::mutex> counter_lock(counter_tracks_lock_);
      counters_track_it = counter_tracks_.find(counter_name);
      if (counters_track_it == counter_tracks_.end()) {
        /* Create a new perfetto::Track */
        counters_track_it =
            counter_tracks_
                .emplace(counter_name, perfetto::CounterTrack(counter_track_id.c_str(), gpu_track))
                .first;

        auto counter_track_desc = counters_track_it->second.Serialize();
        std::string counter_track_str =
            "Process ID " + std::to_string(GetPid()) + " - Counter " + counter_name;
        counter_track_desc.set_name(counter_track_str);
        perfetto::TrackEvent::SetTrackDescriptor(counters_track_it->second, counter_track_desc);
      }
    }
    return counters_track_it->second;
  };

  // For Counters
  if (profiler_record.counters) {
    for (uint64_t i = 0; i < profiler_record.counters_count.value; i++) {
      if (!(profiler_record.counters[i].counter_handler.handle > 0)) continue;

      size_t name_length = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
          session_id, ROCPROFILER_COUNTER_NAME, profiler_record.counters[i].counter_handler,
          &name_length));
      if (name_length <= 1) continue;

      // NOTE(review): the query receives the address of the buffer pointer;
      // buffer ownership is left as it was -- confirm against the API.
      const char* name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
                                                     profiler_record.counters[i].counter_handler,
                                                     &name_c));
      // Constructing std::string from a null pointer is undefined.
      if (name_c == nullptr) continue;

      perfetto::CounterTrack counters_track = get_counter_track_fn(std::string(name_c));
      TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.begin.value,
                    profiler_record.counters[i].value.value);
      // A second sample (0.001) at the dispatch end marks where the counter interval ends.
      TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.end.value, 0.001);
    }
  }

  return 0;
}
|
||||
|
||||
// Emits the Perfetto track event(s) for one tracer record.
//
// tracer_record: the record to emit. Its domain selects the track that is used:
//   - ROCTX / HSA_API / HIP_API records go on a per-thread child track
//     ("ROCTX Markers", "HSA API", "HIP API") parented to the thread's track;
//   - HIP_OPS records go on a stream track and HSA_OPS records on a queue track,
//     both parented to the device track;
//   - EXT_API and unknown domains only produce a warning.
// session_id: forwarded to the rocprofiler_query_* calls that resolve names and ids.
//
// Tracks are created lazily the first time a thread/device/stream/queue is seen.
// The whole call is serialized on writing_lock. Always returns 0.
int FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
                      rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> lock(writing_lock);
  if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");

  // Event name used when a name query yields nothing. A string literal has static storage
  // duration, so the pointer handed to perfetto::StaticString never dangles.
  const char* const unknown_name = "N/A";

  std::string kernel_name;
  // Stay null unless a query fills them in, so the fallback name above is used instead of an
  // indeterminate pointer when the reported name size is <= 1.
  char* function_name = nullptr;
  char* activity_name = nullptr;
  std::string roctx_message;
  uint64_t roctx_id = 0;
  uint64_t thread_id = tracer_record.thread_id.value;

  // Draws the next track id from track_counter_. Every entry of track_ids_used_ is compared
  // once, in order; while the candidate equals the current entry a new one is drawn.
  auto reserve_track_id = [this]() -> uint64_t {
    const uint64_t step = (1 + machine_id_) * GetPid();
    uint64_t track_id = track_counter_.fetch_add(step, std::memory_order_acquire);
    for (uint64_t tid : track_ids_used_) {
      while (track_id == tid) {
        track_id = track_counter_.fetch_add(step, std::memory_order_acquire);
      }
    }
    return track_id;
  };

  // Exactly one of these is set, depending on the domain family. Pointers are used instead of
  // references so that the parent which does not apply to this record is never dereferenced.
  perfetto::Track* thread_track = nullptr;
  perfetto::Track* gpu_track = nullptr;

  if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS ||
      tracer_record.domain == ACTIVITY_DOMAIN_HSA_OPS) {
    int device_id = tracer_record.agent_id.handle;
    if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS && device_id > 0) device_id--;

    std::lock_guard<std::mutex> device_guard(device_tracks_lock_);
    auto device_track_it = device_tracks_.find(device_id);
    if (device_track_it == device_tracks_.end()) {
      /* Create a new perfetto::Track (Sub-Track) */
      device_track_it =
          device_tracks_
              .emplace(device_id, perfetto::ProcessTrack::Global(((device_id + 1) * machine_id_)))
              .first;
      auto gpu_desc = device_track_it->second.Serialize();
      gpu_desc.mutable_process()->set_pid(device_id);
      std::string gpu_str = rocmtools::string_printf("Node: %s Device:", hostname_);
      gpu_desc.mutable_process()->set_process_name(gpu_str);
      perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
      track_ids_used_.emplace_back(1 + machine_id_ + device_id);
    }
    // Addresses of unordered_map elements stay valid across later insertions.
    gpu_track = &device_track_it->second;
  } else {
    std::lock_guard<std::mutex> thread_guard(thread_tracks_lock_);
    auto thread_track_it = thread_tracks_.find(thread_id);
    if (thread_track_it == thread_tracks_.end()) {
      uint64_t track_id = reserve_track_id();
      thread_track_it =
          thread_tracks_.emplace(thread_id, perfetto::ProcessTrack::Global(track_id)).first;
      auto thread_track_desc = thread_track_it->second.Serialize();
      std::string thread_track_str =
          rocmtools::string_printf("Node: %s Process ID: %lu Thread ID:", hostname_, GetPid());
      thread_track_desc.mutable_process()->set_pid(thread_id);
      thread_track_desc.mutable_process()->set_process_name(thread_track_str);
      perfetto::TrackEvent::SetTrackDescriptor(thread_track_it->second, thread_track_desc);
    }
    thread_track = &thread_track_it->second;
  }

  switch (tracer_record.domain) {
    case ACTIVITY_DOMAIN_ROCTX: {
      std::unordered_map<uint64_t, perfetto::Track>::iterator roctx_track_it;
      {
        std::lock_guard<std::mutex> roctx_guard(roctx_tracks_lock_);
        roctx_track_it = roctx_tracks_.find(thread_id);
        if (roctx_track_it == roctx_tracks_.end()) {
          /* Create a new perfetto::Track */
          uint64_t track_id = reserve_track_id();
          roctx_track_it =
              roctx_tracks_.emplace(thread_id, perfetto::Track(track_id, *thread_track)).first;

          auto roctx_track_desc = roctx_track_it->second.Serialize();
          std::string roctx_track_str = rocmtools::string_printf("ROCTX Markers");
          roctx_track_desc.set_name(roctx_track_str);
          perfetto::TrackEvent::SetTrackDescriptor(roctx_track_it->second, roctx_track_desc);
        }
      }
      auto& roctx_track = roctx_track_it->second;

      size_t roctx_message_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
          session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_message_size));
      if (roctx_message_size > 1) {
        char* roctx_message_str = static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
            session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
            tracer_record.operation_id, &roctx_message_str));
        // std::string copies the characters, so no strdup() is needed (it only leaked).
        // NOTE(review): roctx_message_str itself is still not released here, as before;
        // confirm who owns the pointer returned by the query before adding a free().
        if (roctx_message_str)
          roctx_message = rocmtools::cxx_demangle(std::string(roctx_message_str));
      }
      size_t roctx_id_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
          session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_id_size));
      if (roctx_id_size > 1) {
        char* roctx_id_str = static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
            session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
            tracer_record.operation_id, &roctx_id_str));
        if (roctx_id_str) {
          // Parse straight from the buffer; the former strdup() copy was never freed.
          roctx_id = std::stoll(std::string(roctx_id_str));
          free(roctx_id_str);
        }
      }

      // operation_id.id == 1 opens a slice on the ROCTX track; any other id closes one.
      if (tracer_record.operation_id.id == 1) {
        perfetto::StaticString roctx_message_pft(
            (!roctx_message.empty() ? roctx_message.c_str() : ""));
        TRACE_EVENT_BEGIN("ROCTX_API", roctx_message_pft, roctx_track,
                          tracer_record.timestamps.begin.value, "Timestamp(ns)",
                          tracer_record.timestamps.begin.value, "RocTx ID", roctx_id);
        roctx_track_entries_++;
      } else {
        TRACE_EVENT_END("ROCTX_API", roctx_track, tracer_record.timestamps.begin.value);
        roctx_track_entries_--;
      }
      break;
    }
    case ACTIVITY_DOMAIN_HSA_API: {
      std::unordered_map<uint64_t, perfetto::Track>::iterator hsa_track_it;
      {
        std::lock_guard<std::mutex> hsa_guard(hsa_tracks_lock_);
        hsa_track_it = hsa_tracks_.find(thread_id);
        if (hsa_track_it == hsa_tracks_.end()) {
          /* Create a new perfetto::Track */
          uint64_t track_id = reserve_track_id();
          hsa_track_it =
              hsa_tracks_.emplace(thread_id, perfetto::Track(track_id, *thread_track)).first;
          auto hsa_track_desc = hsa_track_it->second.Serialize();
          std::string hsa_track_str = rocmtools::string_printf("HSA API");
          hsa_track_desc.set_name(hsa_track_str);
          perfetto::TrackEvent::SetTrackDescriptor(hsa_track_it->second, hsa_track_desc);
        }
      }
      auto& hsa_track = hsa_track_it->second;
      size_t function_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
          session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_size));
      if (function_name_size > 1) {
        function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
            session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &function_name));
      }
      // The name buffer is intentionally not freed: StaticString keeps the raw pointer.
      const char* hsa_event_name = function_name ? function_name : unknown_name;
      TRACE_EVENT_BEGIN("HSA_API", perfetto::StaticString(hsa_event_name), hsa_track,
                        tracer_record.timestamps.begin.value,
                        perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      TRACE_EVENT_END("HSA_API", hsa_track, tracer_record.timestamps.end.value);
      break;
    }
    case ACTIVITY_DOMAIN_HIP_API: {
      std::unordered_map<uint64_t, perfetto::Track>::iterator hip_track_it;
      {
        std::lock_guard<std::mutex> hip_guard(hip_tracks_lock_);
        hip_track_it = hip_tracks_.find(thread_id);
        if (hip_track_it == hip_tracks_.end()) {
          /* Create a new perfetto::Track */
          uint64_t track_id = reserve_track_id();
          hip_track_it =
              hip_tracks_.emplace(thread_id, perfetto::Track(track_id, *thread_track)).first;

          auto hip_track_desc = hip_track_it->second.Serialize();
          std::string hip_track_str = rocmtools::string_printf("HIP API");
          hip_track_desc.set_name(hip_track_str);
          perfetto::TrackEvent::SetTrackDescriptor(hip_track_it->second, hip_track_desc);
        }
      }
      auto& hip_track = hip_track_it->second;
      size_t function_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_size));
      if (function_name_size > 1) {
        function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
            session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &function_name));
      }
      size_t kernel_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &kernel_name_size));
      char* kernel_name_str = nullptr;
      if (kernel_name_size > 1) {
        kernel_name_str = static_cast<char*>(malloc(kernel_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
            session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &kernel_name_str));
        if (kernel_name_str) {
          kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
          free(kernel_name_str);
        }
      }
      // The name buffer is intentionally not freed: StaticString keeps the raw pointer.
      const char* hip_event_name = function_name ? function_name : unknown_name;
      // The kernel name is attached as an extra annotation only when one was reported.
      if (kernel_name.size() > 0) {
        TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(hip_event_name), hip_track,
                          tracer_record.timestamps.begin.value, "Kernel Name", kernel_name,
                          perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      } else {
        TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(hip_event_name), hip_track,
                          tracer_record.timestamps.begin.value,
                          perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      }
      TRACE_EVENT_END("HIP_API", hip_track, tracer_record.timestamps.end.value);
      break;
    }
    case ACTIVITY_DOMAIN_EXT_API: {
      printf("Warning: External API is not supported!\n");
      break;
    }
    case ACTIVITY_DOMAIN_HIP_OPS: {
      uint64_t stream_id = 0;
      size_t stream_id_str_size = 0;
      char* stream_id_str = nullptr;
      // The stream id is looked up by correlation id, passed in the operation-id slot.
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
          rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
          &stream_id_str_size));
      if (stream_id_str_size > 1) {
        stream_id_str = static_cast<char*>(malloc(stream_id_str_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
            session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
            rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
            &stream_id_str));
        if (stream_id_str != nullptr) stream_id = std::stoll(stream_id_str);
      }
      std::unordered_map<int, perfetto::Track>::iterator stream_track_it;
      {
        std::lock_guard<std::mutex> stream_guard(stream_tracks_lock_);
        stream_track_it = stream_tracks_.find(stream_id);
        if (stream_track_it == stream_tracks_.end()) {
          /* Create a new perfetto::Track */
          uint64_t track_id = ((1 + stream_id + tracer_record.agent_id.handle) * machine_id_ *
                               STREAM_CONSTANT * GetPid());
          stream_track_it =
              stream_tracks_.emplace(stream_id, perfetto::Track(track_id, *gpu_track)).first;

          auto stream_desc = stream_track_it->second.Serialize();
          // stream_id is a uint64_t, so it is printed with %lu (the former %d mismatched the
          // argument size).
          std::string stream_str =
              rocmtools::string_printf("Process ID: %lu Stream %lu", GetPid(), stream_id);
          stream_desc.set_name(stream_str);
          perfetto::TrackEvent::SetTrackDescriptor(stream_track_it->second, stream_desc);
          track_ids_used_.emplace_back(1 + machine_id_ + tracer_record.agent_id.handle);
        }
      }
      auto& stream_track = stream_track_it->second;
      if (tracer_record.api_data_handle.handle && tracer_record.api_data_handle.size > 1) {
        // The handle carries the kernel name; std::string copies it, so the former strdup()
        // (never freed) is dropped.
        kernel_name = rocmtools::cxx_demangle(
            std::string(reinterpret_cast<const char*>(tracer_record.api_data_handle.handle)));
        // The strdup() below is kept on purpose: StaticString stores the raw pointer, so the
        // truncated name must outlive this call.
        TRACE_EVENT_BEGIN(
            "HIP_OPS",
            perfetto::StaticString(strdup(rocmtools::truncate_name(kernel_name).c_str())),
            stream_track, tracer_record.timestamps.begin.value, "Agent ID",
            tracer_record.agent_id.handle, "Process ID", GetPid(), "Kernel Name", kernel_name,
            perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      } else {
        size_t activity_name_size = 0;
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
            session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &activity_name_size));
        if (activity_name_size > 1) {
          activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
          CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
              session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
              tracer_record.operation_id, &activity_name));
        }
        // Falls back to the "N/A" literal; the previous fallback pointed into a temporary
        // std::string that was destroyed before the event was emitted.
        const char* hip_activity_label = activity_name ? activity_name : unknown_name;
        TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(hip_activity_label), stream_track,
                          tracer_record.timestamps.begin.value, "Agent ID",
                          tracer_record.agent_id.handle, "Process ID", GetPid(),
                          perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      }
      TRACE_EVENT_END("HIP_OPS", stream_track, tracer_record.timestamps.end.value);
      break;
    }
    case ACTIVITY_DOMAIN_HSA_OPS: {
      std::pair<int, uint64_t> gpu_queue_id =
          std::make_pair(tracer_record.agent_id.handle, tracer_record.queue_id.handle);
      std::unordered_map<int, perfetto::Track>::iterator queue_track_it;
      {
        std::lock_guard<std::mutex> queue_guard(queue_tracks_lock_);
        // NOTE(review): queue tracks are keyed by the agent id only (gpu_queue_id.first), so
        // every queue of one agent shares the track created for the first queue seen - confirm
        // that this is intended.
        queue_track_it = queue_tracks_.find(gpu_queue_id.first);
        if (queue_track_it == queue_tracks_.end()) {
          /* Create a new perfetto::Track */
          uint64_t track_id =
              ((1 + tracer_record.queue_id.handle + tracer_record.agent_id.handle) * machine_id_ *
               QUEUE_CONSTANT * GetPid());
          queue_track_it =
              queue_tracks_.emplace(gpu_queue_id.first, perfetto::Track(track_id, *gpu_track))
                  .first;

          auto queue_desc = queue_track_it->second.Serialize();
          std::string queue_str = rocmtools::string_printf("Process ID: %lu Queue %ld", GetPid(),
                                                           gpu_queue_id.second);
          queue_desc.set_name(queue_str);
          perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
        }
        // NOTE(review): unlike the device and stream branches, this runs for every record, not
        // only when a track is created, so track_ids_used_ grows with each HSA_OPS record.
        track_ids_used_.emplace_back(tracer_record.queue_id.handle + machine_id_ + 1 +
                                     tracer_record.agent_id.handle);
      }
      auto& queue_track = queue_track_it->second;
      size_t activity_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
          session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &activity_name_size));
      if (activity_name_size > 1) {
        activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
            session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &activity_name));
      }
      const char* hsa_activity_label = activity_name ? activity_name : unknown_name;
      TRACE_EVENT_BEGIN("HSA_OPS", perfetto::StaticString(hsa_activity_label), queue_track,
                        tracer_record.timestamps.begin.value, "Agent ID",
                        tracer_record.agent_id.handle, "Queue ID", tracer_record.queue_id.handle,
                        "Process ID", GetPid(),
                        perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      TRACE_EVENT_END("HSA_OPS", queue_track, tracer_record.timestamps.end.value);
      break;
    }
    default: {
      rocmtools::warning("ignored record for domain %d", tracer_record.domain);
      break;
    }
  }
  return 0;
}
|
||||
|
||||
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id) {
|
||||
if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");
|
||||
while (begin < end) {
|
||||
if (!begin) return 0;
|
||||
switch (begin->kind) {
|
||||
case ROCPROFILER_PROFILER_RECORD: {
|
||||
rocprofiler_record_profiler_t* profiler_record = const_cast<rocprofiler_record_profiler_t*>(
|
||||
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin));
|
||||
FlushProfilerRecord(*profiler_record, session_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_TRACER_RECORD: {
|
||||
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
|
||||
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
|
||||
FlushTracerRecord(*tracer_record, session_id);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Returns the value of the is_valid_ flag.
bool IsValid() const {
  return is_valid_;
}
|
||||
|
||||
private:
|
||||
fs::path output_prefix_;
|
||||
std::unique_ptr<perfetto::TracingSession> tracing_session_;
|
||||
int file_descriptor_;
|
||||
bool is_valid_{false};
|
||||
size_t roctx_track_entries_{0};
|
||||
|
||||
// Correlate stream id(s) with correlation id(s) to identify the stream id of every HIP activity
|
||||
std::unordered_map<uint64_t, uint64_t> stream_ids_;
|
||||
|
||||
// Callback Tracks
|
||||
std::unordered_map<uint64_t, perfetto::Track> thread_tracks_;
|
||||
std::unordered_map<uint64_t, perfetto::Track> roctx_tracks_, hsa_tracks_, hip_tracks_,
|
||||
hip_ext_tracks_;
|
||||
|
||||
// Activity Tracks
|
||||
std::unordered_map<int, perfetto::Track> device_tracks_;
|
||||
std::unordered_map<int, perfetto::Track> queue_tracks_, stream_tracks_;
|
||||
|
||||
std::unordered_map<std::string, perfetto::CounterTrack> counter_tracks_;
|
||||
|
||||
std::atomic<uint64_t> track_counter_{GetPid()};
|
||||
std::vector<uint64_t> track_ids_used_;
|
||||
|
||||
std::mutex stream_ids_lock_, thread_tracks_lock_, roctx_tracks_lock_, hsa_tracks_lock_,
|
||||
hip_tracks_lock_, hip_ext_tracks_lock_, device_tracks_lock_, queue_tracks_lock_,
|
||||
stream_tracks_lock_, counter_tracks_lock_;
|
||||
|
||||
char hostname_[1024];
|
||||
uint64_t machine_id_;
|
||||
|
||||
std::ofstream stream_;
|
||||
};
|
||||
|
||||
perfetto_plugin_t* perfetto_plugin = nullptr;
|
||||
|
||||
} // namespace
|
||||
|
||||
int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
|
||||
uint32_t rocprofiler_minor_version) {
|
||||
if (rocprofiler_major_version != ROCPROFILER_VERSION_MAJOR ||
|
||||
rocprofiler_minor_version > ROCPROFILER_VERSION_MINOR)
|
||||
return -1;
|
||||
|
||||
if (perfetto_plugin != nullptr) return -1;
|
||||
|
||||
perfetto_plugin = new perfetto_plugin_t();
|
||||
if (perfetto_plugin->IsValid()) return 0;
|
||||
|
||||
delete perfetto_plugin;
|
||||
perfetto_plugin = nullptr;
|
||||
return -1;
|
||||
}
|
||||
|
||||
void rocprofiler_plugin_finalize() {
|
||||
if (!perfetto_plugin) return;
|
||||
delete perfetto_plugin;
|
||||
perfetto_plugin = nullptr;
|
||||
}
|
||||
|
||||
// Exported hook: hands the record range [begin, end) to the plugin instance.
// Returns -1 when no valid plugin instance exists, otherwise the result of
// perfetto_plugin_t::WriteBufferRecords.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  const bool plugin_usable = perfetto_plugin != nullptr && perfetto_plugin->IsValid();
  if (plugin_usable) {
    return perfetto_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
  }
  return -1;
}
|
||||
|
||||
// Exported hook: writes a single tracer record through the plugin instance.
// Returns -1 when no valid plugin instance exists. Otherwise returns 0; a record whose
// header id handle is 0 is skipped, and the result of FlushTracerRecord is not propagated.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
                                                       rocprofiler_session_id_t session_id) {
  if (perfetto_plugin == nullptr || !perfetto_plugin->IsValid()) {
    return -1;
  }
  if (record.header.id.handle != 0) {
    perfetto_plugin->FlushTracerRecord(record, session_id);
  }
  return 0;
}
|
||||
@@ -0,0 +1,189 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright (c) 2017, The Android Open Source Project
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
# Global OWNERS that can approve Perfetto changes.
|
||||
# Please look first at OWNERS in the various subdirectories before falling back
|
||||
# on this, as the former tend to be more brain-cache-hot.
|
||||
|
||||
# Perfetto tracing internals and API/ABI boundaries.
|
||||
primiano@google.com
|
||||
skyostil@google.com
|
||||
|
||||
# UI, Ftrace interop, traced_probes, protozero, Android internals.
|
||||
hjd@google.com
|
||||
|
||||
# Trace Processor, metrics, infra.
|
||||
lalitm@google.com
|
||||
|
||||
# Callstack / memory profilers, traced_probes & Linux internals.
|
||||
ddiproietto@google.com
|
||||
rsavitski@google.com
|
||||
|
||||
# Chromium-related things and tracing SDK.
|
||||
eseckler@google.com
|
||||
nuskos@google.com
|
||||
oysteine@google.com
|
||||
|
||||
# Most Android-related metrics.
|
||||
ilkos@google.com
|
||||
|
||||
# fmayer@ left the team. Please try first rsavitski@, ddiproietto@ or primiano@
|
||||
# and leave fmayer@ as an emergency-only escalation on profilers.
|
||||
fmayer@google.com
|
||||
|
||||
# chromium.org aliases for adding DEPS entries from chromium subprojects to
|
||||
# third_party/perfetto.
|
||||
eseckler@chromium.org
|
||||
nuskos@chromium.org
|
||||
skyostil@chromium.org
|
||||
@@ -0,0 +1,394 @@
|
||||
# Tracing SDK
|
||||
|
||||
The Perfetto Tracing SDK is a C++11 library that allows userspace applications
|
||||
to emit trace events and add more app-specific context to a Perfetto trace.
|
||||
|
||||
When using the Tracing SDK there are two main aspects to consider:
|
||||
|
||||
1. Whether you are interested only in tracing events coming from your own app
|
||||
or want to collect full-stack traces that overlay app trace events with
|
||||
system trace events like scheduler traces, syscalls or any other Perfetto
|
||||
data source.
|
||||
|
||||
2. For app-specific tracing, whether you need to trace simple types of timeline
|
||||
events (e.g., slices, counters) or need to define complex data sources with a
|
||||
custom strongly-typed schema (e.g., for dumping the state of a subsystem of
|
||||
your app into the trace).
|
||||
|
||||
For Android-only instrumentation, the advice is to keep using the existing
|
||||
[android.os.Trace (SDK)][atrace-sdk] / [ATrace_* (NDK)][atrace-ndk] if they
|
||||
are sufficient for your use cases. Atrace-based instrumentation is fully
|
||||
supported in Perfetto.
|
||||
See the [Data Sources -> Android System -> Atrace Instrumentation][atrace-ds]
|
||||
for details.
|
||||
|
||||
## Getting started
|
||||
|
||||
TIP: The code from these examples is also available [in the
|
||||
repository](/examples/sdk/README.md).
|
||||
|
||||
To start using the Client API, first check out the latest SDK release:
|
||||
|
||||
```bash
|
||||
git clone https://android.googlesource.com/platform/external/perfetto -b v23.0
|
||||
```
|
||||
|
||||
The SDK consists of two files, `sdk/perfetto.h` and `sdk/perfetto.cc`. These are
|
||||
an amalgamation of the Client API designed to be easy to integrate into existing
|
||||
build systems. The sources are self-contained and require only a C++11 compliant
|
||||
standard library.
|
||||
|
||||
For example, to add the SDK to a CMake project, edit your CMakeLists.txt:
|
||||
|
||||
```cmake
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
project(PerfettoExample)
|
||||
find_package(Threads)
|
||||
|
||||
# Define a static library for Perfetto.
|
||||
include_directories(perfetto/sdk)
|
||||
add_library(perfetto STATIC perfetto/sdk/perfetto.cc)
|
||||
|
||||
# Link the library to your main executable.
|
||||
add_executable(example example.cc)
|
||||
target_link_libraries(example perfetto ${CMAKE_THREAD_LIBS_INIT})
|
||||
```
|
||||
|
||||
Next, initialize Perfetto in your program:
|
||||
|
||||
```C++
|
||||
#include <perfetto.h>
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
perfetto::TracingInitArgs args;
|
||||
|
||||
// The backends determine where trace events are recorded. You may select one
|
||||
// or more of:
|
||||
|
||||
// 1) The in-process backend only records within the app itself.
|
||||
args.backends |= perfetto::kInProcessBackend;
|
||||
|
||||
// 2) The system backend writes events into a system Perfetto daemon,
|
||||
// allowing merging app and system events (e.g., ftrace) on the same
|
||||
// timeline. Requires the Perfetto `traced` daemon to be running (e.g.,
|
||||
// on Android Pie and newer).
|
||||
args.backends |= perfetto::kSystemBackend;
|
||||
|
||||
perfetto::Tracing::Initialize(args);
|
||||
}
|
||||
```
|
||||
|
||||
You are now ready to instrument your app with trace events.
|
||||
|
||||
## Custom data sources vs Track events
|
||||
|
||||
The SDK offers two abstraction layers to inject tracing data, built on top of
|
||||
each other, which trade off code complexity vs expressive power:
|
||||
[track events](#track-events) and [custom data sources](#custom-data-sources).
|
||||
|
||||
### Track events
|
||||
|
||||
Track events are the suggested option when dealing with app-specific tracing as
|
||||
they take care of a number of subtleties (e.g., thread safety, flushing, string
|
||||
interning).
|
||||
Track events are time-bounded events (e.g., slices, counters) based on simple
|
||||
`TRACE_EVENT` annotation tags in the codebase, like this:
|
||||
|
||||
```c++
|
||||
#include <perfetto.h>
|
||||
|
||||
PERFETTO_DEFINE_CATEGORIES(
|
||||
perfetto::Category("rendering")
|
||||
.SetDescription("Events from the graphics subsystem"),
|
||||
perfetto::Category("network")
|
||||
.SetDescription("Network upload and download statistics"));
|
||||
|
||||
...
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
...
|
||||
perfetto::Tracing::Initialize(args);
|
||||
perfetto::TrackEvent::Register();
|
||||
}
|
||||
|
||||
...
|
||||
|
||||
void LayerTreeHost::DoUpdateLayers() {
|
||||
TRACE_EVENT("rendering", "LayerTreeHost::DoUpdateLayers");
|
||||
...
|
||||
for (PictureLayer& pl : layers) {
|
||||
TRACE_EVENT("rendering", "PictureLayer::Update");
|
||||
pl.Update();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Which are rendered in the UI as follows:
|
||||
|
||||

|
||||
|
||||
Track events are the best default option and serve most tracing use cases with
|
||||
very little complexity.
|
||||
|
||||
To include your new track events in the trace, ensure that the `track_event`
|
||||
data source is included in the trace config. If you do not specify any
|
||||
categories then all non-debug categories will be included by default. However,
|
||||
you can also add just the categories you are interested in like so:
|
||||
|
||||
```protobuf
|
||||
data_sources {
|
||||
config {
|
||||
name: "track_event"
|
||||
track_event_config {
|
||||
enabled_categories: "rendering"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
See the [Track events page](track-events.md) for full instructions.
|
||||
|
||||
### Custom data sources
|
||||
|
||||
For most uses, track events are the most straightforward way of instrumenting
|
||||
apps for tracing. However, in some rare circumstances they are not
|
||||
flexible enough, e.g., when the data doesn't fit the notion of a track or is
|
||||
high volume enough that it needs a strongly typed schema to minimize the size of
|
||||
each event. In this case, you can implement a *custom data source* for
|
||||
Perfetto.
|
||||
|
||||
Unlike track events, when working with custom data sources, you will also need
|
||||
corresponding changes in [trace processor](/docs/analysis/trace-processor.md)
|
||||
to enable importing your data format.
|
||||
|
||||
A custom data source is a subclass of `perfetto::DataSource`. Perfetto will
|
||||
automatically create one instance of the class for each tracing session it is
|
||||
active in (usually just one).
|
||||
|
||||
```C++
|
||||
class CustomDataSource : public perfetto::DataSource<CustomDataSource> {
|
||||
public:
|
||||
void OnSetup(const SetupArgs&) override {
|
||||
// Use this callback to apply any custom configuration to your data source
|
||||
// based on the TraceConfig in SetupArgs.
|
||||
}
|
||||
|
||||
void OnStart(const StartArgs&) override {
|
||||
// This notification can be used to initialize the GPU driver, enable
|
||||
// counters, etc. StartArgs will contain the DataSourceDescriptor,
|
||||
// which can be extended.
|
||||
}
|
||||
|
||||
void OnStop(const StopArgs&) override {
|
||||
// Undo any initialization done in OnStart.
|
||||
}
|
||||
|
||||
// Data sources can also have per-instance state.
|
||||
int my_custom_state = 0;
|
||||
};
|
||||
|
||||
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
```
|
||||
|
||||
The data source's static data should be defined in one source file like this:
|
||||
|
||||
```C++
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
```
|
||||
|
||||
Custom data sources need to be registered with Perfetto:
|
||||
|
||||
```C++
|
||||
int main(int argc, char** argv) {
|
||||
...
|
||||
perfetto::Tracing::Initialize(args);
|
||||
// Add the following:
|
||||
perfetto::DataSourceDescriptor dsd;
|
||||
dsd.set_name("com.example.custom_data_source");
|
||||
CustomDataSource::Register(dsd);
|
||||
}
|
||||
```
|
||||
|
||||
As with all data sources, the custom data source needs to be specified in the
|
||||
trace config to enable tracing:
|
||||
|
||||
```C++
|
||||
perfetto::TraceConfig cfg;
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("com.example.custom_data_source");
|
||||
```
|
||||
|
||||
Finally, call the `Trace()` method to record an event with your custom data
|
||||
source. The lambda function passed to that method will only be called if tracing
|
||||
is enabled. It is always called synchronously and possibly multiple times if
|
||||
multiple concurrent tracing sessions are active.
|
||||
|
||||
```C++
|
||||
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
|
||||
auto packet = ctx.NewTracePacket();
|
||||
packet->set_timestamp(perfetto::TrackEvent::GetTraceTimeNs());
|
||||
packet->set_for_testing()->set_str("Hello world!");
|
||||
});
|
||||
```
|
||||
|
||||
If necessary the `Trace()` method can access the custom data source state
|
||||
(`my_custom_state` in the example above). Doing so will take a mutex to
|
||||
ensure the data source isn't destroyed (e.g., because of stopping tracing) while
|
||||
the `Trace()` method is called on another thread. For example:
|
||||
|
||||
```C++
|
||||
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
|
||||
auto safe_handle = ctx.GetDataSourceLocked(); // Holds a RAII lock.
|
||||
DoSomethingWith(safe_handle->my_custom_state);
|
||||
});
|
||||
```
|
||||
|
||||
## In-process vs System mode
|
||||
|
||||
The two modes are not mutually exclusive. An app can be configured to work
|
||||
in both modes and respond both to in-process tracing requests and system
|
||||
tracing requests. Both modes generate the same trace file format.
|
||||
|
||||
### In-process mode
|
||||
|
||||
In this mode both the perfetto service and the app-defined data sources are
|
||||
hosted fully in-process, in the same process as the profiled app. No connection
|
||||
to the system `traced` daemon will be attempted.
|
||||
|
||||
In-process mode can be enabled by setting
|
||||
`TracingInitArgs.backends = perfetto::kInProcessBackend` when initializing the
|
||||
SDK, see examples below.
|
||||
|
||||
This mode is used to generate traces that contain only events emitted by
|
||||
the app, but not other types of events (e.g. scheduler traces).
|
||||
|
||||
The main advantage is that by running fully in-process, it doesn't require any
|
||||
special OS privileges and the profiled process can control the lifecycle of
|
||||
tracing sessions.
|
||||
|
||||
This mode is supported on Android, Linux, MacOS and Windows.
|
||||
|
||||
### System mode
|
||||
|
||||
In this mode the app-defined data sources will connect to the external `traced`
|
||||
service using the [IPC over UNIX socket][ipc].
|
||||
|
||||
System mode can be enabled by setting
|
||||
`TracingInitArgs.backends = perfetto::kSystemBackend` when initializing the SDK,
|
||||
see examples below.
|
||||
|
||||
The main advantage of this mode is that it is possible to create fused traces where
|
||||
app events are overlaid on the same timeline of OS events. This enables
|
||||
full-stack performance investigations, looking all the way through syscalls and
|
||||
kernel scheduling events.
|
||||
|
||||
The main limitation of this mode is that it requires the external `traced` daemon
|
||||
to be up and running and reachable through the UNIX socket connection.
|
||||
|
||||
This is suggested for local debugging or lab testing scenarios where the user
|
||||
(or the test harness) can control the OS deployment (e.g., sideload binaries on
|
||||
Android).
|
||||
|
||||
When using system mode, the tracing session must be controlled from the outside,
|
||||
using the `perfetto` command-line client
|
||||
(See [reference](/docs/reference/perfetto-cli)). This is because when collecting
|
||||
system traces, tracing data producers are not allowed to read back the trace
|
||||
data as it might disclose information about other processes and allow
|
||||
side-channel attacks.
|
||||
|
||||
* On Android 9 (Pie) and beyond, traced is shipped as part of the platform.
|
||||
* On older versions of Android, traced can be built from sources using the
|
||||
[standalone NDK-based workflow](/docs/contributing/build-instructions.md)
|
||||
and sideloaded via adb shell.
|
||||
* On Linux and MacOS `traced` must be built and run separately. See the
|
||||
[Linux quickstart](/docs/quickstart/linux-tracing.md) for instructions.
|
||||
|
||||
_System mode is not yet supported on Windows, due to the lack of an IPC
|
||||
implementation_.
|
||||
|
||||
## {#recording} Recording traces through the API
|
||||
|
||||
_Tracing through the API is currently only supported with the in-process mode.
|
||||
When using system mode, use the `perfetto` cmdline client (see quickstart
|
||||
guides)._
|
||||
|
||||
First initialize a [TraceConfig](/docs/reference/trace-config-proto.autogen)
|
||||
message which specifies what type of data to record.
|
||||
|
||||
If your app includes [track events](track-events.md) (i.e., `TRACE_EVENT`), you
|
||||
typically want to choose the categories which are enabled for tracing.
|
||||
|
||||
By default, all non-debug categories are enabled, but you can enable a specific
|
||||
one like this:
|
||||
|
||||
```C++
|
||||
perfetto::protos::gen::TrackEventConfig track_event_cfg;
|
||||
track_event_cfg.add_disabled_categories("*");
|
||||
track_event_cfg.add_enabled_categories("rendering");
|
||||
```
|
||||
|
||||
Next, build the main trace config together with the track event part:
|
||||
|
||||
```C++
|
||||
perfetto::TraceConfig cfg;
|
||||
cfg.add_buffers()->set_size_kb(1024); // Record up to 1 MiB.
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("track_event");
|
||||
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
```
|
||||
|
||||
If your app includes a custom data source, you can also enable it here:
|
||||
|
||||
```C++
|
||||
ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("my_data_source");
|
||||
```
|
||||
|
||||
After building the trace config, you can begin tracing:
|
||||
|
||||
```C++
|
||||
std::unique_ptr<perfetto::TracingSession> tracing_session(
|
||||
perfetto::Tracing::NewTrace());
|
||||
tracing_session->Setup(cfg);
|
||||
tracing_session->StartBlocking();
|
||||
```
|
||||
|
||||
TIP: API methods with `Blocking` in their name will suspend the calling thread
|
||||
until the respective operation is complete. There are also asynchronous
|
||||
variants that don't have this limitation.
|
||||
|
||||
Now that tracing is active, instruct your app to perform the operation you
|
||||
want to record. After that, stop tracing and collect the
|
||||
protobuf-formatted trace data:
|
||||
|
||||
```C++
|
||||
tracing_session->StopBlocking();
|
||||
std::vector<char> trace_data(tracing_session->ReadTraceBlocking());
|
||||
|
||||
// Write the trace into a file.
|
||||
std::ofstream output;
|
||||
output.open("example.perfetto-trace", std::ios::out | std::ios::binary);
|
||||
output.write(&trace_data[0], trace_data.size());
|
||||
output.close();
|
||||
```
|
||||
|
||||
To save memory with longer traces, you can also tell Perfetto to write
|
||||
directly into a file by passing a file descriptor into Setup(), remembering
|
||||
to close the file after tracing is done:
|
||||
|
||||
```C++
|
||||
int fd = open("example.perfetto-trace", O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
tracing_session->Setup(cfg, fd);
|
||||
tracing_session->StartBlocking();
|
||||
// ...
|
||||
tracing_session->StopBlocking();
|
||||
close(fd);
|
||||
```
|
||||
|
||||
The resulting trace file can be directly opened in the [Perfetto
|
||||
UI](https://ui.perfetto.dev) or the [Trace Processor](/docs/analysis/trace-processor.md).
|
||||
|
||||
[ipc]: /docs/design-docs/api-and-abi.md#socket-protocol
|
||||
[atrace-ds]: /docs/data-sources/atrace.md
|
||||
[atrace-ndk]: https://developer.android.com/ndk/reference/group/tracing
|
||||
[atrace-sdk]: https://developer.android.com/reference/android/os/Trace
|
||||
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
@@ -0,0 +1,63 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <systemd/sd-id128.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
#include "src/utils/helper.h"
|
||||
|
||||
// Evaluates `call` once and aborts through rocmtools::fatal() when the result
// differs from ROCPROFILER_STATUS_SUCCESS. Wrapped in do/while(false) so the
// macro behaves like a single statement and can be followed by a semicolon.
#define CHECK_ROCMTOOLS(call)                                  \
  do {                                                         \
    if ((call) != ROCPROFILER_STATUS_SUCCESS) {                \
      rocmtools::fatal("Error: ROCMTools API Call Error!");    \
    }                                                          \
  } while (false)
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the id of the current process, obtained with the raw getpid system
// call. The value is computed on the first call and stored in a function-local
// static, so later calls return the cached number.
// NOTE(review): because of the cache, a child created with fork() after the
// first call would still see the parent's pid - confirm that is acceptable.
[[maybe_unused]] uint32_t GetPid() {
  static const auto cached_pid = static_cast<uint32_t>(syscall(__NR_getpid));
  return cached_pid;
}
|
||||
|
||||
[[maybe_unused]] uint64_t GetMachineID() {
|
||||
char hostname[1023] = "\0";
|
||||
gethostname(hostname, 1023);
|
||||
sd_id128_t ret;
|
||||
char machine_id[SD_ID128_STRING_MAX];
|
||||
[[maybe_unused]] int status = sd_id128_get_machine(&ret);
|
||||
assert(status == 0 && "Error: Couldn't get machine id!");
|
||||
if (sd_id128_to_string(ret, machine_id)) return std::hash<std::string>{}(machine_id);
|
||||
return std::rand();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
Executable
+247
@@ -0,0 +1,247 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Locate the script and decide which directory layout is in use.
#   ROCPROFV2_DIR  - directory containing this script (resolved via realpath)
#   ROCM_DIR       - parent of ROCPROFV2_DIR, or ROCPROFV2_DIR itself when the
#                    script path contains "/rocmtools"
#   RUN_FROM_BUILD - 1 when the script path contains "/build" or "/rocmtools",
#                    0 otherwise
# All expansions are quoted so that paths containing whitespace are not split.
ROCPROFV2_DIR=$(dirname -- "$(realpath "${BASH_SOURCE[0]}")")
ROCM_DIR=$(dirname -- "$ROCPROFV2_DIR")
RUN_FROM_BUILD=0
if [[ $ROCPROFV2_DIR == *"/build"* ]]; then
  RUN_FROM_BUILD=1
elif [[ $ROCPROFV2_DIR == *"/rocmtools"* ]]; then
  RUN_FROM_BUILD=1
  ROCM_DIR=$ROCPROFV2_DIR
fi
|
||||
|
||||
# Print the command-line help and terminate the script with status 1.
# The build/test/install options are listed only when RUN_FROM_BUILD is 1.
usage() {
  echo -e "ROCMTools Run Script Usage:"
  echo -e "-h | --help For showing this message"
  echo -e "--list-counters For showing all available counters for the current GPUs"
  if [ "$RUN_FROM_BUILD" == 1 ]; then
    echo -e "-b | --build For compiling"
    echo -e "-cb | --clean-build For full clean build"
    echo -e "-t | --test For Running the tests"
    echo -e "-ct | --clean-build-test For Running the tests after a clean build"
    echo -e "-mt | --mem-test For Running the Memory Leak tests. This run requires building using -acb | --asan-clean-build option"
    echo -e "-acb | --asan-clean-build For compiling with ASAN library attached"
    echo -e "--install For installing rocmtools without clean build in the default installation folder (review build.sh to know more about the default paths)"
    echo -e "--clean-install For installing rocmtools with new clean build in the default installation folder (review build.sh to know more about the default paths)"
  fi
  echo -e "--hip-api For Collecting HIP API Traces"
  # Fixed: the two descriptions below previously named the wrong API.
  echo -e "--hip-activity For Collecting HIP API Activities Traces"
  echo -e "--hsa-api For Collecting HSA API Traces"
  echo -e "--hsa-activity For Collecting HSA API Activities Traces"
  echo -e "--roctx-trace For Collecting ROCTx Traces"
  echo -e "--kernel-trace For Collecting Kernel dispatch Traces"
  echo -e "--sys-trace For Collecting HIP and HSA APIs and their Activities Traces along ROCTX and Kernel Dispatch traces"
  echo -e "--plugin PLUGIN_NAME For enabling a plugin (file/perfetto)"
  echo -e "-i | --input For adding counters file path (every line in the text file represents a counter)"
  # Fixed: show the long option spelling that the argument parser recognises.
  echo -e "-o | --output-file-name For the output file name"
  echo -e "-d | --output-directory For adding output path where the output files will be saved"
  echo -e "-fi | --flush-interval For adding a flush interval in milliseconds, every \"flush interval\" the buffers will be flushed"
  exit 1
}
|
||||
|
||||
# Show the help when the script is invoked without any argument.
# (usage itself exits with status 1; the explicit exit is a safeguard.)
if [ -z "$1" ] ; then
  usage
  exit 1
fi

# Reject a build-tree-only option when the script is not run from the build
# tree. Previously such an option was neither consumed nor rejected, so the
# parsing loop below never advanced and the script hung.
require_build_tree() {
  if [ "$RUN_FROM_BUILD" != 1 ]; then
    echo -e "Wrong option \"$1\", it is only available when running from the build tree:\n"
    usage
    exit 1
  fi
}

# Consume leading options. Parsing stops at the first argument that does not
# start with "-"; that argument and everything after it form the command line
# of the application to profile.
# NOTE(review): the build/test/install actions exit with status 1 even when
# they succeed; kept as-is for compatibility.
while true ; do
  case "$1" in
    -h|--help)
      usage
      exit 1
      ;;
    -b|--build)
      require_build_tree "$1"
      TO_CLEAN=no ./build.sh
      exit 1
      ;;
    -acb|--asan-clean-build)
      require_build_tree "$1"
      ASAN=yes TO_CLEAN=yes ./build.sh
      exit 1
      ;;
    -cb|--clean-build)
      require_build_tree "$1"
      TO_CLEAN=yes ./build.sh
      exit 1
      ;;
    -t|--test)
      require_build_tree "$1"
      export ROCPROFILER_METRICS_PATH="$ROCM_DIR/build/counters/derived_counters.xml"
      TO_CLEAN=no "$ROCM_DIR/build.sh"
      pushd build
      ./run_tests.sh
      exit 1
      ;;
    -mt|--mem-test)
      require_build_tree "$1"
      ASAN=yes TO_CLEAN=yes ./build.sh
      ./tests/memorytests/run_asan_tests.sh "$ROCM_DIR/build/tests/featuretests/profiler/gtests/apps/hip_vectoradd" "$ROCM_DIR/build/memleaks.log"
      exit 1
      ;;
    -ct|--clean-build-test)
      require_build_tree "$1"
      TO_CLEAN=yes "$ROCM_DIR/build.sh"
      pushd build
      ./run_tests.sh
      exit 1
      ;;
    --install)
      require_build_tree "$1"
      TO_CLEAN=no "$ROCM_DIR/build.sh"
      pushd build
      make install
      exit 1
      ;;
    --clean-install)
      require_build_tree "$1"
      TO_CLEAN=yes "$ROCM_DIR/build.sh"
      pushd build
      make install
      exit 1
      ;;
    --list-counters)
      if [ "$RUN_FROM_BUILD" == 1 ]; then
        export ROCPROFILER_METRICS_PATH="$ROCM_DIR/build/counters/derived_counters.xml"
        "$ROCM_DIR/build/src/tools/ctrl"
      else
        export ROCPROFILER_METRICS_PATH="$ROCPROFV2_DIR/../libexec/rocmtools/counters/derived_counters.xml"
        export LD_LIBRARY_PATH="$ROCPROFV2_DIR/../lib:$LD_LIBRARY_PATH"
        export LD_PRELOAD="$ROCPROFV2_DIR/../lib/librocprofiler_tool.so"
        "$ROCPROFV2_DIR/../libexec/rocmtools/ctrl"
      fi
      exit 1
      ;;
    -i|--input)
      # The counters file must be given and readable.
      if [ -n "$2" ] && [ -r "$2" ] ; then
        if [ "$RUN_FROM_BUILD" == 1 ]; then
          export ROCPROFILER_METRICS_PATH="$ROCM_DIR/build/counters/derived_counters.xml"
        else
          export ROCPROFILER_METRICS_PATH="$ROCPROFV2_DIR/../libexec/rocmtools/counters/derived_counters.xml"
        fi
        export COUNTERS_PATH="$2"
      else
        echo -e "Error: \"$2\" doesn't exist!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -o|--output-file|--output-file-name)
      # Both long spellings are accepted: the help text has advertised
      # --output-file while the parser only knew --output-file-name.
      if [ -n "$2" ] ; then
        export OUT_FILE_NAME="$2"
      else
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -d|--output-directory)
      if [ -n "$2" ] ; then
        mkdir -p "$2"
        export OUTPUT_PATH="$2"
        # Remembered separately: OUTPUT_PATH is re-exported per counter set
        # by the launch section at the end of the script.
        OUTPUT_PATH_INTERNAL="$2"
      else
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -fi|--flush-interval)
      # A non-numeric value makes the integer test fail; its diagnostic is
      # silenced because the script prints its own message below.
      if [ -n "$2" ] && [ "$2" -gt 0 ] 2>/dev/null ; then
        export ROCPROFILER_FLUSH_INTERVAL="$2"
      else
        echo -e "Wrong input \"$2\" for flush interval, it needs to be integer greater than zero!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    --hip-api)
      export ROCPROFILER_HIP_API_TRACE=1
      shift
      ;;
    --hip-activity)
      export ROCPROFILER_HIP_API_TRACE=1
      export ROCPROFILER_HIP_ACTIVITY_TRACE=1
      shift
      ;;
    --hsa-api)
      export ROCPROFILER_HSA_API_TRACE=1
      shift
      ;;
    --hsa-activity)
      export ROCPROFILER_HSA_API_TRACE=1
      export ROCPROFILER_HSA_ACTIVITY_TRACE=1
      shift
      ;;
    --roctx-trace)
      export ROCPROFILER_ROCTX_TRACE=1
      shift
      ;;
    --kernel-trace)
      export ROCPROFILER_KERNEL_TRACE=1
      shift
      ;;
    --sys-trace)
      export ROCPROFILER_HIP_API_TRACE=1
      export ROCPROFILER_HIP_ACTIVITY_TRACE=1
      export ROCPROFILER_HSA_API_TRACE=1
      export ROCPROFILER_HSA_ACTIVITY_TRACE=1
      export ROCPROFILER_ROCTX_TRACE=1
      export ROCPROFILER_KERNEL_TRACE=1
      shift
      ;;
    --amd-sys)
      export ROCPROFILER_ENABLE_AMDSYS="$2"
      shift
      shift
      ;;
    --plugin)
      # Quoted test: an unquoted `[ -n $2 ]` is true even when $2 is empty.
      if [ -n "$2" ] ; then
        PLUGIN=$2
        if [ "$RUN_FROM_BUILD" == 1 ]; then
          export ROCPROFILER_PLUGIN_LIB=lib${PLUGIN}_plugin.so
        else
          export ROCPROFILER_PLUGIN_LIB=rocmtools/lib${PLUGIN}_plugin.so
        fi
      else
        echo -e "Wrong input \"$2\" for plugin!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -*)
      echo -e "Wrong option \"$1\", Please use the following options:\n"
      usage
      exit 1
      ;;
    *)
      # First non-option argument (or no arguments left): stop parsing.
      break
      ;;
  esac
done
|
||||
|
||||
# Read the counters file (if one was given with -i): every line is one counter
# set. The `|| [[ -n "$line" ]]` part keeps a last line without trailing newline.
PMC_LINES=()
if [ -n "$COUNTERS_PATH" ]; then
  while IFS= read -r line || [[ -n "$line" ]]; do
    PMC_LINES+=( "$line" )
  done < "$COUNTERS_PATH"
fi

# Tool library that is appended to LD_PRELOAD for the profiled application.
if [ "$RUN_FROM_BUILD" == 1 ]; then
  TOOL_LIB=$ROCM_DIR/build/librocprofiler_tool.so
else
  TOOL_LIB=$ROCM_DIR/lib/librocprofiler_tool.so
fi

# "$@" (instead of the unquoted $*) passes the application's arguments through
# unchanged, including arguments that contain spaces or glob characters.
# The array length is tested rather than "$PMC_LINES", which only looks at the
# first element and would drop every counter set if the first line were empty.
if [ "${#PMC_LINES[@]}" -gt 0 ]; then
  COUNTER=1
  # Run the application once per counter set.
  for counters in "${PMC_LINES[@]}"; do
    export ROCPROFILER_COUNTERS="$counters"
    if [ -n "$OUTPUT_PATH" ]; then
      # Each run gets its own pmc_<N> sub-directory with the counter set
      # recorded in pmc.txt.
      FINAL_PATH="$OUTPUT_PATH_INTERNAL/pmc_$COUNTER"
      echo -e "\nThe output path for the following counters: $FINAL_PATH"
      mkdir -p "$FINAL_PATH"
      echo "$ROCPROFILER_COUNTERS" > "$FINAL_PATH/pmc.txt"
      export OUTPUT_PATH=$FINAL_PATH
      let COUNTER=COUNTER+1
    fi
    LD_PRELOAD="$LD_PRELOAD:$TOOL_LIB" "$@"
  done
else
  LD_PRELOAD="$LD_PRELOAD:$TOOL_LIB" "$@"
fi
|
||||
|
||||
|
||||
@@ -20,43 +20,14 @@
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
## Build is not supported on Windows plaform
|
||||
if ( WIN32 )
|
||||
message ( FATAL_ERROR "Windows build is not supported." )
|
||||
endif ()
|
||||
|
||||
## Compiler Preprocessor definitions.
|
||||
add_definitions ( -D__linux__ )
|
||||
add_definitions ( -DUNIX_OS )
|
||||
add_definitions ( -DLINUX )
|
||||
add_definitions ( -D__AMD64__ )
|
||||
add_definitions ( -D__x86_64__ )
|
||||
add_definitions ( -DLITTLEENDIAN_CPU=1 )
|
||||
add_definitions ( -DHSA_LARGE_MODEL= )
|
||||
add_definitions ( -DHSA_DEPRECATED= )
|
||||
|
||||
## Linux Compiler options
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=return-type" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-math-errno" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fmerge-all-constants" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=unused-result" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC" )
|
||||
|
||||
# Link dynamically and mark the stack as non-executable.
# Each option is a separate argument with the LINKER: prefix so CMake forwards
# it to the linker through the compiler driver. The previous single quoted
# string was passed as one argument and misspelled "noexecstack".
add_link_options ("LINKER:-Bdynamic" "LINKER:-z,noexecstack")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fms-extensions")
|
||||
|
||||
add_definitions ( -DNEW_TRACE_API=1 )
|
||||
|
||||
## CLANG options
|
||||
if ( "$ENV{CXX}" STREQUAL "/usr/bin/clang++" )
|
||||
set ( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ferror-limit=1000000" )
|
||||
if("$ENV{CXX}" STREQUAL "/usr/bin/clang++")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ferror-limit=1000000")
|
||||
endif()
|
||||
|
||||
## Enable debug trace
|
||||
|
||||
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
@@ -0,0 +1,92 @@
|
||||
# rocmtools
|
||||
|
||||
|
||||
|
||||
## Getting started
|
||||
|
||||
To make it easy for you to get started with GitLab, here's a list of recommended next steps.
|
||||
|
||||
Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!
|
||||
|
||||
## Add your files
|
||||
|
||||
- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files
|
||||
- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command:
|
||||
|
||||
```
|
||||
cd existing_repo
|
||||
git remote add origin http://gitlab1.amd.com/vinodtipparaju/rocmtools.git
|
||||
git branch -M main
|
||||
git push -uf origin main
|
||||
```
|
||||
|
||||
## Integrate with your tools
|
||||
|
||||
- [ ] [Set up project integrations](http://gitlab1.amd.com/vinodtipparaju/rocmtools/-/settings/integrations)
|
||||
|
||||
## Collaborate with your team
|
||||
|
||||
- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/)
|
||||
- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html)
|
||||
- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically)
|
||||
- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/)
|
||||
- [ ] [Automatically merge when pipeline succeeds](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html)
|
||||
|
||||
## Test and Deploy
|
||||
|
||||
Use the built-in continuous integration in GitLab.
|
||||
|
||||
- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html)
|
||||
- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing(SAST)](https://docs.gitlab.com/ee/user/application_security/sast/)
|
||||
- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html)
|
||||
- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/)
|
||||
- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html)
|
||||
|
||||
***
|
||||
|
||||
# Editing this README
|
||||
|
||||
When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thank you to [makeareadme.com](https://www.makeareadme.com/) for this template.
|
||||
|
||||
## Suggestions for a good README
|
||||
Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information.
|
||||
|
||||
## Name
|
||||
Choose a self-explaining name for your project.
|
||||
|
||||
## Description
|
||||
Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors.
|
||||
|
||||
## Badges
|
||||
On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge.
|
||||
|
||||
## Visuals
|
||||
Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method.
|
||||
|
||||
## Installation
|
||||
Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection.
|
||||
|
||||
## Usage
|
||||
Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README.
|
||||
|
||||
## Support
|
||||
Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
|
||||
|
||||
## Roadmap
|
||||
If you have ideas for releases in the future, it is a good idea to list them in the README.
|
||||
|
||||
## Contributing
|
||||
State if you are open to contributions and what your requirements are for accepting them.
|
||||
|
||||
For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self.
|
||||
|
||||
You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser.
|
||||
|
||||
## Authors and acknowledgment
|
||||
Show your appreciation to those who have contributed to the project.
|
||||
|
||||
## License
|
||||
For open source projects, say how it is licensed.
|
||||
|
||||
## Project status
|
||||
If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
|
||||
Binary file not shown.
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
@@ -0,0 +1,135 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
/** \section rocprofiler_plugin_api ROCMTools Plugin API
|
||||
*
|
||||
* The ROCMTools Plugin API is used by the ROCMTools Tool to output all
|
||||
* profiling information. Different implementations of the ROCMTools Plugin
|
||||
* API can be developed that output the data in different formats. The
|
||||
* ROCMTools Tool can be configured to load a specific library that supports
|
||||
* the user desired format.
|
||||
*
|
||||
* The API is not thread safe. It is the responsibility of the ROCMTools Tool
|
||||
* to ensure the operations are synchronized and not called concurrently. There
|
||||
* is no requirement for the ROCMTools Tool to report trace data in any
|
||||
* specific order. If the format supported by plugin requires specific
|
||||
* ordering, it is the responsibility of the plugin implementation to perform
|
||||
* any necessary sorting.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* ROCMTools Tool Plugin API interface.
|
||||
*/
|
||||
|
||||
#ifndef ROCPROFILER_PLUGIN_H_
|
||||
#define ROCPROFILER_PLUGIN_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/** \defgroup rocprofiler_plugins ROCMTools Plugin API Specification
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \defgroup initialization_group Initialization and Finalization
|
||||
* \ingroup rocprofiler_plugins
|
||||
*
|
||||
* The ROCMTools Plugin API must be initialized before using any of the
|
||||
* operations to report trace data, and finalized after the last trace data has
|
||||
* been reported.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Initialize plugin.
|
||||
* Must be called before any other operation.
|
||||
*
|
||||
* @param[in] rocprofiler_major_version The major version of the ROCMTools API
|
||||
* being used by the ROCMTools Tool. An error is reported if this does not
|
||||
* match the major version of the ROCMTools API used to build the plugin
|
||||
* library. This ensures compatibility of the trace data format.
|
||||
* @param[in] rocprofiler_minor_version The minor version of the ROCMTools API
|
||||
* being used by the ROCMTools Tool. An error is reported if the
|
||||
* \p rocprofiler_major_version matches and this is greater than the minor
|
||||
* version of the ROCMTools API used to build the plugin library. This ensures
|
||||
* compatibility of the trace data format.
|
||||
* @return Returns 0 on success and -1 on error.
|
||||
*/
|
||||
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
|
||||
uint32_t rocprofiler_minor_version);
|
||||
|
||||
/**
|
||||
* Finalize plugin.
|
||||
* This must be called after ::rocprofiler_plugin_initialize and after all
|
||||
* profiling data has been reported by
|
||||
* ::rocprofiler_plugin_write_buffer_records or ::rocprofiler_plugin_write_record.
|
||||
*/
|
||||
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize(void);
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup profiling_record_write_functions Profiling data reporting
|
||||
* \ingroup rocprofiler_plugins
|
||||
* Operations to output profiling data.
|
||||
* @{
|
||||
*/
|
||||
|
||||
// TODO(aelwazir): Recheck wording of the description
|
||||
|
||||
/**
|
||||
* Report Buffer Records.
|
||||
*
|
||||
* @param[in] begin Pointer to the first record.
|
||||
* @param[in] end Pointer to one past the last record.
|
||||
* @param[in] session_id Session ID
|
||||
* @param[in] buffer_id Buffer ID
|
||||
* @return Returns 0 on success and -1 on error.
|
||||
*/
|
||||
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* end,
|
||||
rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id);
|
||||
|
||||
/**
|
||||
* Report Synchronous Record.
|
||||
*
|
||||
* @param[in] record The Synchronous Tracer record (passed by value).
|
||||
* @param[in] session_id Session ID
|
||||
* @return Returns 0 on success and -1 on error.
|
||||
*/
|
||||
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
|
||||
rocprofiler_session_id_t session_id);
|
||||
|
||||
/** @} */
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* ROCPROFILER_PLUGIN_H_ */
|
||||
@@ -0,0 +1,25 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
add_subdirectory(file)
|
||||
add_subdirectory(perfetto)
|
||||
add_subdirectory(ctf)
|
||||
@@ -0,0 +1 @@
|
||||
README.html
|
||||
@@ -0,0 +1,161 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
# Plugin shared object.
|
||||
add_library(ctf_plugin SHARED
|
||||
ctf.cpp
|
||||
plugin.cpp
|
||||
barectf.c "${CMAKE_CURRENT_BINARY_DIR}/barectf.h"
|
||||
${PROJECT_SOURCE_DIR}/src/utils/helper.cpp
|
||||
hsa_begin.cpp.i hsa_end.cpp.i
|
||||
hip_begin.cpp.i hip_end.cpp.i)
|
||||
set_target_properties(ctf_plugin PROPERTIES
|
||||
CXX_VISIBILITY_PRESET hidden
|
||||
LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
|
||||
LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}")
|
||||
set(METADATA_STREAM_FILE_DIR "${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/plugin/ctf")
|
||||
target_compile_definitions(ctf_plugin PRIVATE
|
||||
HIP_PROF_HIP_API_STRING=1
|
||||
__HIP_PLATFORM_HCC__=1
|
||||
CTF_PLUGIN_METADATA_FILE_PATH="${CMAKE_INSTALL_PREFIX}/${METADATA_STREAM_FILE_DIR}/metadata")
|
||||
target_include_directories(ctf_plugin PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}/inc"
|
||||
"${PROJECT_SOURCE_DIR}"
|
||||
"${CMAKE_BINARY_DIR}/src/api"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}")
|
||||
target_link_options(ctf_plugin PRIVATE
|
||||
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
|
||||
-Wl,--no-undefined)
|
||||
target_link_libraries(ctf_plugin PRIVATE
|
||||
${ROCPROFILER_TARGET}
|
||||
hsa-runtime64::hsa-runtime64
|
||||
systemd
|
||||
stdc++fs
|
||||
dl)
|
||||
install(TARGETS ctf_plugin LIBRARY
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}"
|
||||
COMPONENT plugins)
|
||||
|
||||
# `gen_api_files.py` and `gen_env_yaml.py` require Python 3,
|
||||
# CppHeaderParser, PyYAML, and barectf.
|
||||
find_package(Python3 COMPONENTS Interpreter REQUIRED)
|
||||
|
||||
# Report which Python 3 interpreter the generator scripts will run with.
message(STATUS "Python: ${Python3_EXECUTABLE}")
|
||||
|
||||
# Smoke-test the interpreter at configure time. execute_process() does not
# resolve imported targets such as Python3::Interpreter, so the executable
# path found by find_package(Python3) must be used directly.
execute_process(COMMAND "${Python3_EXECUTABLE}" -c "print('hello')")
|
||||
|
||||
# check_py3_pkg(<pkg_name>)
#
# Verifies that the Python 3 interpreter located by find_package(Python3) can
# import the module `pkg_name`. Stops configuration with a fatal error when the
# import fails; otherwise prints a status line.
function(check_py3_pkg pkg_name)
  execute_process(COMMAND "${Python3_EXECUTABLE}" -c "import ${pkg_name}"
                  RESULT_VARIABLE PY3_IMPORT_RES
                  OUTPUT_QUIET)

  # Pass the variable by name so if() dereferences it itself: when the process
  # cannot be started, the result is an error string (possibly with spaces)
  # that would break an unquoted `${...}` expansion inside if().
  if(NOT PY3_IMPORT_RES EQUAL 0)
    message(FATAL_ERROR "Cannot find Python 3 package `${pkg_name}`")
  endif()

  message(STATUS "Found Python 3 package `${pkg_name}`")
endfunction()
|
||||
|
||||
check_py3_pkg(CppHeaderParser)
|
||||
check_py3_pkg(yaml)
|
||||
find_program(BARECTF_RES barectf REQUIRED)
|
||||
|
||||
# Generate barectf YAML and C++ files for HSA API.
|
||||
get_property(HSA_RUNTIME_INCLUDE_DIRS
|
||||
TARGET hsa-runtime64::hsa-runtime64
|
||||
PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
find_file(HSA_H hsa.h
|
||||
PATHS ${HSA_RUNTIME_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES hsa
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
get_filename_component(HSA_RUNTIME_INC_PATH "${HSA_H}" DIRECTORY)
|
||||
add_custom_command(
|
||||
OUTPUT hsa_erts.yaml hsa_begin.cpp.i hsa_end.cpp.i
|
||||
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa.h" -o hsa.h.i
|
||||
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
|
||||
-o hsa_ext_amd.h.i
|
||||
COMMAND ${CMAKE_COMMAND} -E cat hsa.h.i
|
||||
hsa_ext_amd.h.i
|
||||
"${CMAKE_BINARY_DIR}/src/api/hsa_prof_str.h"
|
||||
> hsa_input.h
|
||||
COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
|
||||
hsa hsa_input.h
|
||||
BYPRODUCTS hsa.h.i hsa_ext_amd.h.i hsa_input.h
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
|
||||
"${HSA_RUNTIME_INC_PATH}/hsa.h"
|
||||
"${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
|
||||
"${CMAKE_BINARY_DIR}/src/api/hsa_prof_str.h"
|
||||
COMMENT "Generating HSA API files for the `ctf` plugin...")
|
||||
|
||||
# Generate barectf YAML and C++ files for HIP API.
|
||||
get_property(HIP_INCLUDE_DIRS TARGET hip::amdhip64
|
||||
PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
find_file(HIP_RUNTIME_API_H hip_runtime_api.h
|
||||
PATHS ${HIP_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES hip
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
find_file(HIP_PROF_STR_H hip_prof_str.h
|
||||
PATHS ${HIP_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES hip hip/amd_detail
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
list(TRANSFORM HIP_INCLUDE_DIRS PREPEND -I)
|
||||
# Build rule producing the barectf YAML and C++ fragments for the HIP API:
#   1. preprocess hip_runtime_api.h with the C compiler,
#   2. concatenate the preprocessed header and hip_prof_str.h into hip_input.h,
#   3. run gen_api_files.py on hip_input.h.
# VERBATIM is deliberately not set: the concatenation step relies on the
# shell interpreting the `>` redirection.
add_custom_command(
  OUTPUT hip_erts.yaml hip_begin.cpp.i hip_end.cpp.i
  COMMAND ${CMAKE_C_COMPILER} ${HIP_INCLUDE_DIRS}
          -E "${HIP_RUNTIME_API_H}"
          -D__HIP_PLATFORM_HCC__=1
          -D__HIP_ROCclr__=1
          -o hip_runtime_api.h.i
  # Use CMake's own `cat`, matching the HSA rule in this file, instead of
  # depending on an external `cat` binary.
  COMMAND ${CMAKE_COMMAND} -E cat hip_runtime_api.h.i "${HIP_PROF_STR_H}" > hip_input.h
  COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
          hip hip_input.h
  BYPRODUCTS hip_runtime_api.h.i hip_input.h
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
          "${HIP_RUNTIME_API_H}"
          "${HIP_PROF_STR_H}"
  COMMENT "Generating HIP API files for the `ctf` plugin...")
|
||||
|
||||
# Generate `env.yaml` (trace environment for barectf).
|
||||
add_custom_command(
|
||||
OUTPUT env.yaml
|
||||
COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_env_yaml.py"
|
||||
${PROJECT_VERSION}
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/gen_env_yaml.py"
|
||||
COMMENT "Generating `env.yaml`...")
|
||||
|
||||
# Generate raw CTF tracer with barectf.
|
||||
add_custom_command(
|
||||
OUTPUT barectf.c barectf.h barectf-bitfield.h metadata
|
||||
COMMAND "${BARECTF_RES}" gen "-I${CMAKE_CURRENT_BINARY_DIR}"
|
||||
"-I${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/config.yaml"
|
||||
DEPENDS hsa_erts.yaml
|
||||
hip_erts.yaml
|
||||
env.yaml
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/config.yaml"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/dst_base.yaml"
|
||||
COMMENT "Generating raw CTF tracer with barectf...")
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/metadata"
|
||||
DESTINATION "${METADATA_STREAM_FILE_DIR}" COMPONENT plugins)
|
||||
@@ -0,0 +1,260 @@
|
||||
= CTF plugin for ROCMTools
|
||||
13 December 2022
|
||||
Philippe Proulx
|
||||
|
||||
This plugin writes the received ROCMTools tracer and profiler records to
|
||||
a https://diamon.org/ctf/[CTF] trace.
|
||||
|
||||
== Build requirements
|
||||
|
||||
* Python ≥ 3.10
|
||||
* barectf ≥ 3.1.1 (`pip3 install barectf`)
|
||||
* PyYAML (`apt-get install python3-yaml`)
|
||||
* CppHeaderParser (`pip3 install CppHeaderParser`)
|
||||
|
||||
== Usage
|
||||
|
||||
Once installed, you may load this plugin with `rocprofv2` using
|
||||
the `--plugin ctf` command-line arguments.
|
||||
|
||||
This plugin honours the `OUTPUT_PATH` environment variable which
|
||||
`rocprofv2` sets with the `-d` option. If you pass `-d my-dir` to
|
||||
`rocprofv2`, then the plugin will write the CTF trace to the
|
||||
`my-dir/trace` directory.
|
||||
|
||||
IMPORTANT: This plugin performs important cleanup tasks at finalization
|
||||
time, so the resulting CTF trace could be corrupted if the plugin is
|
||||
never finalized.
|
||||
|
||||
Once the plugin is finalized, open the resulting trace directory with
|
||||
either https://babeltrace.org/[Babeltrace{nbsp}2] or
|
||||
https://www.eclipse.org/tracecompass/[Trace Compass] to view or analyze
|
||||
it.
|
||||
|
||||
=== Event record types
|
||||
|
||||
This plugin writes to different CTF data streams having different types.
|
||||
On the file system, the prefix of a data stream file name indicates the
|
||||
data stream type, that is:
|
||||
|
||||
`roctx_`::
|
||||
rocTX messages.
|
||||
+
|
||||
Each CTF event record is named `roctx` and corresponds to a rocTX
|
||||
tracer record.
|
||||
+
|
||||
The fields are:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`id`::
|
||||
rocTX ID.
|
||||
|
||||
`msg`::
|
||||
rocTX message.
|
||||
--
|
||||
|
||||
`hsa_api_`::
|
||||
HSA API beginning and end function calls.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`agent_id`::
|
||||
Agent ID.
|
||||
|
||||
`correlation_id`::
|
||||
Correlation ID.
|
||||
--
|
||||
+
|
||||
For each ROCMTools HSA API tracer record for the HSA function named
|
||||
`__name__`, this plugin writes two event records:
|
||||
+
|
||||
`__name___begin`:::
|
||||
Beginning of the function call.
|
||||
+
|
||||
The event record contains fields which correspond to most of the
|
||||
parameters of the HSA function.
|
||||
|
||||
`__name___end`:::
|
||||
End of the function call.
|
||||
|
||||
`hip_api_`::
|
||||
HIP API beginning and end function calls.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`agent_id`::
|
||||
Agent ID.
|
||||
|
||||
`correlation_id`::
|
||||
Correlation ID.
|
||||
|
||||
`kernel_name`::
|
||||
Kernel name (empty string if not available).
|
||||
--
|
||||
+
|
||||
For each ROCMTools HIP API tracer record for the HIP function named
|
||||
`__name__`, this plugin writes two event records:
|
||||
+
|
||||
`__name__Begin`:::
|
||||
Beginning of the function call.
|
||||
+
|
||||
The event record contains fields which correspond to most of the
|
||||
parameters of the HIP function.
|
||||
|
||||
`__name__End`:::
|
||||
End of the function call.
|
||||
|
||||
`api_ops_`::
|
||||
HSA/HIP API beginning and end operations.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`agent_id`::
|
||||
Agent ID.
|
||||
|
||||
`correlation_id`::
|
||||
Correlation ID.
|
||||
--
|
||||
+
|
||||
The possible CTF event records are:
|
||||
+
|
||||
`hsa_op_begin`:::
|
||||
HSA API operation beginning.
|
||||
|
||||
`hsa_op_end`:::
|
||||
HSA API operation end.
|
||||
|
||||
`hip_op_begin`:::
|
||||
HIP API operation beginning.
|
||||
+
|
||||
Such an event record also has the field `kernel_name` which is the
|
||||
kernel name (empty string if not available).
|
||||
|
||||
`hip_op_end`:::
|
||||
HIP API operation end.
|
||||
|
||||
`profiler_`::
|
||||
Profiler records.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`dispatch`::
|
||||
Dispatch ID.
|
||||
|
||||
`gpu_id`::
|
||||
GPU ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`queue_index`::
|
||||
Queue index.
|
||||
|
||||
`process_id`::
|
||||
Process ID.
|
||||
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`kernel_id`::
|
||||
Kernel ID.
|
||||
|
||||
`kernel_name`::
|
||||
Kernel name (empty string if not available).
|
||||
|
||||
`counter_names`::
|
||||
Array of counter names, each one having a corresponding integral
|
||||
value in the `counter_values` field.
|
||||
|
||||
`counter_values`::
|
||||
Array of integers, each one being the value of a counter of which
|
||||
the name is a corresponding string in the `counter_names` field.
|
||||
--
|
||||
+
|
||||
The possible CTF event records are:
|
||||
+
|
||||
`profiler_record`:::
|
||||
Profiler record.
|
||||
|
||||
`profiler_record_with_kernel_properties`:::
|
||||
Profiler record with kernel properties.
|
||||
+
|
||||
Such an event record also has the following fields:
|
||||
+
|
||||
--
|
||||
`grid_size`::
|
||||
Grid size.
|
||||
|
||||
`workgroup_size`::
|
||||
Workgroup size.
|
||||
|
||||
`lds_size`::
|
||||
Local memory size.
|
||||
|
||||
`scratch_size`::
|
||||
Scratch size.
|
||||
|
||||
`arch_vgpr_count`::
|
||||
Architecture vector general purpose register count.
|
||||
|
||||
`accum_vgpr_count`::
|
||||
Accumulation vector general purpose register count.
|
||||
|
||||
`sgpr_count`::
|
||||
Scalar general purpose register count.
|
||||
|
||||
`wave_size`::
|
||||
Wavefront size.
|
||||
|
||||
`signal_handle`::
|
||||
Signal handle.
|
||||
--
|
||||
|
||||
`hsa_handles_`::
|
||||
HSA handle type mappings.
|
||||
+
|
||||
Each CTF event record is named `hsa_handle_type` and maps an HSA handle
|
||||
to a processor unit type (CPU or GPU).
|
||||
+
|
||||
The clock value of those event records is irrelevant (always{nbsp}0).
|
||||
+
|
||||
The fields are:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`handle`::
|
||||
HSA handle.
|
||||
|
||||
`type`::
|
||||
Processor unit type (`CPU` or `GPU` enumeration label).
|
||||
--
|
||||
@@ -0,0 +1,67 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_EVENT_RECORD_H
|
||||
#define PLUGIN_CTF_BARECTF_EVENT_RECORD_H
|
||||
|
||||
#include <memory>
|
||||
#include <cstdint>
|
||||
|
||||
struct barectf_default_ctx;
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
// Abstract base class of any barectf event record.
|
||||
//
|
||||
// A concrete event record class must implement Write() which must call
|
||||
// a corresponding barectf tracing function.
|
||||
//
|
||||
// `CtxT` is the specific type of the barectf context which Write()
|
||||
// receives.
|
||||
template <typename CtxT> class BarectfEventRecord {
|
||||
protected:
|
||||
// Builds a barectf event record having the clock value `clock_val`.
|
||||
explicit BarectfEventRecord(const std::uint64_t clock_val) noexcept : clock_val_{clock_val} {}
|
||||
|
||||
public:
|
||||
// Shared pointer to const barectf event record.
|
||||
using SP = std::shared_ptr<const BarectfEventRecord>;
|
||||
|
||||
virtual ~BarectfEventRecord() = default;
|
||||
|
||||
// Disabled copy operations to make this class simpler.
|
||||
BarectfEventRecord(const BarectfEventRecord&) = delete;
|
||||
BarectfEventRecord& operator=(const BarectfEventRecord&) = delete;
|
||||
|
||||
// Clock value of this event record.
|
||||
std::uint64_t GetClockVal() const noexcept { return clock_val_; }
|
||||
|
||||
// Calls a corresponding barectf tracing function using the barectf
|
||||
// context `barectf_ctx`.
|
||||
virtual void Write(CtxT& barectf_ctx) const = 0;
|
||||
|
||||
private:
|
||||
// Clock value.
|
||||
std::uint64_t clock_val_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_EVENT_RECORD_H
|
||||
@@ -0,0 +1,192 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_PLATFORM_H
|
||||
#define PLUGIN_CTF_BARECTF_PLATFORM_H
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "barectf.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
template <typename> class BarectfWriter;
|
||||
|
||||
// A barectf platform for any barectf writer.
|
||||
//
|
||||
// The user doesn't deal directly with such an object: it's closely
|
||||
// coupled with a barectf writer.
|
||||
//
|
||||
// Each platform takes care of a single CTF data stream file.
|
||||
//
|
||||
// After building such a platform, get the raw barectf context with
|
||||
// GetCtx() to call tracing functions. The platform must still exist
|
||||
// when calling a tracing function.
|
||||
//
|
||||
// Such a platform opens the data stream file on construction and closes
|
||||
// it on destruction.
|
||||
//
|
||||
// `DescrT` is the specific barectf platform descriptor. It must be a
|
||||
// structure having:
|
||||
//
|
||||
// `Ctx`:
|
||||
// Specific barectf context type.
|
||||
//
|
||||
// `static void OpenPacket(Ctx&)`:
|
||||
// Packet opening function.
|
||||
//
|
||||
// `static void ClosePacket(Ctx&)`:
|
||||
// Packet closing function.
|
||||
template <typename DescrT> class BarectfPlatform final {
|
||||
friend class BarectfWriter<DescrT>;
|
||||
|
||||
private:
|
||||
// Builds a barectf platform.
|
||||
//
|
||||
// The platform writes CTF packets of size `packet_size` bytes to the
|
||||
// CTF data stream file `data_stream_file_path`.
|
||||
//
|
||||
// For each event record to write, the platform reads `clock_val` to
|
||||
// know the current timestamp.
|
||||
explicit BarectfPlatform(const std::size_t packet_size,
|
||||
const std::experimental::filesystem::path& data_stream_file_path,
|
||||
const std::uint64_t& clock_val)
|
||||
: clock_val_{&clock_val}, buffer_(packet_size) {
|
||||
// Initialize barectf callbacks.
|
||||
barectf_platform_callbacks callbacks;
|
||||
|
||||
callbacks.default_clock_get_value = GetClockCb;
|
||||
callbacks.is_backend_full = IsBackendFullCb;
|
||||
callbacks.open_packet = OpenPacketCb;
|
||||
callbacks.close_packet = ClosePacketCb;
|
||||
|
||||
// Configure exceptions so that stream operations throw instead of
|
||||
// just setting flags on error.
|
||||
output_.exceptions(std::ofstream::failbit | std::ofstream::badbit);
|
||||
|
||||
// Open CTF data stream output file in binary mode.
|
||||
output_.open(data_stream_file_path, std::ios_base::out | std::ios_base::binary);
|
||||
|
||||
// Initialize the raw barectf context.
|
||||
barectf_init(&ctx_, buffer_.data(), buffer_.size(), callbacks, this);
|
||||
|
||||
// Open the initial packet.
|
||||
OpenPacketCb();
|
||||
}
|
||||
|
||||
public:
|
||||
// Disabled copy operations to make this class simpler.
|
||||
BarectfPlatform(const BarectfPlatform&) = delete;
|
||||
BarectfPlatform& operator=(const BarectfPlatform&) = delete;
|
||||
|
||||
// Closes/writes any last CTF packet and closes the data stream file.
|
||||
~BarectfPlatform() {
|
||||
if (barectf_packet_is_open(&ctx_) && !barectf_packet_is_empty(&ctx_)) {
|
||||
// Close and write last CTF packet (not empty).
|
||||
ClosePacketCb();
|
||||
}
|
||||
|
||||
// Close data stream output file.
|
||||
output_.close();
|
||||
}
|
||||
|
||||
// Returns the raw barectf context of this platform.
|
||||
const typename DescrT::Ctx& GetCtx() const noexcept { return ctx_; }
|
||||
typename DescrT::Ctx& GetCtx() noexcept { return ctx_; }
|
||||
|
||||
private:
|
||||
static BarectfPlatform& AsPlatform(void* const data) noexcept {
|
||||
return *static_cast<BarectfPlatform*>(data);
|
||||
}
|
||||
|
||||
// Four callbacks for barectf.
|
||||
//
|
||||
// Those four functions receive an instance of this class as `data`.
|
||||
|
||||
static std::uint64_t GetClockCb(void* const data) noexcept {
|
||||
// Forward to instance method.
|
||||
return AsPlatform(data).GetClockCb();
|
||||
}
|
||||
|
||||
static int IsBackendFullCb(void* const data) noexcept {
|
||||
// Forward to instance method.
|
||||
return AsPlatform(data).IsBackendFullCb();
|
||||
}
|
||||
|
||||
static void OpenPacketCb(void* const data) {
|
||||
// Forward to instance method.
|
||||
AsPlatform(data).OpenPacketCb();
|
||||
}
|
||||
|
||||
static void ClosePacketCb(void* const data) {
|
||||
// Forward to instance method.
|
||||
AsPlatform(data).ClosePacketCb();
|
||||
}
|
||||
|
||||
// Instance version of the "get clock value" callback.
|
||||
std::uint64_t GetClockCb() noexcept { return *clock_val_; }
|
||||
|
||||
// Instance version of the "is the back end full?" callback.
|
||||
int IsBackendFullCb() noexcept {
|
||||
// Never full.
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Instance version of the "open packet" callback.
|
||||
void OpenPacketCb() {
|
||||
// Forward to user (descriptor) function.
|
||||
DescrT::OpenPacket(ctx_);
|
||||
}
|
||||
|
||||
// Instance version of the "close packet" callback.
|
||||
void ClosePacketCb() {
|
||||
// Forward to user (descriptor) function to finalize the packet.
|
||||
DescrT::ClosePacket(ctx_);
|
||||
|
||||
// Write to the data stream file.
|
||||
WriteCurrentPacket();
|
||||
}
|
||||
|
||||
// Writes the current CTF packet (`buffer_`) to the data stream file.
|
||||
void WriteCurrentPacket() {
|
||||
output_.write(reinterpret_cast<const char*>(buffer_.data()), buffer_.size());
|
||||
}
|
||||
|
||||
// Clock value pointer.
|
||||
const std::uint64_t* clock_val_;
|
||||
|
||||
// CTF data stream output file stream.
|
||||
std::ofstream output_;
|
||||
|
||||
// Raw barectf context.
|
||||
typename DescrT::Ctx ctx_;
|
||||
|
||||
// CTF packet buffer.
|
||||
std::vector<std::uint8_t> buffer_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_PLATFORM_H
|
||||
@@ -0,0 +1,124 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_TRACER_H
|
||||
#define PLUGIN_CTF_BARECTF_TRACER_H
|
||||
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "barectf_event_record.h"
|
||||
#include "barectf_writer.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
// A barectf tracer offers the AddEventRecord() method to add an event
|
||||
// record which it will ultimately write to some CTF data stream file
|
||||
// within some specified CTF trace directory.
|
||||
//
|
||||
// One important feature of such a tracer is that you don't need to add
|
||||
// event records in order of time. A barectf tracer manages one or more
|
||||
// barectf writers, each one managing a single barectf platform/context
|
||||
// (CTF data stream file).
|
||||
//
|
||||
// All the CTF data stream files which a barectf tracer indirectly
|
||||
// manages share a common specified prefix. You must not use the same
|
||||
// prefix for two barectf tracers writing to the same CTF trace
|
||||
// directory.
|
||||
//
|
||||
// `PlatformDescrT` is the specific barectf platform descriptor (see the
|
||||
// documentation of the `BarectfPlatform` class template).
|
||||
template <typename PlatformDescrT> class BarectfTracer final {
|
||||
public:
|
||||
// Specific barectf event record type.
|
||||
using EventRecord = typename BarectfWriter<PlatformDescrT>::EventRecord;
|
||||
|
||||
// Builds a barectf tracer to write CTF packets of size `packet_size`
|
||||
// bytes to CTF data stream files having the prefix
|
||||
// `data_stream_file_name_prefix` within the CTF trace directory
|
||||
// `trace_dir`.
|
||||
//
|
||||
// The internal barectf writers manage event record queues having a
|
||||
// maximum size of `max_writer_queue_size`. Increasing
|
||||
// `max_writer_queue_size` increases the memory footprint of the
|
||||
// tracer, but may reduce the number of required CTF data stream files
|
||||
// to ensure time-ordered event records.
|
||||
explicit BarectfTracer(const std::size_t packet_size,
|
||||
std::experimental::filesystem::path trace_dir,
|
||||
const char* const data_stream_file_name_prefix,
|
||||
const std::size_t max_writer_queue_size = 200)
|
||||
: packet_size_{packet_size},
|
||||
trace_dir_{std::move(trace_dir)},
|
||||
data_stream_file_name_prefix_{data_stream_file_name_prefix},
|
||||
max_writer_queue_size_{max_writer_queue_size} {}
|
||||
|
||||
// Disabled copy operations to make this class simpler.
|
||||
BarectfTracer(const BarectfTracer&) = delete;
|
||||
BarectfTracer& operator=(const BarectfTracer&) = delete;
|
||||
|
||||
// Adds the event record `event_record` to this tracer.
|
||||
//
|
||||
// The clock value of `event_record` may be less than the clock value
|
||||
// of previously added event records.
|
||||
void AddEventRecord(typename EventRecord::SP event_record) {
|
||||
// Try to find a barectf writer to accept `event_record`.
|
||||
for (auto& writer : writers_) {
|
||||
if (writer->MayAddEventRecord(*event_record)) {
|
||||
// Found: add the event record to this writer and return.
|
||||
writer->AddEventRecord(std::move(event_record));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// No barectf writer found: create a new one.
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << data_stream_file_name_prefix_ << writers_.size();
|
||||
writers_.emplace_back(new BarectfWriter<PlatformDescrT>{packet_size_, trace_dir_ / ss.str(),
|
||||
max_writer_queue_size_});
|
||||
|
||||
// Add the event record to this new barectf writer.
|
||||
assert(writers_.back()->MayAddEventRecord(*event_record));
|
||||
writers_.back()->AddEventRecord(std::move(event_record));
|
||||
}
|
||||
|
||||
private:
|
||||
// CTF packet size.
|
||||
std::size_t packet_size_;
|
||||
|
||||
// CTF trace directory.
|
||||
std::experimental::filesystem::path trace_dir_;
|
||||
|
||||
// CTF data stream file name prefix.
|
||||
std::string data_stream_file_name_prefix_;
|
||||
|
||||
// Maximum event record queue size of a barectf writer.
|
||||
std::size_t max_writer_queue_size_;
|
||||
|
||||
// barectf writers.
|
||||
std::vector<std::unique_ptr<BarectfWriter<PlatformDescrT>>> writers_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_TRACER_H
|
||||
@@ -0,0 +1,178 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_WRITER_H
|
||||
#define PLUGIN_CTF_BARECTF_WRITER_H
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <cstdint>
|
||||
#include <cassert>
|
||||
#include <queue>
|
||||
#include <utility>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "barectf_platform.h"
|
||||
#include "barectf_event_record.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
template <typename> class BarectfTracer;
|
||||
|
||||
// A barectf writer manages a queue of event records, writing them
|
||||
// through barectf when needed.
|
||||
//
|
||||
// Such an object makes it possible to add some event record with a
|
||||
// clock value V and then some other event record of which the clock
|
||||
// value is less than V. The barectf writer ensures that actual barectf
|
||||
// tracing functions are called chronologically, a requirement of CTF.
|
||||
//
|
||||
// A barectf writer keeps event records in memory until its queue is
|
||||
// full (you provide the maximum queue size at construction time), in
|
||||
// which case it writes the oldest event record to some current CTF
|
||||
// packet through a barectf tracing function.
|
||||
//
|
||||
// Call MayAddEventRecord() to check whether or not you may add an event
|
||||
// record to the barectf writer, and then AddEventRecord() if you may.
|
||||
//
|
||||
// A barectf writer writes all its remaining event records on
|
||||
// destruction.
|
||||
//
|
||||
// `PlatformDescrT` is the specific barectf platform descriptor (see the
|
||||
// documentation of the `BarectfPlatform` class template).
|
||||
template <typename PlatformDescrT> class BarectfWriter final {
|
||||
friend class BarectfTracer<PlatformDescrT>;
|
||||
|
||||
public:
|
||||
// Specific barectf event record type.
|
||||
using EventRecord = BarectfEventRecord<typename PlatformDescrT::Ctx>;
|
||||
|
||||
private:
|
||||
// Builds a barectf writer to write CTF packets of size `packet_size`
|
||||
// bytes to the CTF data stream file `data_stream_file_path`.
|
||||
//
|
||||
// The built barectf writer manages an event record queue having a
|
||||
// maximum size of `max_queue_size`.
|
||||
explicit BarectfWriter(const std::size_t packet_size,
|
||||
const std::experimental::filesystem::path& data_stream_file_path,
|
||||
const std::size_t max_queue_size)
|
||||
: platform_{packet_size, data_stream_file_path, clock_val_},
|
||||
max_queue_size_{max_queue_size} {}
|
||||
|
||||
public:
|
||||
// Writes all its remaining event records.
|
||||
~BarectfWriter() {
|
||||
// Write all the remaining event records from the oldest to the
|
||||
// newest.
|
||||
while (!queue_.empty()) {
|
||||
WriteOldestEventRecord();
|
||||
}
|
||||
}
|
||||
|
||||
// Disabled copy operations to make this class simpler.
|
||||
BarectfWriter(const BarectfWriter&) = delete;
|
||||
BarectfWriter& operator=(const BarectfWriter&) = delete;
|
||||
|
||||
// Whether or not you may add the event record `event_record` to this
|
||||
// writer with AddEventRecord().
|
||||
bool MayAddEventRecord(const EventRecord& event_record) const noexcept {
|
||||
if (queue_.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// One may only add an event record if its clock value is greater
|
||||
// than or equal to the clock value of the most recently written
|
||||
// event record.
|
||||
return event_record.GetClockVal() >= clock_val_;
|
||||
}
|
||||
|
||||
// Adds the event record `event_record` to this writer.
|
||||
//
|
||||
// `MayAddEventRecord(*event_record)` must return `true`.
|
||||
void AddEventRecord(typename EventRecord::SP event_record) {
|
||||
assert(MayAddEventRecord(*event_record) && "May add event record");
|
||||
|
||||
// Add event record to queue.
|
||||
queue_.emplace(std::move(event_record));
|
||||
|
||||
if (queue_.size() > max_queue_size_) {
|
||||
// Queue is too large: write the oldest event record now to
|
||||
// satisfy the requirement.
|
||||
WriteOldestEventRecord();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Comparison type for `queue_`.
|
||||
struct EventRecordQueueCompare final {
|
||||
bool operator()(const typename EventRecord::SP& left,
|
||||
const typename EventRecord::SP& right) const noexcept {
|
||||
// "Greater than" so that the top element of the queue is the
|
||||
// oldest event record.
|
||||
return left->GetClockVal() > right->GetClockVal();
|
||||
}
|
||||
};
|
||||
|
||||
// Oldest event record within `queue_`.
|
||||
//
|
||||
// `queue_` must not be empty.
|
||||
const EventRecord& GetOldestEventRecord() const noexcept {
|
||||
assert(!queue_.empty() && "Queue isn't empty");
|
||||
return *queue_.top();
|
||||
}
|
||||
|
||||
// Writes the oldest event record through a barectf tracing function
|
||||
// and removes it from the event record queue.
|
||||
void WriteOldestEventRecord() {
|
||||
auto& oldest_event_record = GetOldestEventRecord();
|
||||
|
||||
// When calling a barectf tracing function, it calls the clock value
|
||||
// accessor callback of the platform, which itself reads from
|
||||
// `clock_val_`.
|
||||
clock_val_ = oldest_event_record.GetClockVal();
|
||||
|
||||
// Forward to a barectf tracing function.
|
||||
oldest_event_record.Write(platform_.GetCtx());
|
||||
|
||||
// Remove from queue.
|
||||
queue_.pop();
|
||||
}
|
||||
|
||||
// barectf platform (manages file I/O).
|
||||
BarectfPlatform<PlatformDescrT> platform_;
|
||||
|
||||
// Current clock value for `platform_`.
|
||||
//
|
||||
// This is also the clock value of the most recently written event
|
||||
// record, therefore that MayAddEventRecord() can rely on this.
|
||||
std::uint64_t clock_val_ = 0;
|
||||
|
||||
// Maximum size of `queue_` below.
|
||||
std::size_t max_queue_size_;
|
||||
|
||||
// Event record queue.
|
||||
std::priority_queue<typename EventRecord::SP, std::vector<typename EventRecord::SP>,
|
||||
EventRecordQueueCompare>
|
||||
queue_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_WRITER_H
|
||||
@@ -0,0 +1,165 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
%YAML 1.2
|
||||
--- !<tag:barectf.org,2020/3/config>
|
||||
trace:
|
||||
$include:
|
||||
# Environment (generated file).
|
||||
- env.yaml
|
||||
type:
|
||||
$include:
|
||||
- stdint.yaml
|
||||
- stdmisc.yaml
|
||||
native-byte-order: little-endian
|
||||
clock-types:
|
||||
default:
|
||||
origin-is-unix-epoch: true
|
||||
$c-type: uint64_t
|
||||
data-stream-types:
|
||||
hsa_api:
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
- _queue_id: uint32
|
||||
- _agent_id: uint32
|
||||
- _correlation_id: uint64
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
|
||||
# HSA API event record types (generated file).
|
||||
- hsa_erts.yaml
|
||||
hip_api:
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
- _queue_id: uint32
|
||||
- _agent_id: uint32
|
||||
- _correlation_id: uint64
|
||||
- _kernel_name: str
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
|
||||
# HIP API event record types (generated file).
|
||||
- hip_erts.yaml
|
||||
roctx:
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
event-record-types:
|
||||
roctx:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _id: sint64
|
||||
- _msg: str
|
||||
hsa_handles:
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
event-record-types:
|
||||
hsa_handle_type:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _handle: uint64
|
||||
- _type:
|
||||
field-type:
|
||||
class: uenum
|
||||
size: 8
|
||||
mappings:
|
||||
CPU: [0]
|
||||
GPU: [1]
|
||||
api_ops:
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
- _queue_id: uint32
|
||||
- _agent_id: uint32
|
||||
- _correlation_id: uint64
|
||||
event-record-types:
|
||||
hsa_op_begin:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
hsa_op_end:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
hip_op_begin:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _kernel_name: str
|
||||
hip_op_end:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
profiler:
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _dispatch: uint64
|
||||
- _gpu_id: uint64
|
||||
- _queue_id: uint64
|
||||
- _queue_index: uint64
|
||||
- _process_id: uint32
|
||||
- _thread_id: uint32
|
||||
- _kernel_id: uint64
|
||||
- _kernel_name: str
|
||||
- _counter_names:
|
||||
field-type:
|
||||
class: dynamic-array
|
||||
element-field-type: str
|
||||
- _counter_values:
|
||||
field-type:
|
||||
class: dynamic-array
|
||||
element-field-type: uint64
|
||||
event-record-types:
|
||||
profiler_record:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
profiler_record_with_kernel_properties:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _grid_size: uint64
|
||||
- _workgroup_size: uint64
|
||||
- _lds_size: uint64
|
||||
- _scratch_size: uint64
|
||||
- _arch_vgpr_count: uint64
|
||||
- _accum_vgpr_count: uint64
|
||||
- _sgpr_count: uint64
|
||||
- _wave_size: uint64
|
||||
- _signal_handle: uint64
|
||||
@@ -0,0 +1,107 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
|
||||
#include "plugin.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
// Global plugin instance
|
||||
rocm_ctf::Plugin* the_plugin = nullptr;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Creates the global plugin instance.
//
// Returns 0 on success, or -1 if the rocprofiler version isn't
// compatible, if the plugin is already initialized, if the
// `OUTPUT_PATH` environment variable isn't set, or if building the
// plugin instance throws.
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(const uint32_t rocprofiler_major_version,
                                                     const uint32_t rocprofiler_minor_version) {
  // Same major version and at least the minor version this plugin was
  // built against.
  const bool is_version_compatible = rocprofiler_major_version == ROCPROFILER_VERSION_MAJOR &&
      rocprofiler_minor_version >= ROCPROFILER_VERSION_MINOR;

  if (!is_version_compatible) {
    return -1;
  }

  // Only a single plugin instance may exist.
  if (the_plugin != nullptr) {
    return -1;
  }

  // The trace is written below the directory named by `OUTPUT_PATH`.
  const char* const output_path_env = getenv("OUTPUT_PATH");

  if (output_path_env == nullptr) {
    std::cerr << "rocprofiler_plugin_initialize(): "
              << "`OUTPUT_PATH` environment variable isn't set" << std::endl;
    return -1;
  }

  // Create the plugin instance: 256 KiB CTF packets, written to the
  // `trace` subdirectory of the output directory.
  try {
    const fs::path trace_dir = fs::path{output_path_env} / "trace";

    the_plugin = new rocm_ctf::Plugin{256 * 1024, trace_dir, CTF_PLUGIN_METADATA_FILE_PATH};
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_initialize(): " << exc.what() << std::endl;
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Destroys the global plugin instance, if any, and resets the global
// pointer so that the plugin may be initialized again.
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
  if (the_plugin != nullptr) {
    delete the_plugin;
    the_plugin = nullptr;
  }
}
|
||||
|
||||
// Forwards the buffer records in [`begin`, `end`) to the global plugin
// instance.
//
// Returns 0 on success, or -1 if the plugin isn't initialized or if
// handling the records throws.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* const begin, const rocprofiler_record_header_t* const end,
    const rocprofiler_session_id_t session_id, const rocprofiler_buffer_id_t buffer_id) {
  // A bare assert() disappears in release builds, which would leave a
  // null pointer dereference: report the misuse through the return
  // value instead.
  if (!the_plugin) {
    std::cerr << "rocprofiler_plugin_write_buffer_records(): "
              << "plugin isn't initialized" << std::endl;
    return -1;
  }

  try {
    the_plugin->HandleBufferRecords(begin, end, session_id, buffer_id);
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_write_buffer_records(): " << exc.what() << std::endl;
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Forwards the tracer record `record` to the global plugin instance.
//
// Returns 0 on success (including when `record` is skipped because its
// header ID handle is zero), or -1 if the plugin isn't initialized or
// if handling the record throws.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(const rocprofiler_record_tracer_t record,
                                                       const rocprofiler_session_id_t session_id) {
  // A bare assert() disappears in release builds, which would leave a
  // null pointer dereference: report the misuse through the return
  // value instead.
  if (!the_plugin) {
    std::cerr << "rocprofiler_plugin_write_record(): "
              << "plugin isn't initialized" << std::endl;
    return -1;
  }

  // Records having a zero ID handle are ignored.
  if (record.header.id.handle == 0) {
    return 0;
  }

  try {
    the_plugin->HandleTracerRecord(record, session_id);
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_write_record(): " << exc.what() << std::endl;
    return -1;
  }

  return 0;
}
|
||||
@@ -0,0 +1,28 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
$default-clock-type-name: default
|
||||
$features:
|
||||
packet:
|
||||
beginning-timestamp-field-type: false
|
||||
discarded-event-records-counter-snapshot-field-type: false
|
||||
end-timestamp-field-type: false
|
||||
@@ -0,0 +1,645 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
import os
|
||||
import os.path
|
||||
import sys
|
||||
import re
|
||||
import yaml
|
||||
import CppHeaderParser
|
||||
|
||||
|
||||
# Numeric field type (abstract).
|
||||
class _NumericFt:
    # Builds the C++ `static_cast` expression which converts the
    # expression `expr` to the C type of this field type.
    #
    # Concrete field types provide the `c_type` attribute/property.
    def cast(self, expr):
        target_c_type = self.c_type
        return 'static_cast<{}>({})'.format(target_c_type, expr)
|
||||
|
||||
|
||||
# Integer field type (abstract).
|
||||
class _IntFt(_NumericFt):
    # `size` is the size of the integer in bits; `pref_disp_base` is
    # its preferred display base (`dec` or `hex`).
    def __init__(self, size, pref_disp_base='dec'):
        self._size, self._pref_disp_base = size, pref_disp_base

    # Size (bits).
    @property
    def size(self):
        return self._size

    # Preferred display base (`dec` or `hex`).
    @property
    def pref_disp_base(self):
        return self._pref_disp_base

    # Equivalent barectf field type in YAML: a new dictionary on each
    # access, which subclasses complete (for example with `class`).
    @property
    def barectf_yaml(self):
        yaml_ft = {}
        yaml_ft['size'] = self._size
        yaml_ft['preferred-display-base'] = self._pref_disp_base
        return yaml_ft
|
||||
|
||||
|
||||
# Signed integer field type.
|
||||
class _SIntFt(_IntFt):
    # Equivalent barectf field type in YAML: the inherited properties
    # with the `sint` class.
    @property
    def barectf_yaml(self):
        yaml_ft = super().barectf_yaml
        yaml_ft.update({'class': 'sint'})
        return yaml_ft

    # Equivalent C type: the `std::intN_t` type of the same size.
    @property
    def c_type(self):
        return 'std::int{}_t'.format(self._size)
|
||||
|
||||
|
||||
# Unsigned integer field type.
|
||||
class _UIntFt(_IntFt):
    # barectf field type (YAML-ready dictionary): what the parent
    # class provides, tagged with the `uint` class.
    @property
    def barectf_yaml(self):
        yaml_ft = super().barectf_yaml
        yaml_ft.update({'class': 'uint'})
        return yaml_ft

    # Name of the fixed-width unsigned C++ integer type having the
    # size of this field type (for example, `std::uint64_t`).
    @property
    def c_type(self):
        return 'std::uint{}_t'.format(self._size)
|
||||
|
||||
|
||||
# Pointer field type.
|
||||
class _PointerFt(_UIntFt):
    # A pointer is always recorded as a 64-bit unsigned integer shown
    # in hexadecimal.
    def __init__(self):
        super().__init__(64, 'hex')

    # Builds the C++ expression which casts the pointer expression
    # `expr` to the C type of this field type.
    #
    # The generated code first reinterprets the pointer as
    # `std::uintptr_t`, and then converts that integer to the C type
    # of this field type.
    def cast(self, expr):
        as_uintptr = 'reinterpret_cast<std::uintptr_t>({})'.format(expr)
        return 'static_cast<{}>({})'.format(self.c_type, as_uintptr)
|
||||
|
||||
|
||||
# Enumeration field type (abstract).
|
||||
class _EnumFt(_IntFt):
    # Stores the size `size` (bits) and a shallow copy of the
    # mappings `mappings` (names to integers).
    def __init__(self, size, mappings):
        super().__init__(size)
        self._mappings = mappings.copy()

    # Mappings (names to integers); this is the internal copy made at
    # construction time.
    @property
    def mappings(self):
        return self._mappings

    # barectf field type (YAML-ready dictionary): what the parent
    # class provides, plus a `mappings` entry in which each name maps
    # to a list containing its single integer value.
    @property
    def barectf_yaml(self):
        yaml_ft = super().barectf_yaml
        yaml_ft['mappings'] = {
            name: [val] for name, val in self._mappings.items()
        }
        return yaml_ft
|
||||
|
||||
|
||||
# Unsigned enumeration field type.
|
||||
class _UEnumFt(_EnumFt, _UIntFt):
    # barectf field type (YAML-ready dictionary): what the parent
    # classes provide (size, display base, mappings), with the class
    # set to `uenum`.
    @property
    def barectf_yaml(self):
        yaml_ft = super().barectf_yaml
        yaml_ft.update({'class': 'uenum'})
        return yaml_ft
|
||||
|
||||
|
||||
# Signed enumeration field type.
|
||||
class _SEnumFt(_EnumFt, _SIntFt):
    # The integer base is `_SIntFt` (not `_UIntFt`) so that `c_type`,
    # and therefore the cast generated by cast(), names a signed C++
    # type (`std::int32_t`/`std::int64_t`), consistent with the
    # `senum` class reported below.

    # barectf field type (YAML-ready dictionary): what the parent
    # classes provide (size, display base, mappings), with the class
    # set to `senum` (overriding the `sint` class set by `_SIntFt`).
    @property
    def barectf_yaml(self):
        ret = super().barectf_yaml
        ret['class'] = 'senum'
        return ret
|
||||
|
||||
|
||||
# Optional string field type.
|
||||
class _OptStrFt:
|
||||
# Equivalent barectf field type in YAML.
|
||||
@property
|
||||
def barectf_yaml(self):
|
||||
return {
|
||||
'class': 'str',
|
||||
}
|
||||
|
||||
|
||||
# String field type.
|
||||
class _StrFt(_OptStrFt):
    # Nothing to add to `_OptStrFt`: this class only exists so that
    # code checking the exact type of a field type can tell the two
    # string field types apart.
    pass
|
||||
|
||||
|
||||
# Floating-point number field type.
|
||||
class _FloatFt(_NumericFt):
    # Stores the size `size` (bits).
    def __init__(self, size):
        self._size = size

    # Size (bits): 32 or 64.
    @property
    def size(self):
        return self._size

    # barectf field type (YAML-ready dictionary): a real number field
    # type having the size of this field type.
    @property
    def barectf_yaml(self):
        yaml_ft = {'class': 'real'}
        yaml_ft['size'] = self._size
        return yaml_ft

    # Equivalent C type: `float` for 32 bits, `double` for 64 bits.
    #
    # Any other size is a programming error (assertion).
    @property
    def c_type(self):
        if self._size == 32:
            return 'float'

        assert self._size == 64
        return 'double'
|
||||
|
||||
|
||||
# Event record type.
|
||||
class _Ert:
|
||||
def __init__(self, api_func_name, members):
|
||||
self._api_func_name = api_func_name
|
||||
self._members = members
|
||||
|
||||
# API function name
|
||||
@property
|
||||
def api_func_name(self):
|
||||
return self._api_func_name
|
||||
|
||||
# Parameters of function (list of `_ErtMember`).
|
||||
@property
|
||||
def members(self):
|
||||
return self._members
|
||||
|
||||
|
||||
# Beginning event record type.
|
||||
class _BeginErt(_Ert):
    # Name of this event record type: the API function name followed
    # by `_begin` when the API prefix `api_prefix` is `hsa`, or by
    # `Begin` for any other prefix.
    def name(self, api_prefix):
        if api_prefix == 'hsa':
            suffix = '_begin'
        else:
            suffix = 'Begin'

        return '{}{}'.format(self._api_func_name, suffix)
|
||||
|
||||
|
||||
# End event record type.
|
||||
class _EndErt(_Ert):
    # Name of this event record type: the API function name followed
    # by `_end` when the API prefix `api_prefix` is `hsa`, or by `End`
    # for any other prefix.
    def name(self, api_prefix):
        if api_prefix == 'hsa':
            suffix = '_end'
        else:
            suffix = 'End'

        return '{}{}'.format(self._api_func_name, suffix)
|
||||
|
||||
|
||||
# Event record type member.
|
||||
class _ErtMember:
|
||||
def __init__(self, access, member_names, ft):
|
||||
self._access = access
|
||||
self._member_names = member_names.copy()
|
||||
self._ft = ft
|
||||
|
||||
# C++ access expression.
|
||||
@property
|
||||
def access(self):
|
||||
return self._access
|
||||
|
||||
# List of member names.
|
||||
@property
|
||||
def member_names(self):
|
||||
return self._member_names
|
||||
|
||||
# Equivalent field type.
|
||||
@property
|
||||
def ft(self):
|
||||
return self._ft
|
||||
|
||||
|
||||
# Makes sure some condition is satisfied, or prints the error message
|
||||
# `error_msg` and quits with exit status 1 otherwise.
|
||||
#
|
||||
# This is an unconditional assertion.
|
||||
def _make_sure(cond, error_msg):
|
||||
if not cond:
|
||||
print(f'Error: {error_msg}', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _enumerator_effective_val(enum_val):
|
||||
# Try the value, but this value may be a string (an
|
||||
# enumerator/definition).
|
||||
val = enum_val.get('value')
|
||||
|
||||
if type(val) is int:
|
||||
return val
|
||||
|
||||
# Try the raw value.
|
||||
val = enum_val.get('raw_value')
|
||||
|
||||
if val is not None:
|
||||
if type(val) is int:
|
||||
# Raw value is already an integer.
|
||||
return val
|
||||
else:
|
||||
# Try to parse the raw value string as an integer.
|
||||
try:
|
||||
return int(val, 0)
|
||||
except:
|
||||
pass
|
||||
|
||||
_make_sure(False,
|
||||
f'Cannot get the integral value of enumerator `{enum_val["name"]}`')
|
||||
|
||||
|
||||
# Returns the equivalent field type of the C type `c_type`.
|
||||
# Returns the numeric field type equivalent to the C type `c_type`.
#
# `cpp_header` is the parsed header: its `enums` attribute is searched
# for an enumeration named like the first word of `c_type` (after an
# optional `enum` keyword).
#
# Returns:
#
# * For a known, non-empty enumeration: a signed enumeration field
#   type, 32 bits wide unless some enumerator value doesn't fit a
#   signed 32-bit integer (64 bits then).
#
# * For a known, empty enumeration: a signed 64-bit integer field
#   type.
#
# * Otherwise: a floating-point number or integer field type chosen
#   from the keywords found in `c_type`, assuming `int` when none is
#   found.
def _number_ft_from_c_type(cpp_header, c_type):
    # Check for known enumeration.
    m = re.match(r'(?:enum\s+)?(\w+)', c_type)

    if m:
        enum_name = m.group(1)

        for enum_info in cpp_header.enums:
            if enum_info.get('name') != enum_name:
                continue

            # Fill enumeration field type mappings.
            mappings = {
                str(v['name']): _enumerator_effective_val(v)
                for v in enum_info['values']
            }

            if len(mappings) == 0:
                # No enumerator: no mapping to offer, so fall back to
                # a plain integer.
                return _SIntFt(64)

            # Use 64 bits as soon as one value is out of the signed
            # 32-bit range.
            vals = mappings.values()
            fits_32_bits = -2**31 <= min(vals) and max(vals) < 2**31

            # Create corresponding enumeration field type.
            return _SEnumFt(32 if fits_32_bits else 64, mappings)

    # Find corresponding basic field type.
    #
    # Floating-point keywords are checked first so that `long double`
    # isn't mistaken for a `long` integer.
    if 'double' in c_type:
        return _FloatFt(64)

    if 'float' in c_type:
        return _FloatFt(32)

    if 'long' in c_type:
        size = 64
    elif 'short' in c_type:
        size = 16
    elif 'char' in c_type:
        size = 8
    else:
        # Assume `int` (often an unresolved C enumeration).
        size = 32

    if 'unsigned' in c_type:
        return _UIntFt(size)

    return _SIntFt(size)
|
||||
|
||||
|
||||
# Returns whether or not a property has a pointer type.
|
||||
def _prop_is_pointer(prop, c_type):
# `prop` is a property dictionary: the entries `pointer`,
# `function_pointer`, `array`, `unresolved`, and `name` are read
# unconditionally by the checks below, whereas `array_size` and
# `raw_type` are optional. `c_type` is the C type name as a string.
#
# Returns `True` as soon as one check identifies a pointer-like
# property, `False` when none does.
#
# NOTE(review): this copy of the file has lost its indentation, so it
# can't be told from here whether the `Fn_t` and `*` checks below
# belong to the `prop['unresolved']` branch or apply to every
# property; confirm against the original file before changing this
# function.
|
||||
if prop['pointer'] or prop['function_pointer']:
|
||||
return True
|
||||
|
||||
# An array having a known size is also handled as a pointer.
if prop['array'] and 'array_size' in prop:
|
||||
return True
|
||||
|
||||
if prop['unresolved']:
|
||||
# HSA API function pointers.
|
||||
if prop['name'] in ('callback', 'handler'):
|
||||
return True
|
||||
|
||||
# HIP API function pointers.
|
||||
if c_type.endswith('Fn_t'):
|
||||
return True
|
||||
|
||||
# Check the C type itself.
|
||||
if '*' in c_type or '*' in prop.get('raw_type', ''):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# Returns a list of event record type member objects for the structure
|
||||
# `struct` considering the initial C++ access expression `access` and
|
||||
# member names `member_names`.
|
||||
def _get_ert_members_for_struct(cpp_header, struct, access, member_names):
    # Work on a copy of the given names with one extra slot at the
    # end: this slot receives the name of each visited property in
    # turn. Sharing this list is safe because each `_ErtMember` object
    # copies the names it's given.
    cur_names = member_names.copy()
    cur_names.append(None)
    found = []

    for prop in struct['properties']['public']:
        # Property name, C type, and aliases.
        prop_name = prop['name']
        cur_names[-1] = str(prop_name)
        prop_access = '{}.{}'.format(access, prop_name)
        c_type = prop['type']
        aliases = prop['aliases']

        # Skip no type.
        if c_type == '':
            continue

        # Skip unnamed or union.
        if prop_name == '' or 'union' in prop_name or re.match(r'\bunion\b', c_type):
            continue

        # Follow known C type aliases until reaching a type which
        # isn't an alias.
        resolved = cpp_header.typedefs.get(c_type)

        while resolved is not None:
            c_type = resolved
            resolved = cpp_header.typedefs.get(c_type)

        if re.match(r'^((const\s+char)|(char\s+const)|char)\s*\*$',
                    c_type.strip()):
            # C string.
            found.append(_ErtMember(prop_access, cur_names, _OptStrFt()))
        elif _prop_is_pointer(prop, c_type):
            # Pointer: use numeric value.
            found.append(_ErtMember(prop_access, cur_names, _PointerFt()))
        else:
            # Check for substructure, by type name first and then by
            # the sole alias of the property, if any.
            sub_struct = cpp_header.classes.get(c_type)

            if sub_struct is None and len(aliases) == 1:
                sub_struct = cpp_header.classes.get(aliases[0])

            if sub_struct is not None:
                # Flatten: add the members of the substructure.
                found += _get_ert_members_for_struct(cpp_header, sub_struct,
                                                     prop_access, cur_names)
            else:
                # Use a basic field type.
                found.append(_ErtMember(prop_access, cur_names,
                                        _number_ft_from_c_type(cpp_header,
                                                               c_type)))

    return found
|
||||
|
||||
|
||||
# Returns the beginning and end event record type objects for the
|
||||
# callback data structure `struct`.
|
||||
def _erts_from_cb_data_struct(api_prefix, cpp_header, retval_info, struct):
    # Without return value information, the `args` union is the first
    # nested class of `struct`; with it, the return value union comes
    # first and the `args` union second.
    has_retvals = retval_info is not None
    args_nested_cls_index = 1 if has_retvals else 0

    # Create return value members (to be used later).
    if has_retvals:
        retval_members = {}
        outer_nested = struct['nested_classes']
        _make_sure(len(outer_nested) >= 1,
                   "Return value union doesn't exist in `{}`".format(struct['name']))

        # One member per property of the return value union, keyed by
        # property name.
        for prop in outer_nested[0]['properties']['public']:
            retval_access = 'GetApiData().{}'.format(str(prop['name']))
            retval_members[prop['name']] = _ErtMember(
                retval_access, ['retval'],
                _number_ft_from_c_type(cpp_header, prop['type']))

        # Make sure we have everything we need.
        for api_func_name, retval_name in retval_info.items():
            if retval_name is None:
                continue

            _make_sure(retval_name in retval_members,
                       "Return value union member `{}` doesn't exist (function {}())".format(retval_name, api_func_name))

    # Each property of the `args` union is expected to have its own
    # nested structure, at the same position.
    args_union = struct['nested_classes'][args_nested_cls_index]
    arg_structs = args_union['nested_classes']
    arg_props = struct['nested_classes'][args_nested_cls_index]['properties']['public']
    _make_sure(len(arg_structs) == len(arg_props),
               'Mismatch between nested structure and member count in `{}`'.format(struct['name']))

    # Create beginning/end event record type objects.
    begin_erts = []
    end_erts = []

    for prop, arg_struct in zip(arg_props, arg_structs):
        # API function name is the name of the member.
        api_func_name = str(prop['name'])

        # The beginning event record type carries the parameters.
        params = _get_ert_members_for_struct(
            cpp_header, arg_struct,
            'GetApiData().args.{}'.format(api_func_name), [])
        begin_erts.append(_BeginErt(api_func_name, params))

        # The end event record type carries the return value, when
        # there's one to record.
        ret_members = []
        retval_type = retval_info.get(api_func_name) if has_retvals else None

        if retval_type is not None:
            ret_members.append(retval_members[retval_type])

        end_erts.append(_EndErt(api_func_name, ret_members))

    return begin_erts, end_erts
|
||||
|
||||
|
||||
# Creates and returns the return value information dictionary.
|
||||
#
|
||||
# This dictionary maps API function names to the member to get within
|
||||
# the callback data structure.
|
||||
#
|
||||
# This only applies to the HSA API: for other APIs, this function
|
||||
# returns `None`.
|
||||
def _get_retval_info(path):
|
||||
if 'hsa' not in os.path.basename(path):
|
||||
return
|
||||
|
||||
retval_info = {}
|
||||
cur_api_func_name = None
|
||||
|
||||
with open(path) as f:
|
||||
for line in f:
|
||||
if 'out << ")' in line and cur_api_func_name is not None:
|
||||
m = re.search(r'api_data.(\w+_retval)', line)
|
||||
retval_info[cur_api_func_name] = m.group(1) if m else None
|
||||
else:
|
||||
m = re.search(r'out << "(hsa_\w+)\(";', line)
|
||||
|
||||
if m:
|
||||
cur_api_func_name = m.group(1)
|
||||
|
||||
return retval_info
|
||||
|
||||
|
||||
# Returns a partial barectf data stream type in YAML with the event
|
||||
# record types `erts`.
|
||||
def _yaml_dst_from_erts(api_prefix, erts):
    yaml_erts = {}

    # Create one event record type per API function.
    for ert in erts:
        # Create one structure field type member per member.
        #
        # barectf doesn't support nested CTF structures, so join
        # individual member names with `__` to flatten; each resulting
        # name also gets a `_` prefix.
        yaml_members = [
            {'_' + '__'.join(member.member_names): {'field-type': member.ft.barectf_yaml}}
            for member in ert.members
        ]

        # Add event record type, named after the API function and
        # the API prefix.
        yaml_erts[ert.name(api_prefix)] = {
            'payload-field-type': {
                'class': 'struct',
                'members': yaml_members,
            },
        }

    # Wrap into a (partial) data stream type and convert to YAML.
    return yaml.dump({'event-record-types': yaml_erts})
|
||||
|
||||
|
||||
# Returns the C++ switch statement which calls the correct barectf
|
||||
# tracing function depending on the API function operation ID.
|
||||
def _cpp_switch_statement_from_erts(api_prefix, erts):
|
||||
lines = []
|
||||
lines.append('switch (GetOp()) {')
|
||||
|
||||
for ert in erts:
|
||||
lines.append(f' case {api_prefix.upper()}_API_ID_{ert.api_func_name}:')
|
||||
lines.append(f' barectf_{api_prefix}_api_trace_{ert.name(api_prefix)}(')
|
||||
lines.append(f' &barectf_ctx,')
|
||||
lines.append(f' GetThreadId(),')
|
||||
lines.append(f' GetQueueId(),')
|
||||
lines.append(f' GetAgentId(),')
|
||||
lines.append(f' GetCorrelationId(),')
|
||||
|
||||
if api_prefix == 'hip':
|
||||
lines.append(f' GetKernelName().c_str(),')
|
||||
|
||||
if len(ert.members) == 0:
|
||||
# Remove last comma.
|
||||
lines[-1] = lines[-1].replace(',', '')
|
||||
|
||||
for index, member in enumerate(ert.members):
|
||||
if type(member.ft) is _OptStrFt:
|
||||
# Only dereference C string if not null, otherwise use
|
||||
# an empty string.
|
||||
lines.append(f' {member.access} ? {member.access} : ""')
|
||||
elif type(member.ft) is _StrFt:
|
||||
lines.append(f' {member.access}')
|
||||
else:
|
||||
lines.append(f' {member.ft.cast(member.access)}')
|
||||
|
||||
if index + 1 < len(ert.members):
|
||||
lines[-1] += ','
|
||||
|
||||
lines.append(' );')
|
||||
lines.append(' break;')
|
||||
|
||||
lines.append('}')
|
||||
return lines
|
||||
|
||||
|
||||
# Processes the complete API header file `path`.
|
||||
def _process_file(api_prefix, path):
    # Parse the header; a parsing error is printed to the standard
    # error and ends the program with exit status 1.
    try:
        cpp_header = CppHeaderParser.CppHeader(path)
    except CppHeaderParser.CppParseError as exc:
        print(exc, file=sys.stderr)
        sys.exit(1)

    # Get return value information dictionary.
    retval_info = _get_retval_info(path)

    # A callback data structure is named `<prefix>_api_data` followed
    # by at least one word character.
    cb_data_struct_name_re = re.compile(r'^' + api_prefix + r'_api_data\w+$')

    for struct_name, struct in cpp_header.classes.items():
        if not cb_data_struct_name_re.match(struct_name):
            continue

        # Process callback data structure.
        begin_erts, end_erts = _erts_from_cb_data_struct(api_prefix,
                                                         cpp_header,
                                                         retval_info, struct)

        # Write barectf YAML file (current working directory).
        with open('{}_erts.yaml'.format(api_prefix), 'w') as f:
            f.write(_yaml_dst_from_erts(api_prefix, begin_erts + end_erts))

        # Write C++ code: one file for the beginning event records,
        # then one for the end event records (current working
        # directory).
        for kind, erts in (('begin', begin_erts), ('end', end_erts)):
            with open('{}_{}.cpp.i'.format(api_prefix, kind), 'w') as f:
                f.write('\n'.join(_cpp_switch_statement_from_erts(api_prefix,
                                                                  erts)))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Two arguments are needed: the API prefix and the path of the
    # API header file. Report a missing argument clearly instead of
    # failing with an `IndexError` below.
    if len(sys.argv) < 3:
        print(f'Usage: {sys.argv[0]} API-PREFIX API-HEADER-PATH',
              file=sys.stderr)
        sys.exit(1)

    # Disable `CppHeaderParser` printing to standard output.
    CppHeaderParser.CppHeaderParser.print_warnings = 0
    CppHeaderParser.CppHeaderParser.print_errors = 0
    CppHeaderParser.CppHeaderParser.debug = 0
    CppHeaderParser.CppHeaderParser.debug_trace = 0

    # Process the complete API header file.
    _process_file(sys.argv[1], sys.argv[2])
|
||||
@@ -0,0 +1,33 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
import sys
|
||||
import yaml
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # One argument is needed: the version string to record. Report a
    # missing argument clearly instead of failing with an `IndexError`
    # below.
    if len(sys.argv) < 2:
        print(f'Usage: {sys.argv[0]} ROCPROFILER-VERSION', file=sys.stderr)
        sys.exit(1)

    # Write `env.yaml` (current working directory): a single
    # `environment` mapping holding the version given as the first
    # argument.
    with open('env.yaml', 'w') as f:
        f.write(yaml.dump({
            'environment': {
                'rocprofiler_version': sys.argv[1],
            }
        }))
|
||||
@@ -0,0 +1,869 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <limits>
|
||||
#include <fstream>
|
||||
#include <experimental/filesystem>
|
||||
#include <time.h>
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include "hsa_prof_str.h"
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/amd_detail/hip_prof_str.h>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
#include "../utils.h"
|
||||
|
||||
#include "barectf.h"
|
||||
#include "barectf_event_record.h"
|
||||
#include "barectf_tracer.h"
|
||||
#include "plugin.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace rocm_ctf {
|
||||
namespace {
|
||||
|
||||
// Abstract tracer event record using the barectf context type `CtxT`.
|
||||
template <typename CtxT>
class TracerEventRecord : public BarectfEventRecord<CtxT> {
 protected:
  // Builds a tracer event record from the tracer record `record`,
  // having the clock value `clock_val`.
  //
  // The operation, thread, queue, agent, and correlation IDs are
  // copied from `record`; nothing refers to `record` afterwards.
  explicit TracerEventRecord(const rocprofiler_record_tracer_t& record,
                             const std::uint64_t clock_val)
      : BarectfEventRecord<CtxT>{clock_val},
        op_{record.operation_id.id},
        thread_id_{record.thread_id.value},
        queue_id_{record.queue_id.handle},
        agent_id_{record.agent_id.handle},
        correlation_id_{record.correlation_id.value} {}

  // Operation ID.
  std::uint32_t GetOp() const noexcept {
    return op_;
  }

  // Thread ID.
  std::uint32_t GetThreadId() const noexcept {
    return thread_id_;
  }

  // Queue ID (handle).
  std::uint64_t GetQueueId() const noexcept {
    return queue_id_;
  }

  // Agent ID (handle).
  std::uint64_t GetAgentId() const noexcept {
    return agent_id_;
  }

  // Correlation ID.
  std::uint64_t GetCorrelationId() const noexcept {
    return correlation_id_;
  }

 private:
  // Values copied from the tracer record at construction time.
  std::uint32_t op_;
  std::uint32_t thread_id_;
  std::uint64_t queue_id_;
  std::uint64_t agent_id_;
  std::uint64_t correlation_id_;
};
|
||||
|
||||
// Returns the beginning clock value of the tracer or profiler record
|
||||
// `record`.
|
||||
template <typename RecordT>
std::uint64_t GetRecordBeginClockVal(const RecordT& record) {
  // Works with any record type having a `timestamps.begin.value`
  // member.
  const auto& begin_ts = record.timestamps.begin;

  return begin_ts.value;
}
|
||||
|
||||
// Returns the end clock value of the tracer or profiler record
|
||||
// `record`.
|
||||
template <typename RecordT>
std::uint64_t GetRecordEndClockVal(const RecordT& record) {
  // Works with any record type having a `timestamps.end.value`
  // member.
  const auto& end_ts = record.timestamps.end;

  return end_ts.value;
}
|
||||
|
||||
// Queries allocated string data using the size query function
|
||||
// `query_size_func` and the data query function `query_data_func`,
|
||||
// returning the corresponding string and freeing temporary allocated
|
||||
// memory.
|
||||
//
|
||||
// Returns an empty string if anything goes wrong.
|
||||
template <typename QuerySizeFuncT, typename QueryDataFuncT>
|
||||
std::string QueryAllocStr(QuerySizeFuncT&& query_size_func, QueryDataFuncT&& query_data_func) {
|
||||
// Query size first.
|
||||
std::size_t size = 0;
|
||||
[[maybe_unused]] auto ret = query_size_func(&size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query size");
|
||||
|
||||
if (size == 0) {
|
||||
// No size: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
// Query data (allocated by query_data_func()).
|
||||
char* alloc_str = nullptr;
|
||||
|
||||
ret = query_data_func(&alloc_str);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query data");
|
||||
|
||||
if (!alloc_str) {
|
||||
// No data: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
// Allocate return value.
|
||||
std::string str_ret{alloc_str};
|
||||
|
||||
// Free allocated data.
|
||||
std::free(alloc_str);
|
||||
|
||||
// Return string object.
|
||||
return str_ret;
|
||||
}
|
||||
|
||||
// rocTX event record.
|
||||
class RocTxEventRecord final : public TracerEventRecord<barectf_roctx_ctx> {
 public:
  // Builds a rocTX event record from the tracer record `record` of
  // the session `session_id`.
  //
  // The clock value is the beginning timestamp of `record`; the rocTX
  // ID and message are queried immediately and kept by value.
  explicit RocTxEventRecord(const rocprofiler_record_tracer_t& record,
                            const rocprofiler_session_id_t session_id)
      : TracerEventRecord<barectf_roctx_ctx>{record, GetRecordBeginClockVal(record)},
        id_{QueryId(record, session_id)},
        msg_{QueryMsg(record, session_id)} {}

  // Writes this event record (thread ID, rocTX ID, and message) using
  // the barectf context `barectf_ctx`.
  void Write(barectf_roctx_ctx& barectf_ctx) const override {
    barectf_roctx_trace_roctx(&barectf_ctx, GetThreadId(), id_, msg_.c_str());
  }

 private:
  // Queries and returns the rocTX message of the record `record` and
  // session ID `session_id`, passed through rocmtools::cxx_demangle().
  //
  // Returns an empty string if the reported size is zero or if no
  // message is available.
  static std::string QueryMsg(const rocprofiler_record_tracer_t& record,
                              const rocprofiler_session_id_t session_id) {
    // The size query only tells whether or not there's a message.
    std::size_t reported_size = 0;
    [[maybe_unused]] auto status = rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id,
        &reported_size);

    assert(status == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message size");

    if (reported_size == 0) {
      return {};
    }

    // Query data (borrowed from the record: no need to free).
    char* borrowed_msg = nullptr;

    status = rocprofiler_query_roctx_tracer_api_data_info(
        session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id,
        &borrowed_msg);
    assert(status == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message");

    if (!borrowed_msg) {
      return {};
    }

    return rocmtools::cxx_demangle(borrowed_msg);
  }

  // Queries and returns the rocTX ID of the record `record` and the
  // session ID `session_id`.
  //
  // The ID is queried as a string and then parsed as an unsigned
  // integer; returns 0 if anything throws along the way (including
  // when the string isn't a number).
  static std::uint64_t QueryId(const rocprofiler_record_tracer_t& record,
                               const rocprofiler_session_id_t session_id) {
    try {
      const auto query_size = [&record, session_id](const auto size_out) {
        return rocprofiler_query_roctx_tracer_api_data_info_size(
            session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id,
            size_out);
      };
      const auto query_data = [&record, session_id](const auto str_out) {
        return rocprofiler_query_roctx_tracer_api_data_info(
            session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id,
            str_out);
      };

      return std::stoull(QueryAllocStr(query_size, query_data));
    } catch (...) {
      return 0;
    }
  }

  // rocTX ID and message, queried at construction time.
  std::uint64_t id_;
  std::string msg_;
};
|
||||
|
||||
// Abstract HSA API event record.
|
||||
class HsaApiEventRecord : public TracerEventRecord<barectf_hsa_api_ctx> {
 protected:
  // Builds an HSA API event record from the tracer record `record` of
  // the session `session_id`, having the clock value `clock_val`.
  //
  // The HSA API data is queried immediately and kept by value.
  explicit HsaApiEventRecord(const rocprofiler_record_tracer_t& record,
                             const rocprofiler_session_id_t session_id,
                             const std::uint64_t clock_val)
      : TracerEventRecord<barectf_hsa_api_ctx>{record, clock_val},
        api_data_{QueryApiData(record, session_id)} {}

  // HSA API data (copy held by this object).
  const hsa_api_data_t& GetApiData() const noexcept {
    return api_data_;
  }

 private:
  // Queries and returns the API data of the record `record` and
  // session ID `session_id`.
  //
  // The returned reference refers to the data which the query
  // provides: failures are only checked with assertions.
  static const hsa_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
                                            const rocprofiler_session_id_t session_id) {
    // Query size first (only for assertions).
    [[maybe_unused]] std::size_t data_size = 0;
    [[maybe_unused]] auto status = rocprofiler_query_hsa_tracer_api_data_info_size(
        session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id,
        &data_size);

    assert(status == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data size");
    assert(data_size > 0);

    // Query data (borrowed from the record).
    char* raw_data = nullptr;

    status = rocprofiler_query_hsa_tracer_api_data_info(
        session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id,
        &raw_data);
    assert(status == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data");
    assert(raw_data);

    // Reinterpret as an HSA API data pointer.
    const auto* const api_data = reinterpret_cast<const hsa_api_data_t*>(raw_data);

    return *api_data;
  }

  // HSA API data, copied at construction time.
  hsa_api_data_t api_data_;
};
|
||||
|
||||
// HSA API event record (beginning).
|
||||
class HsaApiEventRecordBegin final : public HsaApiEventRecord {
 public:
  // Builds the beginning event record of the tracer record `record`
  // of the session `session_id`: its clock value is the beginning
  // timestamp of `record`.
  explicit HsaApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
                                  const rocprofiler_session_id_t session_id)
      : HsaApiEventRecord(record, session_id, GetRecordBeginClockVal(record)) {}

  // Writes this event record using the barectf context `barectf_ctx`.
  //
  // The whole body is the generated switch statement, which refers
  // to `barectf_ctx` by name.
  void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
#include "hsa_begin.cpp.i"
  }
};
|
||||
|
||||
// HSA API event record (end).
|
||||
class HsaApiEventRecordEnd final : public HsaApiEventRecord {
 public:
  // Builds the end event record of the tracer record `record` of the
  // session `session_id`: its clock value is the end timestamp of
  // `record`.
  explicit HsaApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
                                const rocprofiler_session_id_t session_id)
      : HsaApiEventRecord(record, session_id, GetRecordEndClockVal(record)) {}

  // Writes this event record using the barectf context `barectf_ctx`.
  //
  // The whole body is the generated switch statement, which refers
  // to `barectf_ctx` by name.
  void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
#include "hsa_end.cpp.i"
  }
};
|
||||
|
||||
// Abstract HIP API event record.
|
||||
class HipApiEventRecord : public TracerEventRecord<barectf_hip_api_ctx> {
 protected:
  // Builds a HIP API event record from the tracer record `record` of
  // the session `session_id`, having the clock value `clock_val`.
  //
  // The HIP API data and the kernel name are queried immediately and
  // kept by value.
  explicit HipApiEventRecord(const rocprofiler_record_tracer_t& record,
                             const rocprofiler_session_id_t session_id,
                             const std::uint64_t clock_val)
      : TracerEventRecord<barectf_hip_api_ctx>{record, clock_val},
        api_data_{QueryApiData(record, session_id)},
        kernel_name_{QueryKernelName(record, session_id)} {}

  // HIP API data (copy held by this object).
  const hip_api_data_t& GetApiData() const noexcept {
    return api_data_;
  }

  // Kernel name (possibly empty).
  const std::string& GetKernelName() const noexcept {
    return kernel_name_;
  }

 private:
  // Queries and returns the API data of the record `record` and
  // session ID `session_id`.
  //
  // The returned reference refers to the data which the query
  // provides: failures are only checked with assertions.
  static const hip_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
                                            const rocprofiler_session_id_t session_id) {
    // Query size first (only for assertions).
    [[maybe_unused]] std::size_t data_size = 0;
    [[maybe_unused]] auto status = rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id,
        &data_size);

    assert(status == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data size");
    assert(data_size > 0);

    // Query data (borrowed from the record).
    char* raw_data = nullptr;

    status = rocprofiler_query_hip_tracer_api_data_info(
        session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id,
        &raw_data);
    assert(status == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data");
    assert(raw_data);

    // Reinterpret as an HIP API data pointer.
    const auto* const api_data = reinterpret_cast<const hip_api_data_t*>(raw_data);

    return *api_data;
  }

  // Queries and returns the kernel name of the record `record` and
  // session ID `session_id`.
  //
  // A name longer than one character is returned demangled; a
  // shorter one (including the empty string, when no name is
  // available) is returned as is.
  static std::string QueryKernelName(const rocprofiler_record_tracer_t& record,
                                     const rocprofiler_session_id_t session_id) {
    const auto query_size = [&record, session_id](const auto size_out) {
      return rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, record.api_data_handle, record.operation_id,
          size_out);
    };
    const auto query_data = [&record, session_id](const auto str_out) {
      return rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, record.api_data_handle, record.operation_id,
          str_out);
    };
    const auto raw_name = QueryAllocStr(query_size, query_data);

    if (raw_name.size() <= 1) {
      // Nothing worth demangling.
      return raw_name;
    }

    // Return demangled version.
    return rocmtools::cxx_demangle(raw_name);
  }

  // HIP API data and kernel name, obtained at construction time.
  hip_api_data_t api_data_;
  std::string kernel_name_;
};
|
||||
|
||||
// HIP API event record marking the beginning of an API call.
class HipApiEventRecordBegin final : public HipApiEventRecord {
 public:
  // Builds the record from `record` and `session_id`; its clock value
  // is what GetRecordBeginClockVal() returns for `record`.
  explicit HipApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
                                  const rocprofiler_session_id_t session_id)
      : HipApiEventRecord{record, session_id, GetRecordBeginClockVal(record)} {}

  // Writes this event record using `barectf_ctx`.
  void Write(barectf_hip_api_ctx& barectf_ctx) const override {
    // The whole body is a generated switch statement.
#include "hip_begin.cpp.i"
  }
};
|
||||
|
||||
// HIP API event record marking the end of an API call.
class HipApiEventRecordEnd final : public HipApiEventRecord {
 public:
  // Builds the record from `record` and `session_id`; its clock value
  // is what GetRecordEndClockVal() returns for `record`.
  explicit HipApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
                                const rocprofiler_session_id_t session_id)
      : HipApiEventRecord{record, session_id, GetRecordEndClockVal(record)} {}

  // Writes this event record using `barectf_ctx`.
  void Write(barectf_hip_api_ctx& barectf_ctx) const override {
    // The whole body is a generated switch statement.
#include "hip_end.cpp.i"
  }
};
|
||||
|
||||
// HSA API handle type event record.
|
||||
class HsaHandleTypeEventRecord final : public BarectfEventRecord<barectf_hsa_handles_ctx> {
|
||||
public:
|
||||
enum class Type {
|
||||
CPU = 0,
|
||||
GPU = 1,
|
||||
};
|
||||
|
||||
explicit HsaHandleTypeEventRecord(const std::uint64_t handle, const Type type)
|
||||
: BarectfEventRecord<barectf_hsa_handles_ctx>{0}, handle_{handle}, type_{type} {}
|
||||
|
||||
void Write(barectf_hsa_handles_ctx& barectf_ctx) const override {
|
||||
barectf_hsa_handles_trace_hsa_handle_type(&barectf_ctx, handle_,
|
||||
static_cast<std::uint8_t>(type_));
|
||||
}
|
||||
|
||||
private:
|
||||
std::uint64_t handle_;
|
||||
Type type_;
|
||||
};
|
||||
|
||||
// Abstract API operation event record.
|
||||
class ApiOpEventRecord : public TracerEventRecord<barectf_api_ops_ctx> {
|
||||
protected:
|
||||
explicit ApiOpEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
|
||||
: TracerEventRecord<barectf_api_ops_ctx>{record, clock_val} {}
|
||||
};
|
||||
|
||||
// HSA API operation event record (beginning).
|
||||
class HsaOpEventRecordBegin final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HsaOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordBeginClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hsa_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// HSA API operation event record (end).
|
||||
class HsaOpEventRecordEnd final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HsaOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hsa_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// HIP API operation event record (beginning).
|
||||
class HipOpEventRecordBegin final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HipOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordBeginClockVal(record)},
|
||||
kernel_name_{QueryKernelName(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hip_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId(), kernel_name_.c_str());
|
||||
}
|
||||
|
||||
private:
|
||||
// Queries and returns the kernel name of the record `record`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryKernelName(const rocprofiler_record_tracer_t& record) {
|
||||
if (record.operation_id.id == 0) {
|
||||
if (const auto api_handle = record.api_data_handle.handle) {
|
||||
const auto str = reinterpret_cast<const char*>(api_handle);
|
||||
|
||||
if (std::strlen(str) > 1) {
|
||||
// Return demangled version.
|
||||
return rocmtools::cxx_demangle(str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string kernel_name_;
|
||||
};
|
||||
|
||||
// HIP API operation event record (end).
|
||||
class HipOpEventRecordEnd final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HipOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hip_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// Profiler record base.
|
||||
class ProfilerEventRecord : public BarectfEventRecord<barectf_profiler_ctx> {
|
||||
public:
|
||||
explicit ProfilerEventRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id)
|
||||
: BarectfEventRecord<barectf_profiler_ctx>{GetRecordBeginClockVal(record)},
|
||||
dispatch_{record.header.id.handle},
|
||||
gpu_id_{record.gpu_id.handle},
|
||||
queue_id_{record.queue_id.handle},
|
||||
queue_index_{record.queue_idx.value},
|
||||
process_id_{GetPid()},
|
||||
thread_id_{record.thread_id.value},
|
||||
kernel_id_{record.kernel_id.handle},
|
||||
kernel_name_{QueryKernelName(record)},
|
||||
counter_infos_{QueryCounterInfos(record, session_id)} {}
|
||||
|
||||
void Write(barectf_profiler_ctx& barectf_ctx) const override {
|
||||
barectf_profiler_trace_profiler_record(
|
||||
&barectf_ctx, dispatch_, gpu_id_, queue_id_, queue_index_, process_id_, thread_id_,
|
||||
kernel_id_, kernel_name_.c_str(), counter_infos_.names.size(), counter_infos_.names.data(),
|
||||
counter_infos_.values.size(), counter_infos_.values.data());
|
||||
}
|
||||
|
||||
protected:
|
||||
// Counter infos.
|
||||
//
|
||||
// `names[i]` names the counter value `values[i]`.
|
||||
struct CounterInfos final {
|
||||
// `names_storage` owns the strings while the elements of `names`
|
||||
// point to the internal C strings of `names_storage`.
|
||||
//
|
||||
// This is needed because barectf expects an array of contiguous
|
||||
// C string pointers.
|
||||
std::vector<std::string> names_storage;
|
||||
std::vector<const char*> names;
|
||||
|
||||
// Counter values.
|
||||
std::vector<std::uint64_t> values;
|
||||
};
|
||||
|
||||
std::uint64_t GetDispatch() const noexcept { return dispatch_; }
|
||||
std::uint64_t GetGpuId() const noexcept { return gpu_id_; }
|
||||
std::uint64_t GetQueueId() const noexcept { return queue_id_; }
|
||||
std::uint64_t GetQueueIndex() const noexcept { return queue_index_; }
|
||||
std::uint32_t GetProcessId() const noexcept { return process_id_; }
|
||||
std::uint32_t GetThreadId() const noexcept { return thread_id_; }
|
||||
std::uint64_t GetKernelId() const noexcept { return kernel_id_; }
|
||||
const std::string& GetKernelName() const noexcept { return kernel_name_; }
|
||||
const CounterInfos& GetCounterInfos() const noexcept { return counter_infos_; }
|
||||
|
||||
private:
|
||||
// Queries and returns the kernel name of the record `record`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryKernelName(const rocprofiler_record_profiler_t& record) {
|
||||
const auto kernel_name = QueryAllocStr(
|
||||
[&record](const auto size) {
|
||||
return rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, record.kernel_id, size);
|
||||
},
|
||||
[&record](const auto str) {
|
||||
return rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, record.kernel_id,
|
||||
const_cast<const char**>(str));
|
||||
});
|
||||
|
||||
if (kernel_name.size() <= 1) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Return truncated and demangled version.
|
||||
return rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name));
|
||||
}
|
||||
|
||||
// Queries and returns the counter infos of the record `record` and
|
||||
// session ID `session_id`.
|
||||
static CounterInfos QueryCounterInfos(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
if (!record.counters) {
|
||||
// No counters.
|
||||
return {};
|
||||
}
|
||||
|
||||
CounterInfos infos;
|
||||
|
||||
for (std::size_t i = 0; i < record.counters_count.value; ++i) {
|
||||
auto& counter = record.counters[i];
|
||||
|
||||
if (counter.counter_handler.handle == 0) {
|
||||
// Not available: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Query counter name size first
|
||||
std::size_t counter_name_size = 0;
|
||||
[[maybe_unused]] auto ret = rocprofiler_query_counter_info_size(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, counter.counter_handler, &counter_name_size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name size");
|
||||
|
||||
if (counter_name_size == 0) {
|
||||
// No size: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Query counter name (borrowed from `record`: no need to free).
|
||||
const char* counter_name = nullptr;
|
||||
|
||||
ret = rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
|
||||
counter.counter_handler, &counter_name);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name");
|
||||
|
||||
if (!counter_name) {
|
||||
// Not available: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Push back infos.
|
||||
infos.names_storage.emplace_back(counter_name);
|
||||
infos.names.push_back(infos.names_storage.back().c_str());
|
||||
infos.values.push_back(counter.value.value);
|
||||
}
|
||||
|
||||
return infos;
|
||||
}
|
||||
|
||||
std::uint64_t dispatch_;
|
||||
std::uint64_t gpu_id_;
|
||||
std::uint64_t queue_id_;
|
||||
std::uint64_t queue_index_;
|
||||
std::uint32_t process_id_;
|
||||
std::uint32_t thread_id_;
|
||||
std::uint64_t kernel_id_;
|
||||
std::string kernel_name_;
|
||||
CounterInfos counter_infos_;
|
||||
};
|
||||
|
||||
// Profiler record base.
|
||||
class ProfilerWithKernelPropsEventRecord final : public ProfilerEventRecord {
|
||||
private:
|
||||
// According to `plugin/file/file.cpp`:
|
||||
//
|
||||
// > Taken from rocprofiler: The size hasn't changed in recent past
|
||||
static constexpr std::uint32_t lds_block_size_ = 128 * 4;
|
||||
|
||||
public:
|
||||
explicit ProfilerWithKernelPropsEventRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id)
|
||||
: ProfilerEventRecord{record, session_id},
|
||||
grid_size_{record.kernel_properties.grid_size},
|
||||
workgroup_size_{record.kernel_properties.workgroup_size},
|
||||
lds_size_{
|
||||
((record.kernel_properties.lds_size + (lds_block_size_ - 1)) & ~(lds_block_size_ - 1))},
|
||||
scratch_size_{record.kernel_properties.scratch_size},
|
||||
arch_vgpr_count_{record.kernel_properties.arch_vgpr_count},
|
||||
accum_vgpr_count_{record.kernel_properties.accum_vgpr_count},
|
||||
sgpr_count_{record.kernel_properties.sgpr_count},
|
||||
wave_size_{record.kernel_properties.wave_size},
|
||||
signal_handle_{record.kernel_properties.signal_handle} {}
|
||||
|
||||
void Write(barectf_profiler_ctx& barectf_ctx) const override {
|
||||
barectf_profiler_trace_profiler_record_with_kernel_properties(
|
||||
&barectf_ctx, GetDispatch(), GetGpuId(), GetQueueId(), GetQueueIndex(), GetProcessId(),
|
||||
GetThreadId(), GetKernelId(), GetKernelName().c_str(), GetCounterInfos().names.size(),
|
||||
GetCounterInfos().names.data(), GetCounterInfos().values.size(),
|
||||
GetCounterInfos().values.data(), grid_size_, workgroup_size_, lds_size_, scratch_size_,
|
||||
arch_vgpr_count_, accum_vgpr_count_, sgpr_count_, wave_size_, signal_handle_);
|
||||
}
|
||||
|
||||
private:
|
||||
std::uint64_t grid_size_;
|
||||
std::uint64_t workgroup_size_;
|
||||
std::uint64_t lds_size_;
|
||||
std::uint64_t scratch_size_;
|
||||
std::uint64_t arch_vgpr_count_;
|
||||
std::uint64_t accum_vgpr_count_;
|
||||
std::uint64_t sgpr_count_;
|
||||
std::uint64_t wave_size_;
|
||||
std::uint64_t signal_handle_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
Plugin::Plugin(const std::size_t packet_size, const fs::path& trace_dir,
|
||||
const fs::path& metadata_stream_path)
|
||||
: roctx_tracer_{packet_size, trace_dir, "roctx_"},
|
||||
hsa_api_tracer_{packet_size, trace_dir, "hsa_api_"},
|
||||
hip_api_tracer_{packet_size, trace_dir, "hip_api_"},
|
||||
api_ops_tracer_{packet_size, trace_dir, "api_ops_"},
|
||||
hsa_handles_tracer_{packet_size, trace_dir, "hsa_handles_"},
|
||||
profiler_tracer_{packet_size, trace_dir, "profiler_"} {
|
||||
// Make sure the trace directory doesn't exist.
|
||||
if (fs::exists(trace_dir)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "CTF trace directory `" << trace_dir.string() << "` already exists";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Make sure the metadata stream file exists.
|
||||
if (!fs::exists(metadata_stream_path)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "CTF metadata stream file `" << metadata_stream_path.string() << "` doesn't exist";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Create trace directory.
|
||||
if (!fs::create_directory(trace_dir)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "Cannot create the CTF trace directory `" << trace_dir.string() << "`";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Copy adjusted metadata stream file to trace directory.
|
||||
try {
|
||||
CopyAdjustedMetadataStreamFile(metadata_stream_path, trace_dir);
|
||||
} catch (const std::exception& exc) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "Cannot adjust and copy metadata stream file `" << metadata_stream_path.string()
|
||||
<< "` to the CTF trace directory `" << trace_dir.string() << "`: " << exc.what();
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Write HSA handle type event records.
|
||||
WriteHsaHandleTypes();
|
||||
}
|
||||
|
||||
void Plugin::HandleTracerRecord(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
std::lock_guard<std::mutex> lock{lock_};
|
||||
|
||||
// Depending on the domain, create and add an event record to the
|
||||
// corresponding tracer.
|
||||
switch (record.domain) {
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
roctx_tracer_.AddEventRecord(std::make_shared<const RocTxEventRecord>(record, session_id));
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
hsa_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HsaApiEventRecordBegin>(record, session_id));
|
||||
hsa_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HsaApiEventRecordEnd>(record, session_id));
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
hip_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HipApiEventRecordBegin>(record, session_id));
|
||||
hip_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HipApiEventRecordEnd>(record, session_id));
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordBegin>(record));
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordEnd>(record));
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordBegin>(record));
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordEnd>(record));
|
||||
break;
|
||||
default:
|
||||
// Warn
|
||||
std::cerr << "rocm_ctf::Plugin::HandleTracerRecord(): "
|
||||
<< "ignoring record for unknown domain #" << record.domain << std::endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Plugin::HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
std::lock_guard<std::mutex> lock{lock_};
|
||||
profiler_tracer_.AddEventRecord(
|
||||
std::make_shared<const ProfilerWithKernelPropsEventRecord>(record, session_id));
|
||||
}
|
||||
|
||||
void Plugin::HandleBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* const end,
|
||||
const rocprofiler_session_id_t session_id,
|
||||
const rocprofiler_buffer_id_t buffer_id) {
|
||||
while (begin && begin < end) {
|
||||
if (begin->kind == ROCPROFILER_TRACER_RECORD) {
|
||||
HandleTracerRecord(*reinterpret_cast<const rocprofiler_record_tracer_t*>(begin), session_id);
|
||||
} else {
|
||||
assert(begin->kind == ROCPROFILER_PROFILER_RECORD);
|
||||
HandleProfilerRecord(*reinterpret_cast<const rocprofiler_record_profiler_t*>(begin),
|
||||
session_id);
|
||||
}
|
||||
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
}
|
||||
|
||||
void Plugin::WriteHsaHandleTypes() {
|
||||
[[maybe_unused]] const auto status = hsa_iterate_agents(
|
||||
[](const auto agent, const auto user_data) {
|
||||
auto& tracer = *static_cast<HsaHandlesTracer*>(user_data);
|
||||
hsa_device_type_t type;
|
||||
|
||||
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
using Type = HsaHandleTypeEventRecord::Type;
|
||||
|
||||
auto event_record = std::make_shared<HsaHandleTypeEventRecord>(
|
||||
agent.handle, type == HSA_DEVICE_TYPE_CPU ? Type::CPU : Type::GPU);
|
||||
|
||||
tracer.AddEventRecord(std::move(event_record));
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&hsa_handles_tracer_);
|
||||
|
||||
assert(status == HSA_STATUS_SUCCESS && "Iterate HSA agents");
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::uint64_t ns_per_s = 1'000'000'000ULL;
|
||||
|
||||
// Samples the ROCMTools clock and returns the value.
|
||||
std::uint64_t GetClkVal() {
|
||||
rocprofiler_timestamp_t ts;
|
||||
[[maybe_unused]] const auto ret = rocprofiler_get_timestamp(&ts);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Get timestamp");
|
||||
return ts.value;
|
||||
}
|
||||
|
||||
// Updates `offset` and `delta`, if needed, to a more accurate clock
|
||||
// class offset and a smaller ROCMTools clock value delta.
|
||||
//
|
||||
// This function samples the ROCMTools clock twice, also sampling the
|
||||
// real-time clock in between, and uses the average ROCMTools clock
|
||||
// value to approximate the actual clock class offset.
|
||||
//
|
||||
// This strategy is based on the measure_single_clock_offset() function
|
||||
// of the LTTng-tools project <https://lttng.org/>.
|
||||
void UpdateClkClsOffsetAndDelta(std::uint64_t& offset, std::uint64_t& delta) {
|
||||
// Sample ROCMTools clock (first time).
|
||||
const auto rocm_clk_val1 = GetClkVal();
|
||||
|
||||
// Sample real-time clock.
|
||||
timespec realtime_spec = {0, 0};
|
||||
[[maybe_unused]] const auto ret = clock_gettime(CLOCK_REALTIME, &realtime_spec);
|
||||
|
||||
assert(ret == 0);
|
||||
|
||||
// Sample ROCMTools clock (second time).
|
||||
const auto rocm_clk_val2 = GetClkVal();
|
||||
|
||||
// Compute the current ROCMTools clock value delta.
|
||||
const auto this_delta = rocm_clk_val2 - rocm_clk_val1;
|
||||
|
||||
if (this_delta > delta) {
|
||||
// Discard larger delta.
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute the average ROCMTools clock value.
|
||||
const auto rocm_clk_val_avg = (rocm_clk_val1 + rocm_clk_val2) >> 1;
|
||||
|
||||
// Compute the real-time clock value in nanoseconds.
|
||||
const auto realtime_ns =
|
||||
(static_cast<std::uint64_t>(realtime_spec.tv_sec) * ns_per_s) + realtime_spec.tv_nsec;
|
||||
|
||||
// Update clock class offset and delta.
|
||||
assert(rocm_clk_val_avg < realtime_ns);
|
||||
offset = realtime_ns - rocm_clk_val_avg;
|
||||
delta = this_delta;
|
||||
}
|
||||
|
||||
// Computes and returns the most possible accurate clock class offset.
|
||||
std::uint64_t GetMetadataClkClsOffset() {
|
||||
std::uint64_t offset = 0;
|
||||
std::uint64_t delta = std::numeric_limits<std::uint64_t>::max();
|
||||
|
||||
// Best effort to find the most accurate offset.
|
||||
for (auto i = 0U; i < 50U; ++i) {
|
||||
UpdateClkClsOffsetAndDelta(offset, delta);
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Loads the metadata stream file `metadata_stream_path`, replaces the
// first occurrence of `offset = 0;` with the measured clock class
// offset, and writes the result to the `metadata` file within the
// `trace_dir` directory.
//
// Throws `std::runtime_error` if the metadata stream file cannot be
// opened, if it doesn't contain `offset = 0;`, or if the adjusted
// metadata stream cannot be written.
void Plugin::CopyAdjustedMetadataStreamFile(const fs::path& metadata_stream_path,
                                            const fs::path& trace_dir) {
  // Load installed metadata stream file contents.
  std::string metadata;

  {
    std::ifstream input{metadata_stream_path};

    if (!input) {
      throw std::runtime_error{"cannot open the metadata stream file for reading"};
    }

    // Reads up to the first null character, that is, the whole file
    // for a text file.
    std::getline(input, metadata, '\0');
  }

  // Replace the original `offset` property.
  {
    static constexpr auto offset_term = "offset = 0;";
    const auto offset_pos = metadata.find(offset_term);

    if (offset_pos == std::string::npos) {
      // Without this check, replace() below would throw a cryptic
      // `std::out_of_range`.
      throw std::runtime_error{"cannot find the `offset = 0;` property to adjust"};
    }

    std::ostringstream ss;

    ss << "offset = " << GetMetadataClkClsOffset() << ';';
    metadata.replace(offset_pos, std::strlen(offset_term), ss.str());
  }

  // Write adjusted metadata stream to trace directory.
  {
    std::ofstream output{trace_dir / "metadata"};

    output.write(metadata.data(), metadata.size());

    // Close explicitly so that a failure to flush is reported too.
    output.close();

    if (!output) {
      throw std::runtime_error{"cannot write the adjusted metadata stream file"};
    }
  }
}
|
||||
|
||||
} // namespace rocm_ctf
|
||||
@@ -0,0 +1,146 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_PLUGIN_H
|
||||
#define PLUGIN_CTF_PLUGIN_H
|
||||
|
||||
#include <mutex>
|
||||
#include <cstdlib>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
|
||||
#include "barectf.h"
|
||||
#include "barectf_tracer.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
// CTF plugin.
|
||||
//
|
||||
// Build a plugin instance, and then call HandleTracerRecord(),
|
||||
// HandleProfilerRecord(), and HandleBufferRecords() to add event
|
||||
// records.
|
||||
//
|
||||
// A plugin instance performs important tasks at destruction time.
|
||||
class Plugin final {
|
||||
public:
|
||||
// Builds a plugin instance to write a CTF trace in the `trace_dir`
|
||||
// directory with packets of size `packet_size` bytes.
|
||||
//
|
||||
// `trace_dir` must not exist.
|
||||
//
|
||||
// This constructor immediately adjusts and copies the metadata stream
|
||||
// file `metadata_stream_path` to the trace directory (`trace_dir`).
|
||||
explicit Plugin(std::size_t packet_size, const std::experimental::filesystem::path& trace_dir,
|
||||
const std::experimental::filesystem::path& metadata_stream_path);
|
||||
|
||||
// Handles a tracer record.
|
||||
void HandleTracerRecord(const rocprofiler_record_tracer_t& record,
|
||||
rocprofiler_session_id_t session_id);
|
||||
|
||||
// Handles a profiler record.
|
||||
void HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
|
||||
rocprofiler_session_id_t session_id);
|
||||
|
||||
// Handles tracer or profiler records from `begin` to `end`
|
||||
// (excluded).
|
||||
void HandleBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id);
|
||||
|
||||
private:
|
||||
// rocTX barectf platform descriptor.
|
||||
struct RocTxPlatformDescr final {
|
||||
using Ctx = barectf_roctx_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_roctx_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_roctx_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// HSA API barectf platform descriptor.
|
||||
struct HsaApiPlatformDescr final {
|
||||
using Ctx = barectf_hsa_api_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_hsa_api_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_hsa_api_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// HIP API barectf platform descriptor.
|
||||
struct HipApiPlatformDescr final {
|
||||
using Ctx = barectf_hip_api_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_hip_api_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_hip_api_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// HSA handles barectf platform descriptor.
|
||||
struct HsaHandlesPlatformDescr final {
|
||||
using Ctx = barectf_hsa_handles_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_hsa_handles_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_hsa_handles_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// API operations barectf platform descriptor.
|
||||
struct ApiOpsPlatformDescr final {
|
||||
using Ctx = barectf_api_ops_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_api_ops_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_api_ops_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// Profiler barectf platform descriptor.
|
||||
struct ProfilerPlatformDescr final {
|
||||
using Ctx = barectf_profiler_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_profiler_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_profiler_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// barectf tracer for HSA handle mappings.
|
||||
using HsaHandlesTracer = BarectfTracer<HsaHandlesPlatformDescr>;
|
||||
|
||||
// Writes the HSA handle type mappings to a dedicated data stream
|
||||
// file.
|
||||
void WriteHsaHandleTypes();
|
||||
|
||||
// Loads the existing metadata stream file `metadata_stream_path`,
|
||||
// adjusts the `offset` property of its single clock class, and writes
|
||||
// the result to the `metadata` file within the `trace_dir` directory.
|
||||
void CopyAdjustedMetadataStreamFile(
|
||||
const std::experimental::filesystem::path& metadata_stream_path,
|
||||
const std::experimental::filesystem::path& trace_dir);
|
||||
|
||||
// Dedicated tracers.
|
||||
BarectfTracer<RocTxPlatformDescr> roctx_tracer_;
|
||||
BarectfTracer<HsaApiPlatformDescr> hsa_api_tracer_;
|
||||
BarectfTracer<HipApiPlatformDescr> hip_api_tracer_;
|
||||
BarectfTracer<ApiOpsPlatformDescr> api_ops_tracer_;
|
||||
HsaHandlesTracer hsa_handles_tracer_;
|
||||
BarectfTracer<ProfilerPlatformDescr> profiler_tracer_;
|
||||
|
||||
// Locks any operation performed on the data of this.
|
||||
std::mutex lock_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_PLUGIN_H
|
||||
@@ -0,0 +1,7 @@
|
||||
/*
 * Linker version script for the plugin shared libraries: only the four
 * plugin entry points below are exported; every other symbol is local.
 */
{
  global:
    rocprofiler_plugin_initialize;
    rocprofiler_plugin_finalize;
    rocprofiler_plugin_write_buffer_records;
    rocprofiler_plugin_write_record;

  local:
    *;
};
|
||||
@@ -0,0 +1,44 @@
|
||||
# ###############################################################################
|
||||
# # Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
# #
|
||||
# # Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# # of this software and associated documentation files (the "Software"), to
|
||||
# # deal in the Software without restriction, including without limitation the
|
||||
# # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# # sell copies of the Software, and to permit persons to whom the Software is
|
||||
# # furnished to do so, subject to the following conditions:
|
||||
# #
|
||||
# # The above copyright notice and this permission notice shall be included in
|
||||
# # all copies or substantial portions of the Software.
|
||||
# #
|
||||
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# # IN THE SOFTWARE.
|
||||
# ###############################################################################
|
||||
|
||||
# Helper source of the main project which is compiled into the plugin.
#
# This is a plain path (not a glob) so that a missing file is reported
# when configuring instead of silently resulting in an empty list.
set(ROCPROFILER_UTIL_SRC_FILES "${PROJECT_SOURCE_DIR}/src/utils/helper.cpp")

# Plugin sources: CONFIGURE_DEPENDS makes the build system re-run the
# glob so that an added or removed source file is noticed.
file(GLOB FILE_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
add_library(file_plugin SHARED ${FILE_SOURCES} ${ROCPROFILER_UTIL_SRC_FILES})

# Hide everything by default and relink when the export map changes.
set_target_properties(file_plugin PROPERTIES
  CXX_VISIBILITY_PRESET hidden
  LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
  LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}")

target_compile_definitions(file_plugin
  PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_HCC__=1)

target_include_directories(file_plugin
  PRIVATE "${PROJECT_SOURCE_DIR}/inc" "${PROJECT_SOURCE_DIR}")

# Only export the symbols listed in the export map and fail the link on
# any undefined symbol.
target_link_options(file_plugin
  PRIVATE
    "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
    "-Wl,--no-undefined")

# CMAKE_DL_LIBS is the portable name of the library providing dlopen().
target_link_libraries(file_plugin
  PRIVATE
    ${ROCPROFILER_TARGET}
    hsa-runtime64::hsa-runtime64
    systemd
    stdc++fs
    amd_comgr
    ${CMAKE_DL_LIBS})

install(TARGETS file_plugin LIBRARY
  DESTINATION "${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}"
  COMPONENT runtime)
|
||||
@@ -0,0 +1,472 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <experimental/filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <hsa/hsa.h>
|
||||
#include <mutex>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
#include "../utils.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
static std::string output_file_name;
|
||||
class file_plugin_t {
|
||||
private:
|
||||
enum class output_type_t {
|
||||
COUNTER,
|
||||
TRACER,
|
||||
PC_SAMPLING
|
||||
};
|
||||
|
||||
class output_file_t {
|
||||
public:
|
||||
output_file_t(std::string name) : name_(std::move(name)) {}
|
||||
|
||||
std::string name() const { return name_; }
|
||||
|
||||
template <typename T> std::ostream& operator<<(T&& value) {
|
||||
if (!is_open()) open();
|
||||
return stream_ << std::forward<T>(value);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& (*func)(std::ostream&)) {
|
||||
if (!is_open()) open();
|
||||
return stream_ << func;
|
||||
}
|
||||
|
||||
void open() {
|
||||
// If the stream is already in the failed state, there's no need to try
|
||||
// to open the file.
|
||||
if (fail()) return;
|
||||
|
||||
const char* output_dir = getenv("OUTPUT_PATH");
|
||||
output_file_name = getenv("OUT_FILE_NAME") ? std::string(getenv("OUT_FILE_NAME")) + "_" : "";
|
||||
|
||||
if (output_dir == nullptr) {
|
||||
stream_.copyfmt(std::cout);
|
||||
stream_.clear(std::cout.rdstate());
|
||||
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
|
||||
return;
|
||||
}
|
||||
|
||||
fs::path output_prefix(output_dir);
|
||||
if (!fs::is_directory(fs::status(output_prefix))) {
|
||||
if (!stream_.fail()) rocmtools::warning("Cannot open output directory '%s'", output_dir);
|
||||
stream_.setstate(std::ios_base::failbit);
|
||||
return;
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
ss << output_file_name << GetPid() << "_" << name_;
|
||||
stream_.open(output_prefix / ss.str());
|
||||
}
|
||||
|
||||
bool is_open() const { return stream_.is_open(); }
|
||||
bool fail() const { return stream_.fail(); }
|
||||
|
||||
private:
|
||||
const std::string name_;
|
||||
std::ofstream stream_;
|
||||
};
|
||||
|
||||
// Maps an output category (and, for tracer output, an activity domain) to the
// member file that receives it. Returns nullptr for an unsupported tracer
// domain after asserting; `domain` is ignored for non-tracer categories.
output_file_t* get_output_file(output_type_t output_type, uint32_t domain = 0) {
  if (output_type == output_type_t::COUNTER) return &output_file_;
  if (output_type == output_type_t::PC_SAMPLING) return &pc_sample_file_;
  if (output_type != output_type_t::TRACER) return nullptr;

  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      return &roctx_file_;
    case ACTIVITY_DOMAIN_HSA_API:
      return &hsa_api_file_;
    case ACTIVITY_DOMAIN_HIP_API:
      return &hip_api_file_;
    case ACTIVITY_DOMAIN_HIP_OPS:
      return &hip_activity_file_;
    case ACTIVITY_DOMAIN_HSA_OPS:
      return &hsa_async_copy_file_;
    default:
      break;
  }

  assert(!"domain/op not supported!");
  return nullptr;
}
|
||||
|
||||
public:
|
||||
file_plugin_t() {
|
||||
output_file_t hsa_handles("hsa_handles.txt");
|
||||
|
||||
[[maybe_unused]] hsa_status_t status = hsa_iterate_agents(
|
||||
[](hsa_agent_t agent, void* user_data) {
|
||||
auto* file = static_cast<decltype(hsa_handles)*>(user_data);
|
||||
hsa_device_type_t type;
|
||||
|
||||
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS)
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
*file << std::hex << std::showbase << agent.handle << " agent "
|
||||
<< ((type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu") << std::endl;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&hsa_handles);
|
||||
assert(status == HSA_STATUS_SUCCESS && "failed to iterate HSA agents");
|
||||
if (hsa_handles.fail()) {
|
||||
rocmtools::warning("Cannot write to '%s'", hsa_handles.name().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
// App begin timestamp begin_ts_file.txt
|
||||
output_file_t begin_ts("begin_ts_file.txt");
|
||||
|
||||
[[maybe_unused]] rocprofiler_timestamp_t app_begin_timestamp = {};
|
||||
CHECK_ROCMTOOLS(rocprofiler_get_timestamp(&app_begin_timestamp));
|
||||
|
||||
begin_ts << std::dec << app_begin_timestamp.value << std::endl;
|
||||
if (begin_ts.fail()) {
|
||||
rocmtools::warning("Cannot write to '%s'", begin_ts.name().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
valid_ = true;
|
||||
}
|
||||
|
||||
std::mutex writing_lock;
|
||||
|
||||
// Returns the label written into trace records for an activity domain, or an
// empty string for a domain not listed here.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:   return "ROCTX_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_API: return "HIP_API_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_OPS: return "HIP_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_API: return "HSA_API_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_OPS: return "HSA_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_EVT: return "HSA_EVT_DOMAIN";
    default:                      return "";
  }
}
|
||||
|
||||
// Formats one tracer record as a single text line and appends it to the trace
// file of the record's domain.
//
// tracer_record: the record to print (taken by value).
// session_id:    session used for the API-data queries.
// buffer_id:     not used by this function.
//
// Serialized with the other Flush* writers through writing_lock.
//
// Fixes relative to the previous version:
//  - The ROCTX id was an uninitialized uint64_t tested with `>= 0` (always true
//    for an unsigned value), so every record printed an indeterminate
//    "ROCTX ID(...)". It is now optional and printed only when it was queried.
//  - The strdup() copies of the ROCTX message/id were never freed.
//  - An unsupported domain makes get_output_file() return nullptr (after an
//    assert that is compiled out with NDEBUG); that is no longer dereferenced.
//
// NOTE(review): each query receives the address of a freshly malloc'ed pointer.
// Whether the API fills that buffer or replaces the pointer is not visible
// here, so ownership is left as it was: only the ROCTX id buffer is freed,
// exactly as before. Confirm against the rocprofiler API and free the rest if
// appropriate.
void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record, rocprofiler_session_id_t session_id,
                       rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) {
  std::lock_guard<std::mutex> lock(writing_lock);

  output_file_t* output_file = get_output_file(output_type_t::TRACER, tracer_record.domain);
  if (output_file == nullptr) return;  // unsupported domain: nothing to write to

  std::string kernel_name;
  std::string function_name;
  std::string roctx_message;
  std::optional<uint64_t> roctx_id;

  // HIP ops records with operation id 0 carry the kernel name directly in the
  // API data handle.
  if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS && tracer_record.operation_id.id == 0) {
    const char* raw_name = reinterpret_cast<const char*>(tracer_record.api_data_handle.handle);
    if (raw_name && strlen(raw_name) > 1) kernel_name = rocmtools::cxx_demangle(raw_name);
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) {
    size_t function_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
        session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = (char*)malloc(function_name_size);
      CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
          session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) {
    size_t function_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = (char*)malloc(function_name_size);
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }

    size_t kernel_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &kernel_name_size));
    if (kernel_name_size > 1) {
      char* kernel_name_str = (char*)malloc(kernel_name_size * sizeof(char));
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &kernel_name_str));
      if (kernel_name_str) kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
    size_t roctx_message_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
        tracer_record.operation_id, &roctx_message_size));
    if (roctx_message_size > 1) {
      [[maybe_unused]] char* roctx_message_str =
          static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_message_str));
      // std::string copies the characters itself; no intermediate strdup().
      if (roctx_message_str)
        roctx_message = rocmtools::cxx_demangle(std::string(roctx_message_str));
    }

    size_t roctx_id_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, tracer_record.operation_id,
        &roctx_id_size));
    if (roctx_id_size > 1) {
      [[maybe_unused]] char* roctx_id_str =
          static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_id_str));
      if (roctx_id_str) {
        // Copy first so the buffer is released even if the conversion throws.
        const std::string id_text(roctx_id_str);
        free(roctx_id_str);
        roctx_id = static_cast<uint64_t>(std::stoll(id_text));
      }
    }
  }

  *output_file << "Record [" << tracer_record.header.id.handle << "], Domain("
               << GetDomainName(tracer_record.domain) << "), Begin("
               << tracer_record.timestamps.begin.value << "), End("
               << tracer_record.timestamps.end.value << "), Correlation ID( "
               << tracer_record.correlation_id.value << ")";
  if (roctx_id) *output_file << ", ROCTX ID(" << *roctx_id << ")";
  if (roctx_message.size() > 1) *output_file << ", ROCTX Message(" << roctx_message << ")";
  if (function_name.size() > 1) *output_file << ", Function(" << function_name << ")";
  if (kernel_name.size() > 1) *output_file << ", Kernel Name(" << kernel_name.c_str() << ")";
  *output_file << std::endl;
}
|
||||
|
||||
void FlushProfilerRecord(const rocprofiler_record_profiler_t* profiler_record,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
std::lock_guard<std::mutex> lock(writing_lock);
|
||||
size_t name_length = 0;
|
||||
output_file_t* output_file{nullptr};
|
||||
output_file = get_output_file(output_type_t::COUNTER);
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
|
||||
profiler_record->kernel_id, &name_length));
|
||||
// Taken from rocprofiler: The size hasn't changed in recent past
|
||||
static const uint32_t lds_block_size = 128 * 4;
|
||||
const char* kernel_name_c;
|
||||
if (name_length > 1) {
|
||||
kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record->kernel_id,
|
||||
&kernel_name_c));
|
||||
}
|
||||
*output_file << std::string("dispatch[") << std::to_string(profiler_record->header.id.handle)
|
||||
<< "], " << std::string("gpu_id(")
|
||||
<< std::to_string(profiler_record->gpu_id.handle) << "), "
|
||||
<< std::string("queue_id(") << std::to_string(profiler_record->queue_id.handle)
|
||||
<< "), " << std::string("queue_index(")
|
||||
<< std::to_string(profiler_record->queue_idx.value) << "), " << std::string("pid(")
|
||||
<< std::to_string(GetPid()) << "), " << std::string("tid(")
|
||||
<< std::to_string(profiler_record->thread_id.value) << ")";
|
||||
*output_file << ", " << std::string("grd(")
|
||||
<< std::to_string(profiler_record->kernel_properties.grid_size) << "), "
|
||||
<< std::string("wgr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.workgroup_size) << "), "
|
||||
<< std::string("lds(")
|
||||
<< std::to_string(
|
||||
((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) &
|
||||
~(lds_block_size - 1)))
|
||||
<< "), " << std::string("scr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.scratch_size) << "), "
|
||||
<< std::string("arch_vgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << "), "
|
||||
<< std::string("accum_vgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << "), "
|
||||
<< std::string("sgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.sgpr_count) << "), "
|
||||
<< std::string("wave_size(")
|
||||
<< std::to_string(profiler_record->kernel_properties.wave_size) << "), "
|
||||
<< std::string("sig(")
|
||||
<< std::to_string(profiler_record->kernel_properties.signal_handle);
|
||||
std::string kernel_name = "";
|
||||
if (name_length > 1) {
|
||||
kernel_name = rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name_c));
|
||||
}
|
||||
*output_file << "), " << std::string("obj(")
|
||||
<< std::to_string(profiler_record->kernel_id.handle) << "), "
|
||||
<< std::string("kernel-name(\"") << kernel_name << "\")"
|
||||
<< std::string(", start_time(")
|
||||
<< std::to_string(profiler_record->timestamps.begin.value) << ")"
|
||||
<< std::string(", end_time(")
|
||||
<< std::to_string(profiler_record->timestamps.end.value) << ")";
|
||||
|
||||
// For Counters
|
||||
*output_file << std::endl;
|
||||
if (profiler_record->counters) {
|
||||
for (uint64_t i = 0; i < profiler_record->counters_count.value; i++) {
|
||||
if (profiler_record->counters[i].counter_handler.handle > 0) {
|
||||
size_t counter_name_length = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
|
||||
&counter_name_length));
|
||||
if (counter_name_length > 1) {
|
||||
const char* name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_counter_info(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
|
||||
&name_c));
|
||||
*output_file << ", " << name_c << " ("
|
||||
<< std::to_string(profiler_record->counters[i].value.value) << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FlushPCSamplingRecord(
|
||||
const rocprofiler_record_pc_sample_t *pc_sampling_record) {
|
||||
output_file_t* output_file{nullptr};
|
||||
output_file = get_output_file(output_type_t::PC_SAMPLING);
|
||||
const auto &sample = pc_sampling_record->pc_sample;
|
||||
*output_file << "dispatch[" << sample.dispatch_id.value << "], "
|
||||
<< "timestamp(" << sample.timestamp.value << "), "
|
||||
<< "gpu_id(" << sample.gpu_id.handle << "), "
|
||||
<< "pc-sample(" << std::hex << std::showbase << sample.pc << "), "
|
||||
<< "se(" << sample.se << ')'
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id) {
|
||||
while (begin < end) {
|
||||
if (!begin) return 0;
|
||||
switch (begin->kind) {
|
||||
case ROCPROFILER_PROFILER_RECORD: {
|
||||
const rocprofiler_record_profiler_t* profiler_record =
|
||||
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin);
|
||||
FlushProfilerRecord(profiler_record, session_id, buffer_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_TRACER_RECORD: {
|
||||
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
|
||||
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
|
||||
FlushTracerRecord(*tracer_record, session_id, buffer_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PC_SAMPLING_RECORD: {
|
||||
const rocprofiler_record_pc_sample_t *pc_sampling_record =
|
||||
reinterpret_cast<const rocprofiler_record_pc_sample_t *>(begin);
|
||||
FlushPCSamplingRecord(pc_sampling_record);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// True once the constructor has written its start-up files without a stream
// failure (valid_ is set at the very end of the constructor).
bool is_valid() const { return valid_; }
|
||||
|
||||
private:
|
||||
bool valid_{false};
|
||||
|
||||
output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"},
|
||||
hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"},
|
||||
hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"},
|
||||
output_file_{"results.txt"};
|
||||
};
|
||||
|
||||
file_plugin_t* file_plugin = nullptr;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Plugin entry point: creates the single file_plugin_t instance.
// Returns 0 on success and -1 when the major version differs, the caller's
// minor version is older than the one compiled in, the plugin is already
// initialized, or construction left the plugin invalid.
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
                                                     uint32_t rocprofiler_minor_version) {
  const bool version_ok = rocprofiler_major_version == ROCPROFILER_VERSION_MAJOR &&
      rocprofiler_minor_version >= ROCPROFILER_VERSION_MINOR;
  if (!version_ok) return -1;

  if (file_plugin != nullptr) return -1;

  file_plugin = new file_plugin_t();
  if (!file_plugin->is_valid()) {
    // The plugin failed to initialize; destroy it and report an error.
    delete file_plugin;
    file_plugin = nullptr;
    return -1;
  }
  return 0;
}
|
||||
|
||||
// Plugin exit point: destroys the plugin instance, if any, and clears the
// global pointer so a later initialize call can create a new one.
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
  // Deleting a null pointer is a no-op, so no separate guard is needed.
  delete file_plugin;
  file_plugin = nullptr;
}
|
||||
|
||||
// Writes every record in [begin, end) through the plugin instance.
// Returns -1 when the plugin is missing or invalid, otherwise the result of
// file_plugin_t::WriteBufferRecords.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler_record_header_t* begin,
                                                               const rocprofiler_record_header_t* end,
                                                               rocprofiler_session_id_t session_id,
                                                               rocprofiler_buffer_id_t buffer_id) {
  if (file_plugin == nullptr) return -1;
  if (!file_plugin->is_valid()) return -1;
  return file_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
}
|
||||
|
||||
// Writes a single tracer record through the plugin instance.
// Returns -1 when the plugin is missing or invalid and 0 otherwise; a record
// whose header id handle is 0 is accepted but not written.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
                                                       rocprofiler_session_id_t session_id) {
  if (file_plugin == nullptr) return -1;
  if (!file_plugin->is_valid()) return -1;

  if (record.header.id.handle != 0) file_plugin->FlushTracerRecord(record, session_id);
  return 0;
}
|
||||
@@ -0,0 +1,27 @@
|
||||
# Shared helper source compiled into the plugin. It is named explicitly rather
# than through file(GLOB): the pattern had no wildcard, and a glob silently
# yields an empty list if the file is missing instead of failing at configure
# time.
set(ROCPROFILER_UTIL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp)

# The Perfetto SDK amalgamation (perfetto.cc) is built as part of the plugin.
add_library(perfetto_plugin
  ${LIBRARY_TYPE}
  ${ROCPROFILER_UTIL_SRC_FILES}
  perfetto.cpp
  perfetto_sdk/sdk/perfetto.cc)

# Hide symbols by default and relink when the shared export map changes.
set_target_properties(perfetto_plugin PROPERTIES
  CXX_VISIBILITY_PRESET hidden
  LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
  LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})

target_compile_definitions(perfetto_plugin
  PRIVATE HIP_PROF_HIP_API_STRING=1
          __HIP_PLATFORM_HCC__=1)

target_include_directories(perfetto_plugin
  PRIVATE ${PROJECT_SOURCE_DIR}/inc
          ${PROJECT_SOURCE_DIR}
          ${PROJECT_SOURCE_DIR}/plugin/perfetto/perfetto_sdk/sdk)

# Restrict exported symbols to the export map and fail on unresolved symbols.
target_link_options(perfetto_plugin
  PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap -Wl,--no-undefined)

target_link_libraries(perfetto_plugin
  PRIVATE ${ROCPROFILER_TARGET}
          Threads::Threads
          systemd
          stdc++fs
          amd_comgr)

install(TARGETS perfetto_plugin LIBRARY
  DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
  COMPONENT plugins)
|
||||
@@ -0,0 +1,804 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "rocprofiler.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <condition_variable>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <experimental/filesystem>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <systemd/sd-id128.h>
|
||||
|
||||
#include "perfetto_sdk/sdk/perfetto.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
#include "../utils.h"
|
||||
|
||||
#define STREAM_CONSTANT 98736677
|
||||
#define QUEUE_CONSTANT 18746479
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
PERFETTO_DEFINE_CATEGORIES(
|
||||
perfetto::Category("GENERIC").SetDescription("GENERAL_CATEGORY"),
|
||||
perfetto::Category("ROCTX_API").SetDescription("ACTIVITY_DOMAIN_ROCTX_API"),
|
||||
perfetto::Category("HSA_API").SetDescription("ACTIVITY_DOMAIN_HSA_API"),
|
||||
perfetto::Category("HIP_API").SetDescription("ACTIVITY_DOMAIN_HIP_API"),
|
||||
perfetto::Category("External_API").SetDescription("ACTIVITY_DOMAIN_EXT_API"),
|
||||
perfetto::Category("HIP_OPS").SetDescription("ACTIVITY_DOMAIN_HIP_OPS"),
|
||||
perfetto::Category("HSA_OPS").SetDescription("ACTIVITY_DOMAIN_HSA_OPS"),
|
||||
perfetto::Category("KERNELS").SetDescription("KERNEL_DISPATCHES"),
|
||||
perfetto::Category("COUNTERS").SetDescription("PERFORMANCE_COUNTERS"));
|
||||
|
||||
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
|
||||
|
||||
namespace {
|
||||
|
||||
std::string process_name;
|
||||
static std::string output_file_name;
|
||||
|
||||
// Returns the demangled name of the kernel referenced by profiler_record, or
// an empty string when the name query reports no usable name.
//
// Fix relative to the previous version: the name was passed to cxx_demangle
// through strdup(), and that copy was never freed. The queried string is now
// passed directly.
//
// NOTE(review): the query receives the address of a malloc'ed pointer; whether
// the API fills or replaces it is not visible here, so it is still not freed.
// Confirm ownership with the rocprofiler API.
std::string get_kernel_name(rocprofiler_record_profiler_t& profiler_record) {
  std::string kernel_name = "";
  size_t name_length = 1;
  CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
                                                     &name_length));
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#pragma GCC diagnostic ignored "-Wstringop-overread"
  if (name_length > 1) {
    const char* kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
    CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
                                                  &kernel_name_c));
    if (kernel_name_c && strlen(kernel_name_c) > 1)
      kernel_name = rocmtools::cxx_demangle(kernel_name_c);
  }
#pragma GCC diagnostic pop
  return kernel_name;
}
|
||||
|
||||
|
||||
class perfetto_plugin_t {
|
||||
public:
|
||||
// Sets up an in-process Perfetto tracing session that writes to
// "<OUTPUT_PATH>/<OUT_FILE_NAME>_<pid>_output.pftrace" and registers the
// process track. is_valid_ is set to true only at the very end, so both early
// returns below leave it untouched.
perfetto_plugin_t() {
  const char* output_dir = getenv("OUTPUT_PATH");
  const char* temp_file_name = getenv("OUT_FILE_NAME");
  output_file_name = temp_file_name ? std::string(temp_file_name) + "_" : "";

  // No output directory: point stream_ at std::cout and stop here.
  // NOTE(review): this path returns before is_valid_ is set — confirm that is
  // the intended behavior when OUTPUT_PATH is unset.
  if (output_dir == nullptr) {
    stream_.copyfmt(std::cout);
    stream_.clear(std::cout.rdstate());
    stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
    return;
  }

  output_prefix_ = output_dir;
  if (!fs::is_directory(fs::status(output_prefix_))) {
    if (!stream_.fail()) rocmtools::warning("Cannot open output directory '%s'", output_dir);
    stream_.setstate(std::ios_base::failbit);
    return;
  }

  perfetto::TracingInitArgs args;
  args.backends |= perfetto::kInProcessBackend;

  perfetto::Tracing::Initialize(args);
  perfetto::TrackEvent::Register();

  // Disable everything, then enable exactly the categories this file defines.
  perfetto::protos::gen::TrackEventConfig track_event_cfg;
  track_event_cfg.add_disabled_categories("*");
  track_event_cfg.add_enabled_categories("GENERIC");
  track_event_cfg.add_enabled_categories("ROCTX_API");
  track_event_cfg.add_enabled_categories("HSA_API");
  track_event_cfg.add_enabled_categories("HIP_API");
  track_event_cfg.add_enabled_categories("External_API");
  track_event_cfg.add_enabled_categories("HIP_OPS");
  track_event_cfg.add_enabled_categories("HSA_OPS");
  track_event_cfg.add_enabled_categories("KERNELS");
  track_event_cfg.add_enabled_categories("COUNTERS");

  perfetto::TraceConfig trace_cfg;

  auto buffer_cfg = trace_cfg.add_buffers();
  // The value is passed to set_size_kb(), so the default is 10 Gi expressed in
  // kilobyte units.
  uint32_t max_buffer_size = 10 * 1024 * 1024;  // Default max buffer size is 10 GB
  const char* max_buffer_size_str = getenv("rocprofiler_PERFETTO_MAX_BUFFER_SIZE_KIB");
  // A positive value in the environment variable overrides the default.
  // NOTE(review): the long returned by atol is narrowed to uint32_t here.
  if (max_buffer_size_str && std::atol(max_buffer_size_str) > 0)
    max_buffer_size = std::atol(max_buffer_size_str);
  // Record up to max buffer size determined by user or the 10 GB (default value)
  buffer_cfg->set_size_kb(max_buffer_size);

  auto* data_source_cfg = trace_cfg.add_data_sources()->mutable_config();
  data_source_cfg->set_name("track_event");
  data_source_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());

  output_prefix_.append(output_file_name + std::to_string(GetPid()) + "_output.pftrace");
  file_descriptor_ = open(output_prefix_.string().c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600);
  // NOTE(review): a failed open only warns; the descriptor (-1) is still
  // handed to Setup() below — confirm whether this should abort instead.
  if (file_descriptor_ == -1) rocmtools::warning("Can't open output file\n");

  tracing_session_ = perfetto::Tracing::NewTrace();
  tracing_session_->Setup(trace_cfg, file_descriptor_);
  tracing_session_->StartBlocking();


  // Pre-terminate the buffer before gethostname fills at most 1023 bytes.
  hostname_[1023] = '\0';
  gethostname(hostname_, 1023);
  sd_id128_t ret;
  char machine_id[SD_ID128_STRING_MAX];
  [[maybe_unused]] int status = sd_id128_get_machine(&ret);
  assert(status == 0 && "Error: Couldn't get machine id!");
  // machine_id_ is a hash of the machine id string, not the id itself.
  if (sd_id128_to_string(ret, machine_id)) machine_id_ = std::hash<std::string>{}(machine_id);

  {
    std::lock_guard<std::mutex> lock(thread_tracks_lock_);
    process_name =
        perfetto::ProcessTrack::Current().Serialize().mutable_process()->process_name();
    auto process_track_desc = perfetto::ProcessTrack::Current().Serialize();
    uint64_t track_id =
        track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
    // Draw further ids from the counter while the candidate equals an id
    // already recorded in track_ids_used_.
    for (uint64_t tid : track_ids_used_) {
      while (track_id == tid) {
        track_id =
            track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
      }
    }
    std::string thread_track_str =
        rocmtools::string_printf("Node: %s Process ID: %lu Thread ID:", hostname_, GetPid());
    process_track_desc.mutable_process()->set_process_name(thread_track_str);
    perfetto::TrackEvent::SetTrackDescriptor(perfetto::ProcessTrack::Current(),
                                             process_track_desc);
    // NOTE(review): set_uuid is applied to a descriptor freshly returned by
    // Serialize() and not stored or passed on afterwards — this looks like it
    // has no lasting effect; confirm.
    perfetto::ProcessTrack::Current().Serialize().set_uuid(track_id);
    thread_tracks_.emplace(GetPid(), perfetto::ProcessTrack::Current());
  }

  is_valid_ = true;
}
|
||||
|
||||
// Stops the tracing session and closes the trace file, but only when the
// constructor completed (is_valid_ set).
~perfetto_plugin_t() {
  if (!is_valid_) return;
  tracing_session_->StopBlocking();
  close(file_descriptor_);
}
|
||||
|
||||
// Returns the label used for an activity domain, or an empty string for a
// domain not listed here.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:   return "ROCTX_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_API: return "HIP_API_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_OPS: return "HIP_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_API: return "HSA_API_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_OPS: return "HSA_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_EVT: return "HSA_EVT_DOMAIN";
    default:                      return "";
  }
}
|
||||
|
||||
std::mutex writing_lock;
|
||||
|
||||
int FlushProfilerRecord(rocprofiler_record_profiler_t profiler_record,
|
||||
rocprofiler_session_id_t session_id) {
|
||||
std::lock_guard<std::mutex> lock(writing_lock);
|
||||
// ToDO: rename this variable?
|
||||
if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");
|
||||
|
||||
int device_id = profiler_record.gpu_id.handle;
|
||||
std::unordered_map<int, perfetto::Track>::iterator device_track_it;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(device_tracks_lock_);
|
||||
device_track_it = device_tracks_.find(device_id);
|
||||
if (device_track_it == device_tracks_.end()) {
|
||||
/* Create a new perfetto::Track (Sub-Track) */
|
||||
device_track_it =
|
||||
device_tracks_
|
||||
.emplace(device_id, perfetto::ProcessTrack::Global(((device_id + 1) * machine_id_)))
|
||||
.first;
|
||||
auto gpu_desc = device_track_it->second.Serialize();
|
||||
gpu_desc.mutable_process()->set_pid(device_id);
|
||||
std::string gpu_str = rocmtools::string_printf("Node: %s Device:", hostname_);
|
||||
gpu_desc.mutable_process()->set_process_name(gpu_str);
|
||||
perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
|
||||
track_ids_used_.emplace_back(device_id + 1 + machine_id_);
|
||||
}
|
||||
}
|
||||
auto& gpu_track = device_track_it->second;
|
||||
std::pair<int, uint64_t> gpu_queue_id =
|
||||
std::make_pair(device_id, profiler_record.queue_id.handle);
|
||||
auto queue_track_it = queue_tracks_.find(gpu_queue_id.first);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(queue_tracks_lock_);
|
||||
queue_track_it = queue_tracks_.find(gpu_queue_id.first);
|
||||
if (queue_track_it == queue_tracks_.end()) {
|
||||
/* Create a new perfetto::Track */
|
||||
queue_track_it = queue_tracks_
|
||||
.emplace(gpu_queue_id.first,
|
||||
perfetto::Track((profiler_record.queue_id.handle + 1 +
|
||||
profiler_record.gpu_id.handle) *
|
||||
QUEUE_CONSTANT * machine_id_ * GetPid(),
|
||||
gpu_track))
|
||||
.first;
|
||||
|
||||
auto queue_desc = queue_track_it->second.Serialize();
|
||||
std::string queue_str =
|
||||
rocmtools::string_printf("Process ID: %lu Queue %ld", GetPid(), gpu_queue_id.second);
|
||||
queue_desc.set_name(queue_str);
|
||||
perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
|
||||
}
|
||||
track_ids_used_.emplace_back(profiler_record.queue_id.handle + machine_id_ + 1 +
|
||||
profiler_record.gpu_id.handle);
|
||||
}
|
||||
auto& queue_track = queue_track_it->second;
|
||||
|
||||
// Taken from rocprofiler: The size hasn't changed in recent past
|
||||
static const uint32_t lds_block_size = 128 * 4;
|
||||
|
||||
std::string full_kernel_name = get_kernel_name(profiler_record);
|
||||
// std::string truncated_kernel_name = rocmtools::truncate_name(full_kernel_name);
|
||||
// perfetto::StaticString kernel_name(truncated_kernel_name.c_str());
|
||||
TRACE_EVENT_BEGIN("KERNELS", perfetto::StaticString(full_kernel_name.c_str()), queue_track,
|
||||
profiler_record.timestamps.begin.value, "Full Kernel Name",
|
||||
full_kernel_name.c_str(), "Agent ID", device_id, "Queue ID",
|
||||
profiler_record.queue_id.handle, "GRD",
|
||||
profiler_record.kernel_properties.grid_size, "WGR",
|
||||
profiler_record.kernel_properties.workgroup_size, "LDS",
|
||||
(((profiler_record.kernel_properties.lds_size + (lds_block_size - 1)) &
|
||||
~(lds_block_size - 1))),
|
||||
"SCR", profiler_record.kernel_properties.scratch_size, "Arch. VGPR",
|
||||
profiler_record.kernel_properties.arch_vgpr_count, "Accumilative Vgpr",
|
||||
profiler_record.kernel_properties.accum_vgpr_count, "SGPR",
|
||||
profiler_record.kernel_properties.sgpr_count, "Wave Size",
|
||||
profiler_record.kernel_properties.wave_size, "Signal",
|
||||
profiler_record.kernel_properties.signal_handle);
|
||||
|
||||
TRACE_EVENT_END("KERNELS", queue_track, profiler_record.timestamps.end.value);
|
||||
|
||||
auto get_counter_track_fn = [&](std::string counter_name) {
|
||||
std ::string counter_track_id =
|
||||
std::to_string(machine_id_) + std::to_string(GetPid()) + counter_name;
|
||||
std::pair<int, std::string> gpu_counter_track_id = std::make_pair(device_id, counter_name);
|
||||
std::unordered_map<std::string, perfetto::CounterTrack>::iterator counters_track_it;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(counter_tracks_lock_);
|
||||
counters_track_it = counter_tracks_.find(gpu_counter_track_id.second);
|
||||
if (counters_track_it == counter_tracks_.end()) {
|
||||
/* Create a new perfetto::Track */
|
||||
counters_track_it =
|
||||
counter_tracks_
|
||||
.emplace(gpu_counter_track_id.second,
|
||||
perfetto::CounterTrack(counter_track_id.c_str(), gpu_track))
|
||||
.first;
|
||||
|
||||
auto counter_track_desc = counters_track_it->second.Serialize();
|
||||
std::string counter_track_str = "Process ID " + std::to_string(GetPid()) + " - Counter " +
|
||||
gpu_counter_track_id.second;
|
||||
counter_track_desc.set_name(counter_track_str);
|
||||
perfetto::TrackEvent::SetTrackDescriptor(counters_track_it->second, counter_track_desc);
|
||||
}
|
||||
}
|
||||
return counters_track_it->second;
|
||||
};
|
||||
|
||||
// For Counters
|
||||
if (profiler_record.counters) {
|
||||
for (uint64_t i = 0; i < profiler_record.counters_count.value; i++) {
|
||||
if (profiler_record.counters[i].counter_handler.handle > 0) {
|
||||
size_t name_length = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, profiler_record.counters[i].counter_handler,
|
||||
&name_length));
|
||||
if (name_length > 1) {
|
||||
const char* name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(
|
||||
rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
|
||||
profiler_record.counters[i].counter_handler, &name_c));
|
||||
|
||||
perfetto::CounterTrack counters_track = get_counter_track_fn(std::string(name_c));
|
||||
TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.begin.value,
|
||||
profiler_record.counters[i].value.value);
|
||||
// Added an extra zero event for maintaining start-end of the counter
|
||||
TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.end.value, 0.001);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
 * Converts a single tracer record into Perfetto track events.
 *
 * HIP_OPS / HSA_OPS records are attached to a per-device track (created on first use); every
 * other domain is attached to a per-thread track. Below that parent track the record is written
 * to a ROCTX, HSA API, HIP API, stream or queue sub-track. ROCTX records emit either a begin or
 * an end event depending on operation_id; all other handled domains emit a begin/end pair.
 * ACTIVITY_DOMAIN_EXT_API and unknown domains only produce a warning.
 *
 * The whole call is serialised by writing_lock.
 *
 * @param tracer_record Record to emit (taken by value).
 * @param session_id    Session passed to the rocprofiler_query_* lookups.
 * @return Always 0.
 */
int FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
                      rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> lock(writing_lock);
  if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");

  // String literal (static storage) used as the event name whenever rocprofiler does not supply
  // one, so Perfetto is never handed an uninitialised or dangling pointer.
  static const char* const kUnknownName = "N/A";

  std::string kernel_name;
  char* function_name = nullptr;
  char* activity_name = nullptr;
  std::string roctx_message;
  uint64_t roctx_id = 0;
  const uint64_t thread_id = tracer_record.thread_id.value;

  // Only the parent track that matches the record's domain is populated. Pointers are used
  // (rather than references taken from iterators) so the unused one is a well-defined nullptr.
  perfetto::Track* thread_track = nullptr;
  perfetto::Track* gpu_track = nullptr;

  // Takes the next id from track_counter_ and re-draws while it equals an entry of
  // track_ids_used_ (single pass over the list, as before).
  auto next_track_id = [this]() {
    uint64_t track_id =
        track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
    for (uint64_t used_id : track_ids_used_) {
      while (track_id == used_id) {
        track_id =
            track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
      }
    }
    return track_id;
  };

  if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS ||
      tracer_record.domain == ACTIVITY_DOMAIN_HSA_OPS) {
    int device_id = tracer_record.agent_id.handle;
    if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS && device_id > 0) device_id--;

    std::lock_guard<std::mutex> device_lock(device_tracks_lock_);
    auto device_track_it = device_tracks_.find(device_id);
    if (device_track_it == device_tracks_.end()) {
      /* Create a new perfetto::Track (Sub-Track) */
      device_track_it =
          device_tracks_
              .emplace(device_id, perfetto::ProcessTrack::Global(((device_id + 1) * machine_id_)))
              .first;
      auto gpu_desc = device_track_it->second.Serialize();
      gpu_desc.mutable_process()->set_pid(device_id);
      std::string gpu_str = rocmtools::string_printf("Node: %s Device:", hostname_);
      gpu_desc.mutable_process()->set_process_name(gpu_str);
      perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
      track_ids_used_.emplace_back(1 + machine_id_ + device_id);
    }
    // Addresses of unordered_map elements stay valid across later insertions.
    gpu_track = &device_track_it->second;
  } else {
    std::lock_guard<std::mutex> thread_lock(thread_tracks_lock_);
    auto thread_track_it = thread_tracks_.find(thread_id);
    if (thread_track_it == thread_tracks_.end()) {
      const uint64_t track_id = next_track_id();
      thread_track_it =
          thread_tracks_.emplace(thread_id, perfetto::ProcessTrack::Global(track_id)).first;
      auto thread_track_desc = thread_track_it->second.Serialize();
      std::string thread_track_str =
          rocmtools::string_printf("Node: %s Process ID: %lu Thread ID:", hostname_, GetPid());
      thread_track_desc.mutable_process()->set_pid(thread_id);
      thread_track_desc.mutable_process()->set_process_name(thread_track_str);
      perfetto::TrackEvent::SetTrackDescriptor(thread_track_it->second, thread_track_desc);
    }
    thread_track = &thread_track_it->second;
  }

  switch (tracer_record.domain) {
    case ACTIVITY_DOMAIN_ROCTX: {
      std::unordered_map<uint64_t, perfetto::Track>::iterator roctx_track_it;
      {
        std::lock_guard<std::mutex> roctx_lock(roctx_tracks_lock_);
        roctx_track_it = roctx_tracks_.find(thread_id);
        if (roctx_track_it == roctx_tracks_.end()) {
          /* Create a new perfetto::Track */
          const uint64_t track_id = next_track_id();
          roctx_track_it =
              roctx_tracks_.emplace(thread_id, perfetto::Track(track_id, *thread_track)).first;

          auto roctx_track_desc = roctx_track_it->second.Serialize();
          std::string roctx_track_str = rocmtools::string_printf("ROCTX Markers");
          roctx_track_desc.set_name(roctx_track_str);
          perfetto::TrackEvent::SetTrackDescriptor(roctx_track_it->second, roctx_track_desc);
        }
      }
      perfetto::Track& roctx_track = roctx_track_it->second;

      size_t roctx_message_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
          session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_message_size));
      if (roctx_message_size > 1) {
        char* roctx_message_str = static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
            session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
            tracer_record.operation_id, &roctx_message_str));
        // std::string copies the bytes, so no intermediate strdup() is needed (it only leaked).
        // NOTE(review): roctx_message_str itself is still not released here because the
        // ownership contract of the query call is not visible in this file - confirm.
        if (roctx_message_str) roctx_message = rocmtools::cxx_demangle(std::string(roctx_message_str));
      }

      size_t roctx_id_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
          session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_id_size));
      if (roctx_id_size > 1) {
        char* roctx_id_str = static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
            session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
            tracer_record.operation_id, &roctx_id_str));
        if (roctx_id_str) {
          roctx_id = std::stoll(std::string(roctx_id_str));
          free(roctx_id_str);
        }
      }

      if (tracer_record.operation_id.id == 1) {
        // NOTE(review): the name points into the local roctx_message; if perfetto::StaticString
        // keeps the pointer rather than copying, this should become a dynamic string - confirm.
        perfetto::StaticString roctx_message_pft(
            (!roctx_message.empty() ? roctx_message.c_str() : ""));
        TRACE_EVENT_BEGIN("ROCTX_API", roctx_message_pft, roctx_track,
                          tracer_record.timestamps.begin.value, "Timestamp(ns)",
                          tracer_record.timestamps.begin.value, "RocTx ID", roctx_id);
        roctx_track_entries_++;
      } else {
        TRACE_EVENT_END("ROCTX_API", roctx_track, tracer_record.timestamps.begin.value);
        roctx_track_entries_--;
      }
      break;
    }
    case ACTIVITY_DOMAIN_HSA_API: {
      std::unordered_map<uint64_t, perfetto::Track>::iterator hsa_track_it;
      {
        std::lock_guard<std::mutex> hsa_lock(hsa_tracks_lock_);
        hsa_track_it = hsa_tracks_.find(thread_id);
        if (hsa_track_it == hsa_tracks_.end()) {
          /* Create a new perfetto::Track */
          const uint64_t track_id = next_track_id();
          hsa_track_it =
              hsa_tracks_.emplace(thread_id, perfetto::Track(track_id, *thread_track)).first;
          auto hsa_track_desc = hsa_track_it->second.Serialize();
          std::string hsa_track_str = rocmtools::string_printf("HSA API");
          hsa_track_desc.set_name(hsa_track_str);
          perfetto::TrackEvent::SetTrackDescriptor(hsa_track_it->second, hsa_track_desc);
        }
      }
      perfetto::Track& hsa_track = hsa_track_it->second;

      size_t function_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
          session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_size));
      if (function_name_size > 1) {
        // NOTE(review): not freed - the pointer is handed to perfetto::StaticString, which is
        // presumed to retain it; confirm before releasing.
        function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
            session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &function_name));
      }
      TRACE_EVENT_BEGIN("HSA_API",
                        perfetto::StaticString(function_name ? function_name : kUnknownName),
                        hsa_track, tracer_record.timestamps.begin.value,
                        perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      TRACE_EVENT_END("HSA_API", hsa_track, tracer_record.timestamps.end.value);
      break;
    }
    case ACTIVITY_DOMAIN_HIP_API: {
      std::unordered_map<uint64_t, perfetto::Track>::iterator hip_track_it;
      {
        std::lock_guard<std::mutex> hip_lock(hip_tracks_lock_);
        hip_track_it = hip_tracks_.find(thread_id);
        if (hip_track_it == hip_tracks_.end()) {
          /* Create a new perfetto::Track */
          const uint64_t track_id = next_track_id();
          hip_track_it =
              hip_tracks_.emplace(thread_id, perfetto::Track(track_id, *thread_track)).first;

          auto hip_track_desc = hip_track_it->second.Serialize();
          std::string hip_track_str = rocmtools::string_printf("HIP API");
          hip_track_desc.set_name(hip_track_str);
          perfetto::TrackEvent::SetTrackDescriptor(hip_track_it->second, hip_track_desc);
        }
      }
      perfetto::Track& hip_track = hip_track_it->second;

      size_t function_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_size));
      if (function_name_size > 1) {
        // NOTE(review): not freed - see the StaticString lifetime note in the HSA_API case.
        function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
            session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &function_name));
      }

      size_t kernel_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &kernel_name_size));
      if (kernel_name_size > 1) {
        char* kernel_name_str = static_cast<char*>(malloc(kernel_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
            session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &kernel_name_str));
        if (kernel_name_str) {
          kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
          free(kernel_name_str);
        }
      }

      const char* api_name = function_name ? function_name : kUnknownName;
      if (kernel_name.size() > 0) {
        TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(api_name), hip_track,
                          tracer_record.timestamps.begin.value, "Kernel Name", kernel_name,
                          perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      } else {
        TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(api_name), hip_track,
                          tracer_record.timestamps.begin.value,
                          perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      }
      TRACE_EVENT_END("HIP_API", hip_track, tracer_record.timestamps.end.value);
      break;
    }
    case ACTIVITY_DOMAIN_EXT_API: {
      printf("Warning: External API is not supported!\n");
      break;
    }
    case ACTIVITY_DOMAIN_HIP_OPS: {
      uint64_t stream_id = 0;
      size_t stream_id_str_size = 0;
      char* stream_id_str = nullptr;
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
          rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
          &stream_id_str_size));
      if (stream_id_str_size > 1) {
        stream_id_str = static_cast<char*>(malloc(stream_id_str_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
            session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
            rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
            &stream_id_str));
        // NOTE(review): stream_id_str is never released; ownership of the returned buffer is
        // not visible here - confirm and free if the caller owns it.
        if (stream_id_str != nullptr) stream_id = std::stoll(stream_id_str);
      }

      std::unordered_map<int, perfetto::Track>::iterator stream_track_it;
      {
        std::lock_guard<std::mutex> stream_lock(stream_tracks_lock_);
        stream_track_it = stream_tracks_.find(stream_id);
        if (stream_track_it == stream_tracks_.end()) {
          /* Create a new perfetto::Track */
          uint64_t track_id = ((1 + stream_id + tracer_record.agent_id.handle) * machine_id_ *
                               STREAM_CONSTANT * GetPid());
          stream_track_it =
              stream_tracks_.emplace(stream_id, perfetto::Track(track_id, *gpu_track)).first;

          auto stream_desc = stream_track_it->second.Serialize();
          // stream_id is 64-bit: "%d" was a format/argument mismatch.
          std::string stream_str = rocmtools::string_printf(
              "Process ID: %lu Stream %lu", GetPid(), static_cast<unsigned long>(stream_id));
          stream_desc.set_name(stream_str);
          perfetto::TrackEvent::SetTrackDescriptor(stream_track_it->second, stream_desc);
          track_ids_used_.emplace_back(1 + machine_id_ + tracer_record.agent_id.handle);
        }
      }
      perfetto::Track& stream_track = stream_track_it->second;

      if (tracer_record.api_data_handle.handle && tracer_record.api_data_handle.size > 1) {
        // The demangler input is copied into a std::string; the previous strdup() only leaked.
        kernel_name = rocmtools::cxx_demangle(
            std::string(reinterpret_cast<const char*>(tracer_record.api_data_handle.handle)));
        // The truncated name is duplicated on purpose: the temporary returned by
        // truncate_name() dies at the end of the statement, while StaticString is presumed to
        // keep the pointer (NOTE(review): this copy is never freed).
        TRACE_EVENT_BEGIN(
            "HIP_OPS",
            perfetto::StaticString(strdup(rocmtools::truncate_name(kernel_name).c_str())),
            stream_track, tracer_record.timestamps.begin.value, "Agent ID",
            tracer_record.agent_id.handle, "Process ID", GetPid(), "Kernel Name", kernel_name,
            perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      } else {
        size_t activity_name_size = 0;
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
            session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &activity_name_size));
        if (activity_name_size > 1) {
          activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
          CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
              session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
              tracer_record.operation_id, &activity_name));
        }
        // Falls back to the static "N/A" literal; the old code pointed at the buffer of a
        // temporary std::string that was already destroyed.
        TRACE_EVENT_BEGIN("HIP_OPS",
                          perfetto::StaticString(activity_name ? activity_name : kUnknownName),
                          stream_track, tracer_record.timestamps.begin.value, "Agent ID",
                          tracer_record.agent_id.handle, "Process ID", GetPid(),
                          perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      }
      TRACE_EVENT_END("HIP_OPS", stream_track, tracer_record.timestamps.end.value);
      break;
    }
    case ACTIVITY_DOMAIN_HSA_OPS: {
      std::pair<int, uint64_t> gpu_queue_id =
          std::make_pair(tracer_record.agent_id.handle, tracer_record.queue_id.handle);
      std::unordered_map<int, perfetto::Track>::iterator queue_track_it;
      {
        std::lock_guard<std::mutex> queue_lock(queue_tracks_lock_);
        queue_track_it = queue_tracks_.find(gpu_queue_id.first);
        if (queue_track_it == queue_tracks_.end()) {
          /* Create a new perfetto::Track */
          uint64_t track_id =
              ((1 + tracer_record.queue_id.handle + tracer_record.agent_id.handle) * machine_id_ *
               QUEUE_CONSTANT * GetPid());
          queue_track_it =
              queue_tracks_.emplace(gpu_queue_id.first, perfetto::Track(track_id, *gpu_track))
                  .first;

          auto queue_desc = queue_track_it->second.Serialize();
          std::string queue_str = rocmtools::string_printf("Process ID: %lu Queue %ld", GetPid(),
                                                           gpu_queue_id.second);
          queue_desc.set_name(queue_str);
          perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
          // Recorded once per created track (as the device/stream paths do) instead of once per
          // record, which made track_ids_used_ grow without bound.
          track_ids_used_.emplace_back(tracer_record.queue_id.handle + machine_id_ + 1 +
                                       tracer_record.agent_id.handle);
        }
      }
      perfetto::Track& queue_track = queue_track_it->second;

      size_t activity_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
          session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &activity_name_size));
      if (activity_name_size > 1) {
        activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
            session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &activity_name));
      }
      TRACE_EVENT_BEGIN("HSA_OPS",
                        perfetto::StaticString(activity_name ? activity_name : kUnknownName),
                        queue_track, tracer_record.timestamps.begin.value, "Agent ID",
                        tracer_record.agent_id.handle, "Queue ID", tracer_record.queue_id.handle,
                        "Process ID", GetPid(),
                        perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      TRACE_EVENT_END("HSA_OPS", queue_track, tracer_record.timestamps.end.value);
      break;
    }
    default: {
      rocmtools::warning("ignored record for domain %d", tracer_record.domain);
      break;
    }
  }
  return 0;
}
|
||||
|
||||
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id) {
|
||||
if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");
|
||||
while (begin < end) {
|
||||
if (!begin) return 0;
|
||||
switch (begin->kind) {
|
||||
case ROCPROFILER_PROFILER_RECORD: {
|
||||
rocprofiler_record_profiler_t* profiler_record = const_cast<rocprofiler_record_profiler_t*>(
|
||||
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin));
|
||||
FlushProfilerRecord(*profiler_record, session_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_TRACER_RECORD: {
|
||||
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
|
||||
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
|
||||
FlushTracerRecord(*tracer_record, session_id);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Returns the is_valid_ flag; rocprofiler_plugin_initialize treats false as a failed setup.
bool IsValid() const {
  return is_valid_;
}
|
||||
|
||||
private:
|
||||
fs::path output_prefix_;
|
||||
std::unique_ptr<perfetto::TracingSession> tracing_session_;
|
||||
int file_descriptor_;
|
||||
bool is_valid_{false};
|
||||
size_t roctx_track_entries_{0};
|
||||
|
||||
// Correlate stream id(s) with correlation id(s) to identify the stream id of every HIP activity
|
||||
std::unordered_map<uint64_t, uint64_t> stream_ids_;
|
||||
|
||||
// Callback Tracks
|
||||
std::unordered_map<uint64_t, perfetto::Track> thread_tracks_;
|
||||
std::unordered_map<uint64_t, perfetto::Track> roctx_tracks_, hsa_tracks_, hip_tracks_,
|
||||
hip_ext_tracks_;
|
||||
|
||||
// Activity Tracks
|
||||
std::unordered_map<int, perfetto::Track> device_tracks_;
|
||||
std::unordered_map<int, perfetto::Track> queue_tracks_, stream_tracks_;
|
||||
|
||||
std::unordered_map<std::string, perfetto::CounterTrack> counter_tracks_;
|
||||
|
||||
std::atomic<uint64_t> track_counter_{GetPid()};
|
||||
std::vector<uint64_t> track_ids_used_;
|
||||
|
||||
std::mutex stream_ids_lock_, thread_tracks_lock_, roctx_tracks_lock_, hsa_tracks_lock_,
|
||||
hip_tracks_lock_, hip_ext_tracks_lock_, device_tracks_lock_, queue_tracks_lock_,
|
||||
stream_tracks_lock_, counter_tracks_lock_;
|
||||
|
||||
char hostname_[1024];
|
||||
uint64_t machine_id_;
|
||||
|
||||
std::ofstream stream_;
|
||||
};
|
||||
|
||||
perfetto_plugin_t* perfetto_plugin = nullptr;
|
||||
|
||||
} // namespace
|
||||
|
||||
int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
|
||||
uint32_t rocprofiler_minor_version) {
|
||||
if (rocprofiler_major_version != ROCPROFILER_VERSION_MAJOR ||
|
||||
rocprofiler_minor_version > ROCPROFILER_VERSION_MINOR)
|
||||
return -1;
|
||||
|
||||
if (perfetto_plugin != nullptr) return -1;
|
||||
|
||||
perfetto_plugin = new perfetto_plugin_t();
|
||||
if (perfetto_plugin->IsValid()) return 0;
|
||||
|
||||
delete perfetto_plugin;
|
||||
perfetto_plugin = nullptr;
|
||||
return -1;
|
||||
}
|
||||
|
||||
void rocprofiler_plugin_finalize() {
|
||||
if (!perfetto_plugin) return;
|
||||
delete perfetto_plugin;
|
||||
perfetto_plugin = nullptr;
|
||||
}
|
||||
|
||||
/**
 * Forwards a slice of buffered records to the plugin instance.
 *
 * @return -1 when the plugin is missing or invalid, otherwise the result of
 *         perfetto_plugin_t::WriteBufferRecords.
 */
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  const bool plugin_ready = perfetto_plugin != nullptr && perfetto_plugin->IsValid();
  if (plugin_ready) {
    return perfetto_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
  }
  return -1;
}
|
||||
|
||||
/**
 * Forwards a single tracer record to the plugin instance.
 *
 * Records whose header id handle is 0 are accepted but not written.
 *
 * @return -1 when the plugin is missing or invalid, 0 otherwise.
 */
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
                                                       rocprofiler_session_id_t session_id) {
  const bool plugin_ready = perfetto_plugin != nullptr && perfetto_plugin->IsValid();
  if (!plugin_ready) return -1;

  if (record.header.id.handle != 0) {
    perfetto_plugin->FlushTracerRecord(record, session_id);
  }
  return 0;
}
|
||||
@@ -0,0 +1,189 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright (c) 2017, The Android Open Source Project
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
# Global OWNERS that can approve Perfetto changes.
|
||||
# Please look first at OWNERS in the various subdirectories before falling back
|
||||
# on this, as the former tend to be more brain-cache-hot.
|
||||
|
||||
# Perfetto tracing internals and API/ABI boundaries.
|
||||
primiano@google.com
|
||||
skyostil@google.com
|
||||
|
||||
# UI, Ftrace interop, traced_probes, protozero, Android internals.
|
||||
hjd@google.com
|
||||
|
||||
# Trace Processor, metrics, infra.
|
||||
lalitm@google.com
|
||||
|
||||
# Callstack / memory profilers, traced_probes & Linux internals.
|
||||
ddiproietto@google.com
|
||||
rsavitski@google.com
|
||||
|
||||
# Chromium-related things and tracing SDK.
|
||||
eseckler@google.com
|
||||
nuskos@google.com
|
||||
oysteine@google.com
|
||||
|
||||
# Most Android-related metrics.
|
||||
ilkos@google.com
|
||||
|
||||
# fmayer@ left the team. Please try first rsavitski@, ddiproietto@ or primiano@
|
||||
# and leave fmayer@ as an emergency-only escalation on profilers.
|
||||
fmayer@google.com
|
||||
|
||||
# chromium.org aliases for adding DEPS entries from chromium subprojects to
|
||||
# third_party/perfetto.
|
||||
eseckler@chromium.org
|
||||
nuskos@chromium.org
|
||||
skyostil@chromium.org
|
||||
@@ -0,0 +1,394 @@
|
||||
# Tracing SDK
|
||||
|
||||
The Perfetto Tracing SDK is a C++11 library that allows userspace applications
|
||||
to emit trace events and add more app-specific context to a Perfetto trace.
|
||||
|
||||
When using the Tracing SDK there are two main aspects to consider:
|
||||
|
||||
1. Whether you are interested only in tracing events coming from your own app
|
||||
or want to collect full-stack traces that overlay app trace events with
|
||||
system trace events like scheduler traces, syscalls or any other Perfetto
|
||||
data source.
|
||||
|
||||
2. For app-specific tracing, whether you need to trace simple types of timeline
|
||||
events (e.g., slices, counters) or need to define complex data sources with a
|
||||
custom strongly-typed schema (e.g., for dumping the state of a subsystem of
|
||||
your app into the trace).
|
||||
|
||||
For Android-only instrumentation, the advice is to keep using the existing
|
||||
[android.os.Trace (SDK)][atrace-sdk] / [ATrace_* (NDK)][atrace-ndk] if they
|
||||
are sufficient for your use cases. Atrace-based instrumentation is fully
|
||||
supported in Perfetto.
|
||||
See the [Data Sources -> Android System -> Atrace Instrumentation][atrace-ds]
|
||||
for details.
|
||||
|
||||
## Getting started
|
||||
|
||||
TIP: The code from these examples is also available [in the
|
||||
repository](/examples/sdk/README.md).
|
||||
|
||||
To start using the Client API, first check out the latest SDK release:
|
||||
|
||||
```bash
|
||||
git clone https://android.googlesource.com/platform/external/perfetto -b v23.0
|
||||
```
|
||||
|
||||
The SDK consists of two files, `sdk/perfetto.h` and `sdk/perfetto.cc`. These are
|
||||
an amalgamation of the Client API designed to be easy to integrate into existing
|
||||
build systems. The sources are self-contained and require only a C++11 compliant
|
||||
standard library.
|
||||
|
||||
For example, to add the SDK to a CMake project, edit your CMakeLists.txt:
|
||||
|
||||
```cmake
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
project(PerfettoExample)
|
||||
find_package(Threads)
|
||||
|
||||
# Define a static library for Perfetto.
|
||||
include_directories(perfetto/sdk)
|
||||
add_library(perfetto STATIC perfetto/sdk/perfetto.cc)
|
||||
|
||||
# Link the library to your main executable.
|
||||
add_executable(example example.cc)
|
||||
target_link_libraries(example perfetto ${CMAKE_THREAD_LIBS_INIT})
|
||||
```
|
||||
|
||||
Next, initialize Perfetto in your program:
|
||||
|
||||
```C++
|
||||
#include <perfetto.h>
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
perfetto::TracingInitArgs args;
|
||||
|
||||
// The backends determine where trace events are recorded. You may select one
|
||||
// or more of:
|
||||
|
||||
// 1) The in-process backend only records within the app itself.
|
||||
args.backends |= perfetto::kInProcessBackend;
|
||||
|
||||
// 2) The system backend writes events into a system Perfetto daemon,
|
||||
// allowing merging app and system events (e.g., ftrace) on the same
|
||||
// timeline. Requires the Perfetto `traced` daemon to be running (e.g.,
|
||||
// on Android Pie and newer).
|
||||
args.backends |= perfetto::kSystemBackend;
|
||||
|
||||
perfetto::Tracing::Initialize(args);
|
||||
}
|
||||
```
|
||||
|
||||
You are now ready to instrument your app with trace events.
|
||||
|
||||
## Custom data sources vs Track events
|
||||
|
||||
The SDK offers two abstraction layers to inject tracing data, built on top of
|
||||
each other, which trade off code complexity vs expressive power:
|
||||
[track events](#track-events) and [custom data sources](#custom-data-sources).
|
||||
|
||||
### Track events
|
||||
|
||||
Track events are the suggested option when dealing with app-specific tracing as
|
||||
they take care of a number of subtleties (e.g., thread safety, flushing, string
|
||||
interning).
|
||||
Track events are time-bounded events (e.g., slices, counters) based on simple
|
||||
`TRACE_EVENT` annotation tags in the codebase, like this:
|
||||
|
||||
```c++
|
||||
#include <perfetto.h>
|
||||
|
||||
PERFETTO_DEFINE_CATEGORIES(
|
||||
perfetto::Category("rendering")
|
||||
.SetDescription("Events from the graphics subsystem"),
|
||||
perfetto::Category("network")
|
||||
.SetDescription("Network upload and download statistics"));
|
||||
|
||||
...
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
...
|
||||
perfetto::Tracing::Initialize(args);
|
||||
perfetto::TrackEvent::Register();
|
||||
}
|
||||
|
||||
...
|
||||
|
||||
void LayerTreeHost::DoUpdateLayers() {
|
||||
TRACE_EVENT("rendering", "LayerTreeHost::DoUpdateLayers");
|
||||
...
|
||||
for (PictureLayer& pl : layers) {
|
||||
TRACE_EVENT("rendering", "PictureLayer::Update");
|
||||
pl.Update();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Which are rendered in the UI as follows:
|
||||
|
||||

|
||||
|
||||
Track events are the best default option and serve most tracing use cases with
|
||||
very little complexity.
|
||||
|
||||
To include your new track events in the trace, ensure that the `track_event`
|
||||
data source is included in the trace config. If you do not specify any
|
||||
categories then all non-debug categories will be included by default. However,
|
||||
you can also add just the categories you are interested in like so:
|
||||
|
||||
```protobuf
|
||||
data_sources {
|
||||
config {
|
||||
name: "track_event"
|
||||
track_event_config {
|
||||
enabled_categories: "rendering"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
See the [Track events page](track-events.md) for full instructions.
|
||||
|
||||
### Custom data sources
|
||||
|
||||
For most uses, track events are the most straightforward way of instrumenting
|
||||
apps for tracing. However, in some rare circumstances they are not
|
||||
flexible enough, e.g., when the data doesn't fit the notion of a track or is
|
||||
high volume enough that it needs a strongly typed schema to minimize the size of
|
||||
each event. In this case, you can implement a *custom data source* for
|
||||
Perfetto.
|
||||
|
||||
Unlike track events, when working with custom data sources, you will also need
|
||||
corresponding changes in [trace processor](/docs/analysis/trace-processor.md)
|
||||
to enable importing your data format.
|
||||
|
||||
A custom data source is a subclass of `perfetto::DataSource`. Perfetto will
|
||||
automatically create one instance of the class for each tracing session it is
|
||||
active in (usually just one).
|
||||
|
||||
```C++
|
||||
class CustomDataSource : public perfetto::DataSource<CustomDataSource> {
|
||||
public:
|
||||
void OnSetup(const SetupArgs&) override {
|
||||
// Use this callback to apply any custom configuration to your data source
|
||||
// based on the TraceConfig in SetupArgs.
|
||||
}
|
||||
|
||||
void OnStart(const StartArgs&) override {
|
||||
// This notification can be used to initialize the GPU driver, enable
|
||||
// counters, etc. StartArgs will contain the DataSourceDescriptor,
|
||||
// which can be extended.
|
||||
}
|
||||
|
||||
void OnStop(const StopArgs&) override {
|
||||
// Undo any initialization done in OnStart.
|
||||
}
|
||||
|
||||
// Data sources can also have per-instance state.
|
||||
int my_custom_state = 0;
|
||||
};
|
||||
|
||||
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
```
|
||||
|
||||
The data source's static data should be defined in one source file like this:
|
||||
|
||||
```C++
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
```
|
||||
|
||||
Custom data sources need to be registered with Perfetto:
|
||||
|
||||
```C++
|
||||
int main(int argc, char** argv) {
|
||||
...
|
||||
perfetto::Tracing::Initialize(args);
|
||||
// Add the following:
|
||||
perfetto::DataSourceDescriptor dsd;
|
||||
dsd.set_name("com.example.custom_data_source");
|
||||
CustomDataSource::Register(dsd);
|
||||
}
|
||||
```
|
||||
|
||||
As with all data sources, the custom data source needs to be specified in the
|
||||
trace config to enable tracing:
|
||||
|
||||
```C++
|
||||
perfetto::TraceConfig cfg;
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("com.example.custom_data_source");
|
||||
```
|
||||
|
||||
Finally, call the `Trace()` method to record an event with your custom data
|
||||
source. The lambda function passed to that method will only be called if tracing
|
||||
is enabled. It is always called synchronously and possibly multiple times if
|
||||
multiple concurrent tracing sessions are active.
|
||||
|
||||
```C++
|
||||
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
|
||||
auto packet = ctx.NewTracePacket();
|
||||
packet->set_timestamp(perfetto::TrackEvent::GetTraceTimeNs());
|
||||
packet->set_for_testing()->set_str("Hello world!");
|
||||
});
|
||||
```
|
||||
|
||||
If necessary the `Trace()` method can access the custom data source state
|
||||
(`my_custom_state` in the example above). Doing so will take a mutex to
|
||||
ensure the data source isn't destroyed (e.g., because of stopping tracing) while
|
||||
the `Trace()` method is called on another thread. For example:
|
||||
|
||||
```C++
|
||||
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
|
||||
auto safe_handle = ctx.GetDataSourceLocked(); // Holds a RAII lock.
|
||||
DoSomethingWith(safe_handle->my_custom_state);
|
||||
});
|
||||
```
|
||||
|
||||
## In-process vs System mode
|
||||
|
||||
The two modes are not mutually exclusive. An app can be configured to work
|
||||
in both modes and respond both to in-process tracing requests and system
|
||||
tracing requests. Both modes generate the same trace file format.
|
||||
|
||||
### In-process mode
|
||||
|
||||
In this mode both the perfetto service and the app-defined data sources are
|
||||
hosted fully in-process, in the same process of the profiled app. No connection
|
||||
to the system `traced` daemon will be attempted.
|
||||
|
||||
In-process mode can be enabled by setting
|
||||
`TracingInitArgs.backends = perfetto::kInProcessBackend` when initializing the
|
||||
SDK, see examples below.
|
||||
|
||||
This mode is used to generate traces that contain only events emitted by
|
||||
the app, but not other types of events (e.g. scheduler traces).
|
||||
|
||||
The main advantage is that by running fully in-process, it doesn't require any
|
||||
special OS privileges and the profiled process can control the lifecycle of
|
||||
tracing sessions.
|
||||
|
||||
This mode is supported on Android, Linux, MacOS and Windows.
|
||||
|
||||
### System mode
|
||||
|
||||
In this mode the app-defined data sources will connect to the external `traced`
|
||||
service using the [IPC over UNIX socket][ipc].
|
||||
|
||||
System mode can be enabled by setting
|
||||
`TracingInitArgs.backends = perfetto::kSystemBackend` when initializing the SDK,
|
||||
see examples below.
|
||||
|
||||
The main advantage of this mode is that it is possible to create fused traces where
|
||||
app events are overlaid on the same timeline of OS events. This enables
|
||||
full-stack performance investigations, looking all the way through syscalls and
|
||||
kernel scheduling events.
|
||||
|
||||
The main limitation of this mode is that it requires the external `traced` daemon
|
||||
to be up and running and reachable through the UNIX socket connection.
|
||||
|
||||
This is suggested for local debugging or lab testing scenarios where the user
|
||||
(or the test harness) can control the OS deployment (e.g., sideload binaries on
|
||||
Android).
|
||||
|
||||
When using system mode, the tracing session must be controlled from the outside,
|
||||
using the `perfetto` command-line client
|
||||
(See [reference](/docs/reference/perfetto-cli)). This is because when collecting
|
||||
system traces, tracing data producers are not allowed to read back the trace
|
||||
data as it might disclose information about other processes and allow
|
||||
side-channel attacks.
|
||||
|
||||
* On Android 9 (Pie) and beyond, traced is shipped as part of the platform.
|
||||
* On older versions of Android, traced can be built from sources using the
|
||||
[standalone NDK-based workflow](/docs/contributing/build-instructions.md)
|
||||
and sideloaded via adb shell.
|
||||
* On Linux and MacOS `traced` must be built and run separately. See the
|
||||
[Linux quickstart](/docs/quickstart/linux-tracing.md) for instructions.
|
||||
|
||||
_System mode is not yet supported on Windows, due to the lack of an IPC
|
||||
implementation_.
|
||||
|
||||
## {#recording} Recording traces through the API
|
||||
|
||||
_Tracing through the API is currently only supported with the in-process mode.
|
||||
When using system mode, use the `perfetto` cmdline client (see quickstart
|
||||
guides)._
|
||||
|
||||
First initialize a [TraceConfig](/docs/reference/trace-config-proto.autogen)
|
||||
message which specifies what type of data to record.
|
||||
|
||||
If your app includes [track events](track-events.md) (i.e., `TRACE_EVENT`), you
|
||||
typically want to choose the categories which are enabled for tracing.
|
||||
|
||||
By default, all non-debug categories are enabled, but you can enable a specific
|
||||
one like this:
|
||||
|
||||
```C++
|
||||
perfetto::protos::gen::TrackEventConfig track_event_cfg;
|
||||
track_event_cfg.add_disabled_categories("*");
|
||||
track_event_cfg.add_enabled_categories("rendering");
|
||||
```
|
||||
|
||||
Next, build the main trace config together with the track event part:
|
||||
|
||||
```C++
|
||||
perfetto::TraceConfig cfg;
|
||||
cfg.add_buffers()->set_size_kb(1024); // Record up to 1 MiB.
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("track_event");
|
||||
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
```
|
||||
|
||||
If your app includes a custom data source, you can also enable it here:
|
||||
|
||||
```C++
|
||||
ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("my_data_source");
|
||||
```
|
||||
|
||||
After building the trace config, you can begin tracing:
|
||||
|
||||
```C++
|
||||
std::unique_ptr<perfetto::TracingSession> tracing_session(
|
||||
perfetto::Tracing::NewTrace());
|
||||
tracing_session->Setup(cfg);
|
||||
tracing_session->StartBlocking();
|
||||
```
|
||||
|
||||
TIP: API methods with `Blocking` in their name will suspend the calling thread
|
||||
until the respective operation is complete. There are also asynchronous
|
||||
variants that don't have this limitation.
|
||||
|
||||
Now that tracing is active, instruct your app to perform the operation you
|
||||
want to record. After that, stop tracing and collect the
|
||||
protobuf-formatted trace data:
|
||||
|
||||
```C++
|
||||
tracing_session->StopBlocking();
|
||||
std::vector<char> trace_data(tracing_session->ReadTraceBlocking());
|
||||
|
||||
// Write the trace into a file.
|
||||
std::ofstream output;
|
||||
output.open("example.perfetto-trace", std::ios::out | std::ios::binary);
|
||||
output.write(&trace_data[0], trace_data.size());
|
||||
output.close();
|
||||
```
|
||||
|
||||
To save memory with longer traces, you can also tell Perfetto to write
|
||||
directly into a file by passing a file descriptor into Setup(), remembering
|
||||
to close the file after tracing is done:
|
||||
|
||||
```C++
|
||||
int fd = open("example.perfetto-trace", O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
tracing_session->Setup(cfg, fd);
|
||||
tracing_session->StartBlocking();
|
||||
// ...
|
||||
tracing_session->StopBlocking();
|
||||
close(fd);
|
||||
```
|
||||
|
||||
The resulting trace file can be directly opened in the [Perfetto
|
||||
UI](https://ui.perfetto.dev) or the [Trace Processor](/docs/analysis/trace-processor.md).
|
||||
|
||||
[ipc]: /docs/design-docs/api-and-abi.md#socket-protocol
|
||||
[atrace-ds]: /docs/data-sources/atrace.md
|
||||
[atrace-ndk]: https://developer.android.com/ndk/reference/group/tracing
|
||||
[atrace-sdk]: https://developer.android.com/reference/android/os/Trace
|
||||
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
@@ -0,0 +1,63 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <systemd/sd-id128.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
#include "src/utils/helper.h"
|
||||
|
||||
// Evaluates `call` exactly once and compares the result against
// ROCPROFILER_STATUS_SUCCESS; on any other value rocmtools::fatal() is invoked
// with a fixed error message. The do/while(false) wrapper makes the macro
// behave like a single statement, so it is safe inside unbraced if/else.
#define CHECK_ROCMTOOLS(call)                                 \
  do {                                                        \
    if (ROCPROFILER_STATUS_SUCCESS != (call)) {               \
      rocmtools::fatal("Error: ROCMTools API Call Error!");   \
    }                                                         \
  } while (false)
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the id of the current process.
//
// The value is fetched with the raw getpid system call on the first
// invocation and stored in a function-local static; later calls return the
// stored value and do not query the kernel again.
[[maybe_unused]] uint32_t GetPid() {
  static const uint32_t cached_pid = static_cast<uint32_t>(syscall(__NR_getpid));
  return cached_pid;
}
|
||||
|
||||
[[maybe_unused]] uint64_t GetMachineID() {
|
||||
char hostname[1023] = "\0";
|
||||
gethostname(hostname, 1023);
|
||||
sd_id128_t ret;
|
||||
char machine_id[SD_ID128_STRING_MAX];
|
||||
[[maybe_unused]] int status = sd_id128_get_machine(&ret);
|
||||
assert(status == 0 && "Error: Couldn't get machine id!");
|
||||
if (sd_id128_to_string(ret, machine_id)) return std::hash<std::string>{}(machine_id);
|
||||
return std::rand();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
Executable
+247
@@ -0,0 +1,247 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Directory that contains this script (symlinks resolved) and its parent.
# All expansions are quoted so that locations containing spaces keep working.
ROCPROFV2_DIR="$(dirname -- "$(realpath "${BASH_SOURCE[0]}")")"
ROCM_DIR="$(dirname -- "${ROCPROFV2_DIR}")"

# RUN_FROM_BUILD is 1 when the script location contains "/build" or
# "/rocprofiler" anywhere in its path, and 0 otherwise. In the "/rocprofiler"
# case (and only when "/build" did not match first) ROCM_DIR is the script
# directory itself rather than its parent.
RUN_FROM_BUILD=0
case "$ROCPROFV2_DIR" in
  */build*)
    RUN_FROM_BUILD=1
    ;;
  */rocprofiler*)
    RUN_FROM_BUILD=1
    ROCM_DIR="$ROCPROFV2_DIR"
    ;;
esac
|
||||
|
||||
# Prints the command line help and terminates the script with status 1.
# The developer options (build/test/install) are only listed when
# RUN_FROM_BUILD is 1.
usage() {
  echo -e "ROCProfiler Run Script Usage:"
  echo -e "-h | --help For showing this message"
  echo -e "--list-counters For showing all available counters for the current GPUs"
  if [ "$RUN_FROM_BUILD" == 1 ]; then
    echo -e "-b | --build For compiling"
    echo -e "-cb | --clean-build For full clean build"
    echo -e "-t | --test For Running the tests"
    echo -e "-ct | --clean-build-test For Running the tests after a clean build"
    echo -e "-mt | --mem-test For Running the Memory Leak tests. This run requires building using -acb | --asan-clean-build option"
    echo -e "-acb | --asan-clean-build For compiling with ASAN library attached"
    echo -e "--install For installing rocprofiler without clean build in the default installation folder (review build.sh to know more about the default paths)"
    echo -e "--clean-install For installing rocprofiler with new clean build in the default installation folder (review build.sh to know more about the default paths)"
  fi
  echo -e "--hip-api For Collecting HIP API Traces"
  # --hip-activity enables the HIP API and HIP activity traces.
  echo -e "--hip-activity For Collecting HIP API Activities Traces"
  # --hsa-api enables the HSA API trace.
  echo -e "--hsa-api For Collecting HSA API Traces"
  echo -e "--hsa-activity For Collecting HSA API Activities Traces"
  echo -e "--roctx-trace For Collecting ROCTx Traces"
  echo -e "--kernel-trace For Collecting Kernel dispatch Traces"
  echo -e "--sys-trace For Collecting HIP and HSA APIs and their Activities Traces along ROCTX and Kernel Dispatch traces"
  echo -e "--plugin PLUGIN_NAME For enabling a plugin (file/perfetto)"
  echo -e "-i | --input For adding counters file path (every line in the text file represents a counter)"
  # The long spelling matches what the option parser accepts.
  echo -e "-o | --output-file-name For the output file name"
  echo -e "-d | --output-directory For adding output path where the output files will be saved"
  echo -e "-fi | --flush-interval For adding a flush interval in milliseconds, every \"flush interval\" the buffers will be flushed"
  exit 1
}
|
||||
|
||||
# Without any argument there is nothing to run: show the help and stop.
if [ -z "$1" ] ; then
  usage
  exit 1
fi

# Developer options (build/test/install) are only meaningful when
# RUN_FROM_BUILD is 1. Previously such an option was silently ignored
# otherwise, without being consumed, which made the parsing loop spin
# forever. Reject it explicitly instead.
#   $1 - the option as typed by the user (used in the error message)
require_build_tree() {
  if [ "$RUN_FROM_BUILD" != 1 ]; then
    echo -e "Wrong option \"$1\", it is only available when running from the build tree!\n"
    usage
    exit 1
  fi
}

# Consume the profiler options from the front of the argument list. The loop
# stops at the first argument that does not start with "-"; that argument and
# everything after it form the command line of the application to profile.
while true ; do
  case "$1" in
    -h|--help)
      usage
      exit 1
      ;;
    -b|--build)
      require_build_tree "$1"
      TO_CLEAN=no ./build.sh
      exit 1
      ;;
    -acb|--asan-clean-build)
      require_build_tree "$1"
      ASAN=yes TO_CLEAN=yes ./build.sh
      exit 1
      ;;
    -cb|--clean-build)
      require_build_tree "$1"
      TO_CLEAN=yes ./build.sh
      exit 1
      ;;
    -t|--test)
      require_build_tree "$1"
      export ROCPROFILER_METRICS_PATH=$ROCM_DIR/build/counters/derived_counters.xml
      TO_CLEAN=no "$ROCM_DIR/build.sh"
      # Do not run the tests from the wrong directory if build/ is missing.
      pushd build || exit 1
      ./run_tests.sh
      exit 1
      ;;
    -mt|--mem-test)
      require_build_tree "$1"
      ASAN=yes TO_CLEAN=yes ./build.sh
      ./tests/memorytests/run_asan_tests.sh "$ROCM_DIR/build/tests/featuretests/profiler/gtests/apps/hip_vectoradd" "$ROCM_DIR/build/memleaks.log"
      exit 1
      ;;
    -ct|--clean-build-test)
      require_build_tree "$1"
      TO_CLEAN=yes "$ROCM_DIR/build.sh"
      pushd build || exit 1
      ./run_tests.sh
      exit 1
      ;;
    --install)
      require_build_tree "$1"
      TO_CLEAN=no "$ROCM_DIR/build.sh"
      pushd build || exit 1
      make install
      exit 1
      ;;
    --clean-install)
      require_build_tree "$1"
      TO_CLEAN=yes "$ROCM_DIR/build.sh"
      pushd build || exit 1
      make install
      exit 1
      ;;
    --list-counters)
      if [ "$RUN_FROM_BUILD" == 1 ]; then
        export ROCPROFILER_METRICS_PATH=$ROCM_DIR/build/counters/derived_counters.xml
        "$ROCM_DIR/build/src/tools/ctrl"
      else
        export ROCPROFILER_METRICS_PATH=$ROCPROFV2_DIR/../libexec/rocprofiler/counters/derived_counters.xml
        export LD_LIBRARY_PATH=$ROCPROFV2_DIR/../lib:$LD_LIBRARY_PATH
        export LD_PRELOAD=$ROCPROFV2_DIR/../lib/librocprofiler_tool.so
        "$ROCPROFV2_DIR/../libexec/rocprofiler/ctrl"
      fi
      exit 1
      ;;
    -i|--input)
      # The counters file must be given and readable.
      if [ -n "$2" ] && [ -r "$2" ] ; then
        if [ "$RUN_FROM_BUILD" == 1 ]; then
          export ROCPROFILER_METRICS_PATH=$ROCM_DIR/build/counters/derived_counters.xml
        else
          export ROCPROFILER_METRICS_PATH=$ROCPROFV2_DIR/../libexec/rocprofiler/counters/derived_counters.xml
        fi
        export COUNTERS_PATH=$2
      else
        echo -e "Error: \"$2\" doesn't exist!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    # --output-file is the spelling shown by the help text; --output-file-name
    # is the one that was always accepted. Both are honoured.
    -o|--output-file|--output-file-name)
      if [ -n "$2" ] ; then
        export OUT_FILE_NAME=$2
      else
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -d|--output-directory)
      if [ -n "$2" ] ; then
        mkdir -p "$2"
        export OUTPUT_PATH=$2
        # Remember the base directory: OUTPUT_PATH is re-exported per counter
        # set later on.
        OUTPUT_PATH_INTERNAL=$2
      else
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -fi|--flush-interval)
      # A non-numeric value makes the -gt test fail; its diagnostic is
      # silenced because the message below already explains the problem.
      if [ -n "$2" ] && [ "$2" -gt 0 ] 2>/dev/null ; then
        export ROCPROFILER_FLUSH_INTERVAL=$2
      else
        echo -e "Wrong input \"$2\" for flush interval, it needs to be integer greater than zero!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    --hip-api)
      export ROCPROFILER_HIP_API_TRACE=1
      shift
      ;;
    --hip-activity)
      export ROCPROFILER_HIP_API_TRACE=1
      export ROCPROFILER_HIP_ACTIVITY_TRACE=1
      shift
      ;;
    --hsa-api)
      export ROCPROFILER_HSA_API_TRACE=1
      shift
      ;;
    --hsa-activity)
      export ROCPROFILER_HSA_API_TRACE=1
      export ROCPROFILER_HSA_ACTIVITY_TRACE=1
      shift
      ;;
    --roctx-trace)
      export ROCPROFILER_ROCTX_TRACE=1
      shift
      ;;
    --kernel-trace)
      export ROCPROFILER_KERNEL_TRACE=1
      shift
      ;;
    --sys-trace)
      export ROCPROFILER_HIP_API_TRACE=1
      export ROCPROFILER_HIP_ACTIVITY_TRACE=1
      export ROCPROFILER_HSA_API_TRACE=1
      export ROCPROFILER_HSA_ACTIVITY_TRACE=1
      export ROCPROFILER_ROCTX_TRACE=1
      export ROCPROFILER_KERNEL_TRACE=1
      shift
      ;;
    --amd-sys)
      export ROCPROFILER_ENABLE_AMDSYS=$2
      # Two separate shifts on purpose: "shift 2" would shift nothing when the
      # value is missing and the loop would never terminate.
      shift
      shift
      ;;
    --plugin)
      # Quoted on purpose: an unquoted, empty $2 makes "[ -n ]" succeed.
      if [ -n "$2" ] ; then
        PLUGIN=$2
        if [ "$RUN_FROM_BUILD" == 1 ]; then
          export ROCPROFILER_PLUGIN_LIB=lib${PLUGIN}_plugin.so
        else
          export ROCPROFILER_PLUGIN_LIB=rocprofiler/lib${PLUGIN}_plugin.so
        fi
      else
        echo -e "Wrong input \"$2\" for plugin!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -*)
      echo -e "Wrong option \"$1\", Please use the following options:\n"
      usage
      exit 1
      ;;
    *)
      # First non-option argument (or no arguments left): stop parsing.
      break
      ;;
  esac
done
|
||||
|
||||
# Read the counters file (if any) into PMC_LINES, one array element per line.
# The "|| [[ -n ... ]]" part keeps a last line that has no trailing newline.
PMC_LINES=()
if [ -n "$COUNTERS_PATH" ]; then
  input=$COUNTERS_PATH
  while IFS= read -r line || [[ -n "$line" ]]; do
    PMC_LINES+=( "$line" )
  done < "$input"
fi

# Profiler tool library to preload into the application.
if [ "$RUN_FROM_BUILD" == 1 ]; then
  TOOL_LIB="$ROCM_DIR/build/librocprofiler_tool.so"
else
  TOOL_LIB="$ROCM_DIR/lib/librocprofiler_tool.so"
fi

# The remaining positional parameters are the application command line. They
# are passed on as "$@" so that arguments containing spaces or glob characters
# reach the application unchanged.
# NOTE: as before, only the first line of the counters file decides whether
# the per-line runs happen.
if [ -n "${PMC_LINES[0]}" ]; then
  # One application run per line of the counters file.
  COUNTER=1
  for i in "${!PMC_LINES[@]}"; do
    export ROCPROFILER_COUNTERS="${PMC_LINES[$i]}"
    if [ -n "$OUTPUT_PATH" ]; then
      # Each run gets its own pmc_<n> directory below the directory given
      # with -d, together with a copy of the counters used for that run.
      FINAL_PATH="$OUTPUT_PATH_INTERNAL/pmc_$COUNTER"
      echo -e "\nThe output path for the following counters: $FINAL_PATH"
      mkdir -p "$FINAL_PATH"
      echo "$ROCPROFILER_COUNTERS" > "$FINAL_PATH/pmc.txt"
      export OUTPUT_PATH=$FINAL_PATH
      COUNTER=$((COUNTER + 1))
    fi
    LD_PRELOAD="$LD_PRELOAD:$TOOL_LIB" "$@"
  done
else
  LD_PRELOAD="$LD_PRELOAD:$TOOL_LIB" "$@"
fi
|
||||
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
---
|
||||
If:
|
||||
PathMatch: common/common.h
|
||||
|
||||
CompileFlags:
|
||||
Add: ['-x', 'hip']
|
||||
|
||||
# Local Variables:
|
||||
# mode: yaml
|
||||
# End:
|
||||
@@ -0,0 +1,142 @@
|
||||
include(CheckCSourceCompiles)

# ############################################################################################################################################
# General Requirements
# ############################################################################################################################################

# Locate hsa.h through the include directories exported by the HSA runtime
# target and expose the directory that contains it.
get_property(HSA_RUNTIME_INCLUDE_DIRECTORIES
  TARGET hsa-runtime64::hsa-runtime64
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(HSA_H hsa.h
  PATHS ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
  PATH_SUFFIXES hsa
  NO_DEFAULT_PATH
  REQUIRED)
get_filename_component(HSA_RUNTIME_INC_PATH "${HSA_H}" DIRECTORY)
# Kept directory-scoped: the sample targets defined later in this directory
# do not add this path themselves.
include_directories("${HSA_RUNTIME_INC_PATH}")

# Set the HIP language runtime link flags as FindHIP does not set them.
set(CMAKE_EXECUTABLE_RUNTIME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG})
set(CMAKE_EXECUTABLE_RUNTIME_HIP_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG_SEP})
set(CMAKE_EXECUTABLE_RPATH_LINK_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RPATH_LINK_CXX_FLAG})

# The HIP find-module directory is appended exactly once.
list(APPEND CMAKE_MODULE_PATH "${ROCM_PATH}/lib/cmake/hip")
set(CMAKE_HIP_ARCHITECTURES OFF)
find_package(HIP REQUIRED MODULE)

find_package(Clang REQUIRED CONFIG
  PATHS "${ROCM_PATH}"
  PATH_SUFFIXES "llvm/lib/cmake/clang")

# PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) so the project's own find-modules
# are still found when this project is built as a sub-project.
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules")
find_package(LibElf REQUIRED)
find_package(LibDw REQUIRED)

## Add custom targets to build and run all the samples
add_custom_target(samples)
add_dependencies(samples ${ROCPROFILER_TARGET})
add_custom_target(run-samples
  COMMAND ${PROJECT_BINARY_DIR}/samples/run_samples.sh
  DEPENDS samples
  VERBATIM)

# Helper source compiled into every sample; named explicitly instead of being
# globbed so that a missing file is reported rather than silently dropped.
set(ROCPROFILER_UTIL_SRC_FILES "${PROJECT_SOURCE_DIR}/src/utils/helper.cpp")
|
||||
# ############################################################################################################################################
|
||||
|
||||
# ############################################################################################################################################
|
||||
# ############################################################################################################################################
|
||||
# Samples Build & Run Script
|
||||
# ############################################################################################################################################
|
||||
# ############################################################################################################################################
|
||||
|
||||
# ############################################################################################################################################
|
||||
# Profiler Samples
|
||||
# ############################################################################################################################################
|
||||
|
||||
## Profiler and tracer samples
##
## Every sample is built with the same recipe, so the target -> source mapping
## is kept in a small table and the recipe is written once. A plain foreach is
## used (no function) so hip_add_executable still runs at directory scope.
set(ROCPROFILER_SIMPLE_SAMPLES
  profiler_application_replay
  profiler_kernel_replay
  profiler_user_replay
  profiler_device_profiling
  tracer_hip_hsa)

# Profiler samples
set(ROCPROFILER_SAMPLE_SOURCE_profiler_application_replay profiler/application_replay_sample.cpp)
set(ROCPROFILER_SAMPLE_SOURCE_profiler_kernel_replay profiler/kernel_replay_sample.cpp)
set(ROCPROFILER_SAMPLE_SOURCE_profiler_user_replay profiler/user_replay_sample.cpp)
set(ROCPROFILER_SAMPLE_SOURCE_profiler_device_profiling profiler/device_profiling_sample.cpp)
# Tracer sample (HIP/HSA trace)
set(ROCPROFILER_SAMPLE_SOURCE_tracer_hip_hsa tracer/sample.cpp)

foreach(sample_target IN LISTS ROCPROFILER_SIMPLE_SAMPLES)
  set(sample_source "${ROCPROFILER_SAMPLE_SOURCE_${sample_target}}")

  set_source_files_properties(${sample_source} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
  hip_add_executable(${sample_target} ${sample_source} ${ROCPROFILER_UTIL_SRC_FILES})
  target_include_directories(${sample_target}
    PRIVATE
      ${PROJECT_SOURCE_DIR}
      ${PROJECT_SOURCE_DIR}/inc
      ${CMAKE_CURRENT_SOURCE_DIR}/common)
  target_link_libraries(${sample_target} PRIVATE ${ROCPROFILER_TARGET} systemd amd_comgr)
  add_dependencies(samples ${sample_target})
  install(TARGETS ${sample_target}
    RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples
    COMPONENT samples)
endforeach()
|
||||
|
||||
# ############################################################################################################################################
|
||||
# PC Sampling Samples
|
||||
# ############################################################################################################################################
|
||||
|
||||
set(CODE_PRINTING_SAMPLE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/pcsampler/code_printing_sample)
# CONFIGURE_DEPENDS makes the build re-check the glob, so sources added to or
# removed from the sample directory are picked up without a manual re-configure.
file(GLOB PC_SAMPLING_CODE_PRINTING_FILES CONFIGURE_DEPENDS ${CODE_PRINTING_SAMPLE_DIR}/*.cpp)
set_source_files_properties(${PC_SAMPLING_CODE_PRINTING_FILES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_executable(pc_sampling_code_printing ${PC_SAMPLING_CODE_PRINTING_FILES}
  HIPCC_OPTIONS
    -std=c++17
    # Include debugging symbols and source for the contextual disassembly
    -gdwarf-4)

# Define HAVE_MEMFD_CREATE for the sample only when the C library provides
# memfd_create().
check_c_source_compiles("
#define _GNU_SOURCE
#include <sys/mman.h>
int main() { return memfd_create (\"cmake_test\", 0); }
" HAVE_MEMFD_CREATE)
if(HAVE_MEMFD_CREATE)
  target_compile_definitions(pc_sampling_code_printing PRIVATE HAVE_MEMFD_CREATE)
endif()

# Threads::Threads only exists after find_package(Threads); requesting it here
# keeps this block independent of what parent directories already did.
find_package(Threads REQUIRED)
target_link_libraries(pc_sampling_code_printing
  PRIVATE
    ${ROCPROFILER_TARGET}
    rocm-dbgapi
    ${LIBELF_LIBRARIES}
    ${LIBDW_LIBRARIES}
    hsa-runtime64::hsa-runtime64
    Threads::Threads
    # Portable spelling of the dynamic-loading library (instead of a bare "dl").
    ${CMAKE_DL_LIBS})
target_include_directories(pc_sampling_code_printing
  PRIVATE
    # NOTE(review): TEST_DIR and ROOT_DIR are not set in this file; presumably
    # inherited from a parent directory - confirm or drop.
    ${TEST_DIR}
    ${ROOT_DIR}
    ${HSA_RUNTIME_INC_PATH}
    ${PROJECT_SOURCE_DIR})
add_dependencies(samples pc_sampling_code_printing)
install(TARGETS pc_sampling_code_printing
  RUNTIME DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/samples
  COMPONENT samples)
|
||||
|
||||
# ############################################################################################################################################
|
||||
# Scripts to run samples
|
||||
# ############################################################################################################################################
|
||||
|
||||
# Copy the run_samples script to the samples folder of the build tree. Input
# and output are spelled out in full: the result no longer depends on the
# destination already existing as a directory at configure time.
configure_file(
  "${CMAKE_CURRENT_SOURCE_DIR}/run_samples.sh"
  "${PROJECT_BINARY_DIR}/samples/run_samples.sh"
  COPYONLY)
|
||||
|
||||
# ############################################################################################################################################
|
||||
@@ -0,0 +1,350 @@
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <rocprofiler.h>
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <systemd/sd-id128.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <mutex>
|
||||
|
||||
#include "src/utils/helper.h"
|
||||
|
||||
// assert() with a message attached: the message is evaluated and discarded by
// the comma operator, so it only appears in the expression text that a failed
// assert prints.
#define ASSERTM(exp, msg) assert(((void)msg, exp))

// Evaluates a HIP runtime call once; on any status other than hipSuccess the
// HIP error string is written to stderr and the process aborts.
#define HIP_CALL(call)                                             \
  do {                                                             \
    const hipError_t hip_call_status = call;                       \
    if (hip_call_status == hipSuccess) break;                      \
    fprintf(stderr, "%s\n", hipGetErrorString(hip_call_status));   \
    abort();                                                       \
  } while (0)

// Evaluates a ROCProfiler API call once and reports a fatal error through
// rocmtools::fatal() when it does not return ROCPROFILER_STATUS_SUCCESS.
#define CHECK_ROCPROFILER(call)                                                      \
  do {                                                                               \
    const bool rocprofiler_call_ok = ((call) == ROCPROFILER_STATUS_SUCCESS);         \
    if (!rocprofiler_call_ok) rocmtools::fatal("Error: ROCMTools API Call Error!"); \
  } while (false)
|
||||
|
||||
// Device (Kernel) functions, it must be void
|
||||
__global__ void kernelA() { printf("\nKernel A\n"); }
|
||||
__global__ void kernelB() { printf("\nKernel B\n"); }
|
||||
__global__ void kernelC() { printf("\nKernel C\n"); }
|
||||
__global__ void kernelD() { printf("\nKernel D\n"); }
|
||||
__global__ void kernelE() { printf("\nKernel E\n"); }
|
||||
__global__ void kernelF() { printf("\nKernel F\n"); }
|
||||
|
||||
// Returns the id of the calling process. The getpid system call is issued on
// the first call only; the result is kept in a function-local static.
[[maybe_unused]] uint32_t GetPid() {
  static uint32_t cached_pid = syscall(__NR_getpid);
  const uint32_t result = cached_pid;
  return result;
}
|
||||
|
||||
[[maybe_unused]] uint64_t GetMachineID() {
|
||||
char hostname[1023] = "\0";
|
||||
gethostname(hostname, 1023);
|
||||
sd_id128_t ret;
|
||||
char machine_id[SD_ID128_STRING_MAX];
|
||||
[[maybe_unused]] int status = sd_id128_get_machine(&ret);
|
||||
assert(status == 0 && "Error: Couldn't get machine id!");
|
||||
if (sd_id128_to_string(ret, machine_id)) return std::hash<std::string>{}(machine_id);
|
||||
return std::rand();
|
||||
}
|
||||
|
||||
// Stream that the Flush* functions in this header write their records to.
std::ofstream output_file;

// Points output_file at standard output: copies std::cout's formatting,
// mirrors its state bits, then installs std::cout's stream buffer through the
// basic_ios base (std::ofstream itself offers no rdbuf setter).
void prepare() {
  std::basic_ios<char>& sink = output_file;
  sink.copyfmt(std::cout);
  sink.clear(std::cout.rdstate());
  sink.rdbuf(std::cout.rdbuf());
}
|
||||
|
||||
std::mutex writing_lock;
|
||||
|
||||
// Maps a tracer activity domain to its printable name. Domains that are not
// listed yield an empty string.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:   return "ROCTX_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_API: return "HIP_API_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_OPS: return "HIP_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_API: return "HSA_API_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_OPS: return "HSA_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_EVT: return "HSA_EVT_DOMAIN";
    default:                      return "";
  }
}
|
||||
|
||||
// Flush function needs to be provided by the user to be used in three cases by
|
||||
// the user buffer:
|
||||
// 1- Application is finished
|
||||
// 2- Buffer is full
|
||||
// 3- Flush Interval specified by the user
|
||||
void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record, rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) {
|
||||
std::lock_guard<std::mutex> lock(writing_lock);
|
||||
std::string kernel_name;
|
||||
std::string function_name;
|
||||
std::string roctx_message;
|
||||
uint64_t roctx_id;
|
||||
if ((tracer_record.operation_id.id == 0 && tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS)) {
|
||||
if (tracer_record.api_data_handle.handle &&
|
||||
strlen(reinterpret_cast<const char*>(tracer_record.api_data_handle.handle)) > 1)
|
||||
kernel_name = rocmtools::cxx_demangle(
|
||||
reinterpret_cast<const char*>(tracer_record.api_data_handle.handle));
|
||||
}
|
||||
if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) {
|
||||
size_t function_name_size = 0;
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name_size));
|
||||
if (function_name_size > 1) {
|
||||
char* function_name_c = (char*)malloc(function_name_size);
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name_c));
|
||||
if (function_name_c) function_name = std::string(function_name_c);
|
||||
}
|
||||
}
|
||||
if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) {
|
||||
size_t function_name_size = 0;
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name_size));
|
||||
if (function_name_size > 1) {
|
||||
char* function_name_c = (char*)malloc(function_name_size);
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name_c));
|
||||
if (function_name_c) function_name = std::string(function_name_c);
|
||||
}
|
||||
size_t kernel_name_size = 0;
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &kernel_name_size));
|
||||
if (kernel_name_size > 1) {
|
||||
char* kernel_name_str = (char*)malloc(kernel_name_size * sizeof(char));
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &kernel_name_str));
|
||||
if (kernel_name_str) kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
|
||||
}
|
||||
}
|
||||
if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
|
||||
size_t roctx_message_size = 0;
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &roctx_message_size));
|
||||
if (roctx_message_size > 1) {
|
||||
[[maybe_unused]] char* roctx_message_str =
|
||||
static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &roctx_message_str));
|
||||
if (roctx_message_str)
|
||||
roctx_message = rocmtools::cxx_demangle(std::string(strdup(roctx_message_str)));
|
||||
}
|
||||
size_t roctx_id_size = 0;
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, tracer_record.operation_id,
|
||||
&roctx_id_size));
|
||||
if (roctx_id_size > 1) {
|
||||
[[maybe_unused]] char* roctx_id_str =
|
||||
static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, tracer_record.operation_id,
|
||||
&roctx_id_str));
|
||||
if (roctx_id_str) {
|
||||
roctx_id = std::stoll(std::string(strdup(roctx_id_str)));
|
||||
free(roctx_id_str);
|
||||
}
|
||||
}
|
||||
}
|
||||
output_file << "Record [" << tracer_record.header.id.handle << "], Domain("
|
||||
<< GetDomainName(tracer_record.domain) << "), Begin("
|
||||
<< tracer_record.timestamps.begin.value << "), End("
|
||||
<< tracer_record.timestamps.end.value << "), Correlation ID( "
|
||||
<< tracer_record.correlation_id.value << ")";
|
||||
if (roctx_id >= 0) output_file << ", ROCTX ID(" << roctx_id << ")";
|
||||
if (roctx_message.size() > 1) output_file << ", ROCTX Message(" << roctx_message << ")";
|
||||
if (function_name.size() > 1) output_file << ", Function(" << function_name << ")";
|
||||
if (kernel_name.size() > 1) output_file << ", Kernel Name(" << kernel_name.c_str() << ")";
|
||||
output_file << std::endl;
|
||||
}
|
||||
|
||||
void FlushProfilerRecord(const rocprofiler_record_profiler_t* profiler_record,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
std::lock_guard<std::mutex> lock(writing_lock);
|
||||
size_t name_length = 0;
|
||||
bool is_counter = true;
|
||||
CHECK_ROCPROFILER(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
|
||||
profiler_record->kernel_id, &name_length));
|
||||
// Taken from rocprofiler: The size hasn't changed in recent past
|
||||
static const uint32_t lds_block_size = 128 * 4;
|
||||
const char* kernel_name_c;
|
||||
if (name_length > 1) {
|
||||
kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCPROFILER(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record->kernel_id,
|
||||
&kernel_name_c));
|
||||
}
|
||||
output_file << std::string("dispatch[") << std::to_string(profiler_record->header.id.handle)
|
||||
<< "], " << std::string("gpu_id(") << std::to_string(profiler_record->gpu_id.handle)
|
||||
<< "), " << std::string("queue_id(")
|
||||
<< std::to_string(profiler_record->queue_id.handle) << "), "
|
||||
<< std::string("queue_index(") << std::to_string(profiler_record->queue_idx.value)
|
||||
<< "), " << std::string("pid(") << std::to_string(GetPid()) << "), "
|
||||
<< std::string("tid(") << std::to_string(profiler_record->thread_id.value) << ")";
|
||||
output_file << ", " << std::string("grd(")
|
||||
<< std::to_string(profiler_record->kernel_properties.grid_size) << "), "
|
||||
<< std::string("wgr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.workgroup_size) << "), "
|
||||
<< std::string("lds(")
|
||||
<< std::to_string(
|
||||
((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) &
|
||||
~(lds_block_size - 1)))
|
||||
<< "), " << std::string("scr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.scratch_size) << "), "
|
||||
<< std::string("arch_vgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << "), "
|
||||
<< std::string("accum_vgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << "), "
|
||||
<< std::string("sgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.sgpr_count) << "), "
|
||||
<< std::string("wave_size(")
|
||||
<< std::to_string(profiler_record->kernel_properties.wave_size) << "), "
|
||||
<< std::string("sig(")
|
||||
<< std::to_string(profiler_record->kernel_properties.signal_handle);
|
||||
std::string kernel_name = rocmtools::cxx_demangle(kernel_name_c);
|
||||
output_file << "), " << std::string("obj(") << std::to_string(profiler_record->kernel_id.handle)
|
||||
<< "), " << std::string("kernel-name(\"") << kernel_name << "\")"
|
||||
<< std::string(", time(") << std::to_string(profiler_record->timestamps.begin.value)
|
||||
<< ") ";
|
||||
|
||||
// For Counters
|
||||
output_file << std::endl;
|
||||
if (profiler_record->counters) {
|
||||
for (uint64_t i = 0; i < profiler_record->counters_count.value; i++) {
|
||||
if (profiler_record->counters[i].counter_handler.handle > 0) {
|
||||
size_t counter_name_length = 0;
|
||||
CHECK_ROCPROFILER(rocprofiler_query_counter_info_size(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
|
||||
&counter_name_length));
|
||||
if (counter_name_length > 1) {
|
||||
const char* name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCPROFILER(rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
|
||||
profiler_record->counters[i].counter_handler,
|
||||
&name_c));
|
||||
output_file << ", " << name_c << " ("
|
||||
<< std::to_string(profiler_record->counters[i].value.value) << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FlushPCSamplingRecord(
|
||||
const rocprofiler_record_pc_sample_t *pc_sampling_record) {
|
||||
const auto &sample = pc_sampling_record->pc_sample;
|
||||
output_file << "dispatch[" << sample.dispatch_id.value << "], "
|
||||
<< "timestamp(" << sample.timestamp.value << "), "
|
||||
<< "gpu_id(" << sample.gpu_id.handle << "), "
|
||||
<< "pc-sample(" << std::hex << std::showbase << sample.pc << "), "
|
||||
<< "se(" << sample.se << ')'
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
int WriteBufferRecords(const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
while (begin < end) {
|
||||
if (!begin) return 0;
|
||||
switch (begin->kind) {
|
||||
case ROCPROFILER_PROFILER_RECORD: {
|
||||
const rocprofiler_record_profiler_t* profiler_record =
|
||||
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin);
|
||||
FlushProfilerRecord(profiler_record, session_id, buffer_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_TRACER_RECORD: {
|
||||
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
|
||||
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
|
||||
FlushTracerRecord(*tracer_record, session_id, buffer_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PC_SAMPLING_RECORD: {
|
||||
const rocprofiler_record_pc_sample_t *pc_sampling_record =
|
||||
reinterpret_cast<const rocprofiler_record_pc_sample_t *>(begin);
|
||||
FlushPCSamplingRecord(pc_sampling_record);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kernelCalls(char c) {
|
||||
switch (c) {
|
||||
case 'A': {
|
||||
hipLaunchKernelGGL(kernelA, dim3(1), dim3(1), 0, 0);
|
||||
break;
|
||||
}
|
||||
case 'B': {
|
||||
hipLaunchKernelGGL(kernelB, dim3(1), dim3(1), 0, 0);
|
||||
break;
|
||||
}
|
||||
case 'C': {
|
||||
hipLaunchKernelGGL(kernelC, dim3(1), dim3(1), 0, 0);
|
||||
break;
|
||||
}
|
||||
case 'D': {
|
||||
hipLaunchKernelGGL(kernelD, dim3(1), dim3(1), 0, 0);
|
||||
break;
|
||||
}
|
||||
case 'E': {
|
||||
hipLaunchKernelGGL(kernelE, dim3(1), dim3(1), 0, 0);
|
||||
break;
|
||||
}
|
||||
case 'F': {
|
||||
hipLaunchKernelGGL(kernelF, dim3(1), dim3(1), 0, 0);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
fprintf(stderr, "Error: Wrong Kernel character (%c) Given for kernelCalls!\n", c);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
pmc: SQ_WAVES GRBM_COUNT GRBM_GUI_ACTIVE SQ_INSTS_VALU FETCH_SIZE
|
||||
@@ -0,0 +1,10 @@
|
||||
---
# clangd configuration fragment: applies only to main.cpp.
If:
  PathMatch: main.cpp

# Pass "-x hip" to the compiler invocation clangd uses for the matched file.
CompileFlags:
  Add:
    - '-x'
    - 'hip'

# Local Variables:
# mode: yaml
# End:
|
||||
Filskillnaden har hållits tillbaka eftersom den är för stor
Load Diff
@@ -0,0 +1,126 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_CODE_PRINTING_HPP_
|
||||
#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_CODE_PRINTING_HPP_
|
||||
|
||||
#include <cstdint>
#include <map>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
|
||||
|
||||
#include <amd-dbgapi/amd-dbgapi.h>
|
||||
|
||||
namespace amd::debug_agent {
|
||||
|
||||
class code_object_t {
|
||||
struct symbol_info_t {
|
||||
const std::string m_name;
|
||||
amd_dbgapi_global_address_t m_value;
|
||||
amd_dbgapi_size_t m_size;
|
||||
};
|
||||
|
||||
using symbol_map_t =
|
||||
std::optional
|
||||
< std::map
|
||||
< amd_dbgapi_global_address_t
|
||||
, std::pair<std::string, amd_dbgapi_size_t>
|
||||
>
|
||||
>;
|
||||
|
||||
public:
|
||||
void load_symbol_map();
|
||||
void load_debug_info();
|
||||
|
||||
std::optional<symbol_info_t>
|
||||
find_symbol(amd_dbgapi_global_address_t address);
|
||||
|
||||
code_object_t(amd_dbgapi_code_object_id_t code_object_id);
|
||||
code_object_t(code_object_t &&rhs);
|
||||
|
||||
~code_object_t();
|
||||
|
||||
void open();
|
||||
bool is_open() const { return m_fd.has_value(); }
|
||||
|
||||
amd_dbgapi_global_address_t load_address() const { return m_load_address; }
|
||||
amd_dbgapi_size_t mem_size() const { return m_mem_size; }
|
||||
// FIXME(?): extra function not in rocr-debug-agent
|
||||
uint32_t elf_amdgpu_machine() const { return m_elf_amdgpu_machine; }
|
||||
|
||||
void disassemble_around(amd_dbgapi_architecture_id_t architecture_id,
|
||||
amd_dbgapi_global_address_t pc);
|
||||
|
||||
void disassemble_kernel(amd_dbgapi_architecture_id_t architecture_id,
|
||||
amd_dbgapi_global_address_t start_addr,
|
||||
bool const print_src = false);
|
||||
|
||||
bool save(const std::string &directory) const;
|
||||
|
||||
amd_dbgapi_global_address_t m_load_address{ 0 };
|
||||
amd_dbgapi_size_t m_mem_size{ 0 };
|
||||
std::optional<int> m_fd;
|
||||
|
||||
std::optional
|
||||
< std::map<amd_dbgapi_global_address_t, std::pair<std::string, size_t>>
|
||||
>
|
||||
m_line_number_map;
|
||||
|
||||
std::optional
|
||||
< std::map<amd_dbgapi_global_address_t, amd_dbgapi_global_address_t>
|
||||
>
|
||||
m_pc_ranges_map;
|
||||
|
||||
symbol_map_t m_symbol_map;
|
||||
std::string m_uri;
|
||||
amd_dbgapi_code_object_id_t const m_code_object_id;
|
||||
// FIXME(?): extra field not in rocr-debug-agent
|
||||
uint32_t m_elf_amdgpu_machine{ 0 };
|
||||
};
|
||||
|
||||
} // namespace amd::debug_agent
|
||||
|
||||
enum struct disassembly_mode {
|
||||
AROUND,
|
||||
KERNEL
|
||||
};
|
||||
|
||||
std::tuple
|
||||
< amd_dbgapi_process_id_t
|
||||
, std::map<amd_dbgapi_global_address_t, amd::debug_agent::code_object_t>
|
||||
>
|
||||
init_disassembly();
|
||||
|
||||
void
|
||||
disassemble(
|
||||
disassembly_mode const mode,
|
||||
amd_dbgapi_process_id_t const process_id,
|
||||
std::map<amd_dbgapi_global_address_t, amd::debug_agent::code_object_t>
|
||||
&code_object_map,
|
||||
uint64_t const addr);
|
||||
|
||||
void
|
||||
print_pc_context(
|
||||
amd_dbgapi_process_id_t const process_id,
|
||||
std::map<amd_dbgapi_global_address_t, amd::debug_agent::code_object_t>
|
||||
&code_object_map,
|
||||
amd_dbgapi_global_address_t const pc);
|
||||
|
||||
#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_CODE_PRINTING_HPP_
|
||||
@@ -0,0 +1,215 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <cinttypes>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <amd-dbgapi/amd-dbgapi.h>
|
||||
#include <hsa/amd_hsa_kernel_code.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
|
||||
#include "inc/rocprofiler.h"
|
||||
|
||||
#include "code_printing.hpp"
|
||||
#include "program.hpp"
|
||||
|
||||
// Deleter functor that releases a char buffer with the C library's free().
struct libc_freer {
  void operator()(char *ptr) {
    free(ptr);
  }
};
|
||||
|
||||
namespace util {

// Folds the std::hash of `v`, and then of every value in `rest` in order,
// into the running hash `hsh`. Each value is mixed in with the constant
// 0x9e3779b9 and two shifts of the running hash.
template <typename T, typename... Ts>
static void hash_combine(size_t &hsh, T const &v, Ts const &...rest) {
  size_t const mixed = std::hash<T>{}(v) + 0x9e3779b9 + (hsh << 6) + (hsh >> 2);
  hsh ^= mixed;
  // Remaining values are handled one after another by recursion.
  if constexpr (sizeof...(Ts) > 0) {
    hash_combine(hsh, rest...);
  }
}

}  // namespace util
|
||||
|
||||
[[maybe_unused]]
|
||||
static inline bool
|
||||
operator==(hsa_executable_t const &l, hsa_executable_t const &r)
|
||||
{
|
||||
return l.handle == r.handle;
|
||||
}
|
||||
|
||||
[[maybe_unused]]
|
||||
static inline bool
|
||||
operator==(
|
||||
rocprofiler_kernel_dispatch_id_t const &l,
|
||||
rocprofiler_kernel_dispatch_id_t const &r)
|
||||
{
|
||||
return l.value == r.value;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
operator==(amd_dbgapi_process_id_t const &l, amd_dbgapi_process_id_t const &r)
|
||||
{
|
||||
return l.handle == r.handle;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
operator!=(amd_dbgapi_process_id_t const &l, amd_dbgapi_process_id_t const &r)
|
||||
{
|
||||
return !(l == r);
|
||||
}
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<hsa_executable_t> {
|
||||
size_t operator()(hsa_executable_t const &v) const {
|
||||
size_t ret = 0;
|
||||
util::hash_combine(ret, v.handle);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct hash<rocprofiler_kernel_dispatch_id_t> {
|
||||
size_t operator()(rocprofiler_kernel_dispatch_id_t const &v) const {
|
||||
size_t ret = 0;
|
||||
util::hash_combine(ret, v.value);
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
struct disassembly_ctx_t {
|
||||
disassembly_ctx_t();
|
||||
~disassembly_ctx_t();
|
||||
|
||||
void disassemble_kernels(bool const reinitialize);
|
||||
void init();
|
||||
bool inited() const;
|
||||
void reset();
|
||||
|
||||
amd_dbgapi_process_id_t process_id;
|
||||
std::map
|
||||
< amd_dbgapi_global_address_t
|
||||
, amd::debug_agent::code_object_t
|
||||
> codeobjs;
|
||||
};
|
||||
|
||||
disassembly_ctx_t::disassembly_ctx_t()
|
||||
: process_id(AMD_DBGAPI_PROCESS_NONE)
|
||||
, codeobjs()
|
||||
{}
|
||||
|
||||
disassembly_ctx_t::~disassembly_ctx_t()
|
||||
{
|
||||
reset();
|
||||
}
|
||||
|
||||
void
|
||||
disassembly_ctx_t::disassemble_kernels(bool const reinitialize)
|
||||
{
|
||||
if (reinitialize) {
|
||||
reset();
|
||||
}
|
||||
if (!inited()) {
|
||||
init();
|
||||
}
|
||||
|
||||
auto it = codeobjs.begin();
|
||||
auto const end = codeobjs.end();
|
||||
auto const pred = [](decltype(*it) &x){
|
||||
/*
|
||||
* A lame filter for the kernels in the current file, because nothing
|
||||
* else in this little demo will have the URL prefix of `file://`.
|
||||
*/
|
||||
return x.second.m_uri.find("file://", 0, 7) != std::string::npos;
|
||||
};
|
||||
while (end != (it = std::find_if(it, end, pred))) {
|
||||
auto &codeobj = it->second;
|
||||
codeobj.load_symbol_map();
|
||||
if (!codeobj.m_symbol_map) {
|
||||
fputs(PROGNAME ": error: failed to load symbol map\n", stderr);
|
||||
break;
|
||||
}
|
||||
|
||||
for (auto const &sym : *codeobj.m_symbol_map) {
|
||||
auto const &addr = sym.first;
|
||||
::disassemble(disassembly_mode::KERNEL, process_id, codeobjs, addr);
|
||||
}
|
||||
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
inline void
|
||||
disassembly_ctx_t::init()
|
||||
{
|
||||
std::tie(process_id, codeobjs) = init_disassembly();
|
||||
}
|
||||
|
||||
inline bool
|
||||
disassembly_ctx_t::inited() const
|
||||
{
|
||||
return AMD_DBGAPI_PROCESS_NONE != process_id;
|
||||
}
|
||||
|
||||
void
|
||||
disassembly_ctx_t::reset()
|
||||
{
|
||||
codeobjs.clear();
|
||||
if (AMD_DBGAPI_PROCESS_NONE.handle != process_id.handle) {
|
||||
amd_dbgapi_process_detach(process_id);
|
||||
amd_dbgapi_finalize();
|
||||
process_id = AMD_DBGAPI_PROCESS_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
static disassembly_ctx_t g_dis;
|
||||
|
||||
void
|
||||
disassembly_disassemble_kernels(bool const reinitialize)
|
||||
{
|
||||
g_dis.disassemble_kernels(reinitialize);
|
||||
}
|
||||
|
||||
void
|
||||
disassembly_print_pc_sample_context(amd_dbgapi_global_address_t const pc)
|
||||
{
|
||||
if (!g_dis.inited()) {
|
||||
g_dis.init();
|
||||
}
|
||||
print_pc_context(g_dis.process_id, g_dis.codeobjs, pc);
|
||||
}
|
||||
@@ -0,0 +1,32 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_DISASSEMBLY_HPP_
|
||||
#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_DISASSEMBLY_HPP_
|
||||
|
||||
#include <amd-dbgapi/amd-dbgapi.h>
|
||||
|
||||
void
|
||||
disassembly_disassemble_kernels(bool const);
|
||||
|
||||
void
|
||||
disassembly_print_pc_sample_context(amd_dbgapi_global_address_t const);
|
||||
|
||||
#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_DISASSEMBLY_HPP_
|
||||
@@ -0,0 +1,447 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
#include <cfloat>
|
||||
#include <cinttypes>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
#include <rocprofiler.h>
|
||||
|
||||
#include "program.hpp"
|
||||
#include "program_options.hpp"
|
||||
#include "disassembly.hpp"
|
||||
|
||||
#define XSTR(x) STR(x)
|
||||
#define STR(x) #x
|
||||
#define DBL_FMT "." XSTR(DBL_DECIMAL_DIG) "f"
|
||||
|
||||
namespace util {
|
||||
|
||||
struct hipMalloc_freer {
|
||||
void operator()(void * const ptr) { (void)hipFree(ptr); }
|
||||
};
|
||||
|
||||
} // namespace util
|
||||
|
||||
namespace prng {

/*
 * One step of the SplitMix64 generator.
 *
 * Adds 0x9e3779b97f4a7c15 to *sm64_state (wrapping modulo 2^64) and returns
 * a 64-bit value mixed from the updated state.
 */
static uint64_t
splitmix64_next(uint64_t * const sm64_state)
{
  uint64_t z = (*sm64_state += 0x9e3779b97f4a7c15);
  z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
  z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
  return z ^ (z >> 31);
}

/*
 * Rotate x left by k bits.
 *
 * The count is reduced modulo 64, so k == 0 (or any multiple of 64) returns
 * x unchanged instead of shifting a 64-bit value by 64, which is undefined.
 */
static inline uint64_t
rotl64(const uint64_t x, int k)
{
  unsigned const left = static_cast<unsigned>(k) & 63U;
  unsigned const right = (64U - left) & 63U;
  return (x << left) | (x >> right);
}

/*
 * One step of the xoshiro256++ generator.
 *
 * xrs_state points to the four 64-bit state words, which are updated in
 * place.  Returns the next 64-bit output.
 */
static uint64_t
xrs_next(uint64_t * const xrs_state)
{
  // The output is computed from the state before it is advanced.
  const uint64_t result =
    rotl64(xrs_state[0] + xrs_state[3], 23) + xrs_state[0];

  const uint64_t t = xrs_state[1] << 17;

  xrs_state[2] ^= xrs_state[0];
  xrs_state[3] ^= xrs_state[1];
  xrs_state[1] ^= xrs_state[2];
  xrs_state[0] ^= xrs_state[3];

  xrs_state[2] ^= t;

  xrs_state[3] = rotl64(xrs_state[3], 45);

  return result;
}

} // namespace prng
|
||||
|
||||
namespace kernel {
|
||||
|
||||
template <typename T>
|
||||
__global__ static void
|
||||
memset_gpu(T * const s, T const c, size_t const n)
|
||||
{
|
||||
size_t i_start = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
size_t i_shift = blockDim.x * gridDim.x;
|
||||
for (size_t i = i_start; i < n; i += i_shift) {
|
||||
s[i] = c;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ static void
|
||||
count_gpu(
|
||||
T const * const xs,
|
||||
T * const out,
|
||||
size_t const n,
|
||||
size_t const nblocks,
|
||||
T const gt)
|
||||
{
|
||||
size_t i_start = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
size_t i_shift = blockDim.x * gridDim.x;
|
||||
for (size_t i = i_start; i < n; i += i_shift) {
|
||||
if (xs[i] > gt) {
|
||||
atomicAdd(&out[i % nblocks], 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace kernel
|
||||
|
||||
static char const GETOPT_ARGS[] = "cd:mn:DP";
|
||||
|
||||
static void
|
||||
usage()
|
||||
{
|
||||
fputs("usage: " PROGNAME " [OPTION]... MIN [SEED]\n"
|
||||
" -d DEV\tHIP device number\n"
|
||||
" -n LEN\tLength of random integer array\n"
|
||||
" -D\t\tPrint kernel disassembly\n"
|
||||
" -P\t\tPrint source and disassembly of sampled PC locations\n"
|
||||
"where\n"
|
||||
" DEV : i32\n"
|
||||
" MIN : u64\n"
|
||||
" LEN : u64\n"
|
||||
" SEED : u64\n",
|
||||
stderr);
|
||||
}
|
||||
|
||||
static int
|
||||
get_options(int argc, char **argv, program_options * const opts)
|
||||
{
|
||||
int opt;
|
||||
|
||||
while (-1 != (opt = getopt(argc, argv, GETOPT_ARGS))) {
|
||||
switch (opt) {
|
||||
case 'd':
|
||||
// TODO error checking
|
||||
opts->device = strtol(optarg, nullptr, 10);
|
||||
break;
|
||||
case 'n':
|
||||
// TODO error checking
|
||||
opts->rands_len = strtoul(optarg, nullptr, 10);
|
||||
break;
|
||||
case 'D':
|
||||
opts->disassemble = true;
|
||||
break;
|
||||
case 'P':
|
||||
opts->pc_sampling = true;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
auto const optcount = argc - optind;
|
||||
if (!(1 == optcount || 2 == optcount)) {
|
||||
usage();
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// TODO error checking
|
||||
opts->gt = strtoul(argv[optind], nullptr, 10);
|
||||
if (2 == argc - optind) {
|
||||
opts->seed = strtoull(argv[optind + 1], nullptr, 10);
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static program_options g_opts;
|
||||
|
||||
static void
|
||||
callback_flush_fn(
|
||||
rocprofiler_record_header_t const *record,
|
||||
rocprofiler_record_header_t const *end_record,
|
||||
rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id)
|
||||
{
|
||||
while (record < end_record) {
|
||||
if (nullptr == record) {
|
||||
break;
|
||||
}
|
||||
if (ROCPROFILER_PC_SAMPLING_RECORD == record->kind) {
|
||||
auto const &pcr = (rocprofiler_record_pc_sample_t &)*record;
|
||||
printf(
|
||||
"dispatch[%" PRIu64 "] timestamp(%" PRIu64
|
||||
") gpu_id(%#" PRIx64 ") pc-sample(%#" PRIx64
|
||||
") se(%" PRIu32 ")\n",
|
||||
pcr.pc_sample.dispatch_id.value,
|
||||
pcr.pc_sample.timestamp.value,
|
||||
pcr.pc_sample.gpu_id.handle,
|
||||
pcr.pc_sample.pc,
|
||||
pcr.pc_sample.se);
|
||||
if (g_opts.pc_sampling) {
|
||||
disassembly_print_pc_sample_context(pcr.pc_sample.pc);
|
||||
}
|
||||
}
|
||||
rocprofiler_next_record(record, &record, session_id, buffer_id);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
run_kernel(program_options const &opts)
|
||||
{
|
||||
rocprofiler_session_id_t sid;
|
||||
rocprofiler_filter_id_t fid, fid2;
|
||||
rocprofiler_buffer_id_t bid;
|
||||
auto rocprofiler_ok = ROCPROFILER_STATUS_SUCCESS;
|
||||
|
||||
if (opts.pc_sampling) {
|
||||
ROCPROFILER_CHECK(
|
||||
rocprofiler_create_session(ROCPROFILER_NONE_REPLAY_MODE, &sid),
|
||||
rocprofiler_ok);
|
||||
if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
|
||||
fputs("error: failed to create rocmtools session\n", stderr);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
rocprofiler_filter_property_t property{};
|
||||
|
||||
ROCPROFILER_CHECK(
|
||||
rocprofiler_create_buffer(
|
||||
sid, callback_flush_fn, static_cast<size_t>(0x1000), &bid),
|
||||
rocprofiler_ok);
|
||||
if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
|
||||
fputs("error: failed to add PC sampling session mode\n", stderr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ROCPROFILER_CHECK(
|
||||
rocprofiler_create_filter(
|
||||
sid, ROCPROFILER_PC_SAMPLING_COLLECTION,
|
||||
rocprofiler_filter_data_t{},
|
||||
0, &fid, property),
|
||||
rocprofiler_ok);
|
||||
if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
ROCPROFILER_CHECK(
|
||||
rocprofiler_create_filter(
|
||||
sid, ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION,
|
||||
rocprofiler_filter_data_t{},
|
||||
0, &fid2, property),
|
||||
rocprofiler_ok);
|
||||
if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
ROCPROFILER_CHECK(
|
||||
rocprofiler_set_filter_buffer(sid, fid, bid),
|
||||
rocprofiler_ok);
|
||||
if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
ROCPROFILER_CHECK(
|
||||
rocprofiler_set_filter_buffer(sid, fid2, bid),
|
||||
rocprofiler_ok);
|
||||
if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
ROCPROFILER_CHECK(
|
||||
rocprofiler_start_session(sid),
|
||||
rocprofiler_ok);
|
||||
if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
printf("seed = %" PRIu64 "\n", opts.seed);
|
||||
|
||||
std::vector<uint64_t> rands(opts.rands_len);
|
||||
using rands_elt_t = decltype(rands)::value_type;
|
||||
|
||||
uint64_t
|
||||
sm64_state = opts.seed,
|
||||
xrs_state[4];
|
||||
|
||||
{
|
||||
using prng::splitmix64_next;
|
||||
using prng::xrs_next;
|
||||
|
||||
// Initialize the Xoroshiro PRNG
|
||||
xrs_state[0] = splitmix64_next(&sm64_state);
|
||||
xrs_state[1] = splitmix64_next(&sm64_state);
|
||||
xrs_state[2] = splitmix64_next(&sm64_state);
|
||||
xrs_state[3] = splitmix64_next(&sm64_state);
|
||||
|
||||
// Fill rands with random integers
|
||||
for (auto &i : rands) {
|
||||
i = xrs_next(xrs_state);
|
||||
}
|
||||
}
|
||||
|
||||
struct tm {
|
||||
using monoclk = std::chrono::steady_clock;
|
||||
using dur = std::chrono::duration<double>;
|
||||
};
|
||||
|
||||
using util::hipMalloc_freer;
|
||||
|
||||
auto const begin_time = tm::monoclk::now();
|
||||
|
||||
auto hip_ok = hipSuccess;
|
||||
do {
|
||||
HIP_CHECK_BREAK(hipSetDevice(opts.device), hip_ok);
|
||||
|
||||
auto const rands_nbytes = rands.size() * sizeof(rands_elt_t);
|
||||
std::unique_ptr<rands_elt_t, hipMalloc_freer> rands_gpu;
|
||||
{
|
||||
rands_elt_t *rands_gpu_ptr;
|
||||
HIP_CHECK_BREAK(hipMalloc(&rands_gpu_ptr, rands_nbytes), hip_ok);
|
||||
rands_gpu.reset(rands_gpu_ptr);
|
||||
}
|
||||
|
||||
HIP_CHECK_BREAK(
|
||||
hipMemcpy(rands_gpu.get(), rands.data(), rands_nbytes,
|
||||
hipMemcpyHostToDevice),
|
||||
hip_ok);
|
||||
(void)hipDeviceSynchronize();
|
||||
|
||||
uint32_t constexpr nthreads = 256U;
|
||||
uint32_t const nblocks = (rands.size() + nthreads - 1) / nthreads;
|
||||
|
||||
using count_elt_t = size_t;
|
||||
|
||||
auto const count_subtotals_nbytes = nblocks * sizeof(count_elt_t);
|
||||
std::unique_ptr<count_elt_t, hipMalloc_freer> count_subtotals_gpu;
|
||||
{
|
||||
count_elt_t *count_subtotals_gpu_ptr;
|
||||
HIP_CHECK_BREAK(
|
||||
hipMalloc(&count_subtotals_gpu_ptr, count_subtotals_nbytes),
|
||||
hip_ok);
|
||||
count_subtotals_gpu.reset(count_subtotals_gpu_ptr);
|
||||
}
|
||||
|
||||
hipLaunchKernelGGL(
|
||||
kernel::memset_gpu, nblocks, nthreads, 0, 0,
|
||||
count_subtotals_gpu.get(), 0UL, static_cast<size_t>(nblocks));
|
||||
HIP_CHECK_BREAK(hipGetLastError(), hip_ok);
|
||||
(void)hipDeviceSynchronize();
|
||||
|
||||
auto const kernel_begin_time = tm::monoclk::now();
|
||||
|
||||
hipLaunchKernelGGL(
|
||||
kernel::count_gpu, nblocks, nthreads, 0, 0,
|
||||
rands_gpu.get(), count_subtotals_gpu.get(), rands.size(),
|
||||
static_cast<size_t>(nblocks), opts.gt);
|
||||
HIP_CHECK_BREAK(hipGetLastError(), hip_ok);
|
||||
(void)hipDeviceSynchronize();
|
||||
|
||||
auto const kernel_end_time = tm::monoclk::now();
|
||||
|
||||
std::vector<size_t> count_subtotals(nblocks);
|
||||
HIP_CHECK_BREAK(
|
||||
hipMemcpy(count_subtotals.data(), count_subtotals_gpu.get(),
|
||||
count_subtotals_nbytes, hipMemcpyDeviceToHost),
|
||||
hip_ok);
|
||||
(void)hipDeviceSynchronize();
|
||||
|
||||
// TODO parallel sum on GPU
|
||||
auto const total =
|
||||
std::accumulate(
|
||||
count_subtotals.cbegin(), count_subtotals.cend(),
|
||||
static_cast<size_t>(0));
|
||||
|
||||
auto const all_end_time = tm::monoclk::now();
|
||||
|
||||
tm::dur const kernel_time(kernel_end_time - kernel_begin_time);
|
||||
auto total_time(all_end_time - begin_time);
|
||||
tm::dur const total_time_without_tool_init(total_time);
|
||||
printf("len(rands) = %zu; gt = %zu; count(rands, gt) = %zu\n"
|
||||
"main kernel time elapsed: %" DBL_FMT "\n"
|
||||
"full time elapsed: %" DBL_FMT "\n",
|
||||
rands.size(), opts.gt, total,
|
||||
kernel_time.count(),
|
||||
total_time_without_tool_init.count());
|
||||
} while (false);
|
||||
|
||||
if (opts.disassemble) {
|
||||
disassembly_disassemble_kernels(false);
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
if (opts.pc_sampling) {
|
||||
rocprofiler_terminate_session(sid);
|
||||
rocprofiler_flush_data(sid, bid);
|
||||
rocprofiler_destroy_session(sid);
|
||||
}
|
||||
|
||||
out:
|
||||
return ROCPROFILER_STATUS_SUCCESS == rocprofiler_ok
|
||||
? EXIT_SUCCESS
|
||||
: EXIT_FAILURE;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
if (auto const ret = get_options(argc, argv, &g_opts);
|
||||
EXIT_SUCCESS != ret)
|
||||
{
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (hsa_init() != HSA_STATUS_SUCCESS){
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
int ret = EXIT_FAILURE;
|
||||
auto ok = ROCPROFILER_STATUS_SUCCESS;
|
||||
|
||||
ROCPROFILER_CHECK(rocprofiler_initialize(), ok);
|
||||
if (ROCPROFILER_STATUS_SUCCESS == ok) {
|
||||
ret = run_kernel(g_opts);
|
||||
} else {
|
||||
goto out;
|
||||
}
|
||||
|
||||
rocprofiler_finalize();
|
||||
|
||||
out:
|
||||
hsa_shut_down();
|
||||
return ROCPROFILER_STATUS_SUCCESS == ok && EXIT_FAILURE != ret
|
||||
? EXIT_SUCCESS
|
||||
: EXIT_FAILURE;
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_HPP_
|
||||
#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_HPP_
|
||||
|
||||
#define PROGNAME "code_printing_sample"
|
||||
|
||||
/*
 * Print a HIP error, with the source location of the macro expansion, to
 * stderr and flush it.  Expands to a single statement that needs a
 * trailing semicolon at the point of use.
 */
#define HIP_ERROR(code)                                                       \
  do {                                                                        \
    fprintf(stderr,                                                           \
            PROGNAME ": Assertion failed at %s:%d, HIP error: %s\n",          \
            __FILE__, __LINE__, hipGetErrorString((code)));                   \
    fflush(stderr);                                                           \
  } while (false)

/*
 * Evaluate `expr` once; if the result is not hipSuccess, report it, store
 * it in `var` and `break` out of the enclosing loop or switch.  `var` is
 * left untouched on success.
 *
 * This cannot be wrapped in do/while because the `break` must apply to the
 * caller's loop; the trailing `else (void)0` makes the expansion demand a
 * semicolon and stops it from capturing a following `else`.
 */
#define HIP_CHECK_BREAK(expr, var)                                            \
  if (auto const code = (expr); hipSuccess != code) {                         \
    HIP_ERROR(code);                                                          \
    (var) = code;                                                             \
    break;                                                                    \
  } else                                                                      \
    (void)0

/*
 * Print a rocprofiler error, with the source location of the macro
 * expansion, to stderr and flush it.  Expands to a single statement that
 * needs a trailing semicolon at the point of use.
 */
#define ROCPROFILER_ERROR(code)                                               \
  do {                                                                        \
    fprintf(stderr,                                                           \
            PROGNAME ": Assertion failed at %s:%d, ROCmtools error: %s\n",    \
            __FILE__, __LINE__, rocprofiler_error_str((code)));               \
    fflush(stderr);                                                           \
  } while (false)

/*
 * Evaluate `expr` once, always store the result in `var`, and report it if
 * it is not ROCPROFILER_STATUS_SUCCESS.  Control flow is not altered; the
 * caller inspects `var` afterwards.
 */
#define ROCPROFILER_CHECK(expr, var)                                          \
  do {                                                                        \
    if ((var) = (expr); ROCPROFILER_STATUS_SUCCESS != (var)) {                \
      ROCPROFILER_ERROR((var));                                               \
    }                                                                         \
  } while (false)
|
||||
|
||||
#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_HPP_
|
||||
@@ -0,0 +1,49 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_OPTIONS_HPP_
|
||||
#define SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_OPTIONS_HPP_
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
|
||||
/*
 * Command-line settings of the sample, with their defaults.
 */
struct program_options {
  // Every member takes its default from the initializer on its declaration.
  program_options() {}

  // HIP device number (-d).
  int device = 0;
  bool no_gpu = false;
  bool hip_memset = false;
  // Length of the random integer array (-n); defaults to 4 Mi elements.
  size_t rands_len = 1024 * 1024 * 4;
  // MIN positional argument: elements greater than this are counted.
  uint64_t gt = 0;
  // SEED positional argument; defaults to the steady-clock tick count at
  // construction time.
  uint64_t seed = static_cast<uint64_t>(
    std::chrono::steady_clock::now().time_since_epoch().count());
  // Print kernel disassembly (-D).
  bool disassemble = false;
  // Print source and disassembly of sampled PC locations (-P).
  bool pc_sampling = false;
};
|
||||
|
||||
#endif // SAMPLES_PCSAMPLER_CODE_PRINTING_SAMPLE_PROGRAM_OPTIONS_HPP_
|
||||
@@ -0,0 +1,72 @@
|
||||
#include "../common/common.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
printf("APPLICATION REPLAY Mode is not yet Supported!");
|
||||
#if 0
|
||||
int* gpuMem;
|
||||
prepare();
|
||||
// Initialize the tools
|
||||
CHECK_ROCPROFILER(rocprofiler_initialize());
|
||||
|
||||
// Creating the session with given replay mode
|
||||
rocprofiler_session_id_t session_id;
|
||||
CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_APPLICATION_REPLAY_MODE, &session_id));
|
||||
|
||||
// Creating Output Buffer for the data
|
||||
rocprofiler_buffer_id_t buffer_id;
|
||||
CHECK_ROCPROFILER(rocprofiler_create_buffer(
|
||||
session_id,
|
||||
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
WriteBufferRecords(record, end_record, session_id, buffer_id);
|
||||
},
|
||||
0x9999, &buffer_id));
|
||||
|
||||
// Counter Collection Filter
|
||||
std::vector<const char*> counters;
|
||||
counters.emplace_back("GRBM_COUNT");
|
||||
rocprofiler_filter_id_t filter_id;
|
||||
[[maybe_unused]] rocprofiler_filter_property_t property = {};
|
||||
CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, ROCPROFILER_COUNTERS_COLLECTION,
|
||||
rocprofiler_filter_data_t{.counters_names = &counters[0]},
|
||||
counters.size(), &filter_id, property));
|
||||
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
|
||||
filter_ids.emplace_back(filter_id);
|
||||
|
||||
// Normal HIP Calls
|
||||
hipDeviceProp_t devProp;
|
||||
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
|
||||
HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int)));
|
||||
|
||||
// KernelA and KernelB won't be profiled
|
||||
kernelCalls('A');
|
||||
kernelCalls('B');
|
||||
|
||||
// Activating Profiling Session to profile whatever kernel launches occurs up
|
||||
// till the next terminate session
|
||||
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
|
||||
|
||||
// KernelC, KernelD, KernelE and KernelF to be profiled as part of the session
|
||||
kernelCalls('C');
|
||||
kernelCalls('D');
|
||||
kernelCalls('E');
|
||||
kernelCalls('F');
|
||||
// Normal HIP Calls
|
||||
HIP_CALL(hipFree(gpuMem));
|
||||
|
||||
// Deactivating session
|
||||
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
|
||||
|
||||
// Manual Flush user buffer request
|
||||
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
|
||||
|
||||
// Destroy sessions
|
||||
CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));
|
||||
|
||||
// Destroy all profiling related objects(User buffer, sessions, filters,
|
||||
// etc..)
|
||||
CHECK_ROCPROFILER(rocprofiler_finalize());
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
#include "../common/common.h"
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#include <iostream>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int poll_duration = 5;
|
||||
if (argc > 1) poll_duration = atoi(argv[1]);
|
||||
|
||||
CHECK_ROCPROFILER(rocprofiler_initialize());
|
||||
printf("initialize\n");
|
||||
|
||||
rocprofiler_session_id_t dp_session_id;
|
||||
std::vector<const char*> counters;
|
||||
counters.emplace_back("FETCH_SIZE");
|
||||
|
||||
printf("session create\n");
|
||||
|
||||
int gpu_agent = 0;
|
||||
int cpu_agent = 0;
|
||||
CHECK_ROCPROFILER(rocprofiler_device_profiling_session_create(&counters[0], counters.size(),
|
||||
&dp_session_id, gpu_agent, cpu_agent));
|
||||
|
||||
printf("session start \n");
|
||||
// start GPU device profiling
|
||||
CHECK_ROCPROFILER(rocprofiler_device_profiling_session_start(dp_session_id));
|
||||
|
||||
using std::chrono::duration_cast;
|
||||
using std::chrono::high_resolution_clock;
|
||||
using std::chrono::milliseconds;
|
||||
|
||||
auto t1 = high_resolution_clock::now();
|
||||
|
||||
do {
|
||||
printf("polling\n");
|
||||
std::vector<rocprofiler_device_profile_metric_t> data(counters.size());
|
||||
// Poll metrics
|
||||
CHECK_ROCPROFILER(rocprofiler_device_profiling_session_poll(dp_session_id, &data[0]));
|
||||
|
||||
for (size_t i = 0; i < data.size(); i++)
|
||||
std::cout << data[i].metric_name << ": " << data[i].value.value << std::endl;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
// break;
|
||||
} while (--poll_duration > 0);
|
||||
|
||||
auto t2 = high_resolution_clock::now();
|
||||
/* Getting number of milliseconds as an integer. */
|
||||
auto ms_int = duration_cast<milliseconds>(t2 - t1);
|
||||
|
||||
std::cout << ms_int.count() << "ms\n";
|
||||
|
||||
// Stop session
|
||||
CHECK_ROCPROFILER(rocprofiler_device_profiling_session_stop(dp_session_id));
|
||||
|
||||
// Destroy session
|
||||
CHECK_ROCPROFILER(rocprofiler_device_profiling_session_destroy(dp_session_id));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
#include "../common/common.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int* gpuMem;
|
||||
prepare();
|
||||
// Initialize the tools
|
||||
CHECK_ROCPROFILER(rocprofiler_initialize());
|
||||
|
||||
// Creating the session with given replay mode
|
||||
rocprofiler_session_id_t session_id;
|
||||
CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id));
|
||||
|
||||
// Creating Output Buffer for the data
|
||||
rocprofiler_buffer_id_t buffer_id;
|
||||
CHECK_ROCPROFILER(rocprofiler_create_buffer(
|
||||
session_id,
|
||||
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
WriteBufferRecords(record, end_record, session_id, buffer_id);
|
||||
},
|
||||
0x9999, &buffer_id));
|
||||
|
||||
// Counter Collection Filter
|
||||
std::vector<const char*> counters;
|
||||
counters.emplace_back("GRBM_COUNT");
|
||||
rocprofiler_filter_id_t filter_id;
|
||||
[[maybe_unused]] rocprofiler_filter_property_t property = {};
|
||||
CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, ROCPROFILER_COUNTERS_COLLECTION,
|
||||
rocprofiler_filter_data_t{.counters_names = &counters[0]},
|
||||
counters.size(), &filter_id, property));
|
||||
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
|
||||
|
||||
// Normal HIP Calls
|
||||
hipDeviceProp_t devProp;
|
||||
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
|
||||
HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int)));
|
||||
|
||||
// KernelA and KernelB won't be profiled
|
||||
kernelCalls('A');
|
||||
kernelCalls('B');
|
||||
|
||||
// Activating Profiling Session to profile whatever kernel launches occurs up
|
||||
// till the next terminate session
|
||||
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
|
||||
|
||||
// KernelC, KernelD, KernelE and KernelF to be profiled as part of the session
|
||||
kernelCalls('C');
|
||||
kernelCalls('D');
|
||||
kernelCalls('E');
|
||||
kernelCalls('F');
|
||||
// Normal HIP Calls
|
||||
HIP_CALL(hipFree(gpuMem));
|
||||
|
||||
// Deactivating session
|
||||
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
|
||||
|
||||
// Manual Flush user buffer request
|
||||
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
|
||||
|
||||
// Destroy sessions
|
||||
CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));
|
||||
|
||||
// Destroy all profiling related objects(User buffer, sessions, filters,
|
||||
// etc..)
|
||||
CHECK_ROCPROFILER(rocprofiler_finalize());
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
#include "../common/common.h"
|
||||
|
||||
// User replay sample.
//
// User replay mode is not supported yet, so the program only prints a notice
// and exits successfully. The intended flow is kept below, compiled out with
// `#if 0`, as a reference for when the mode becomes available.
int main(int argc, char** argv) {
  // The command line is not used by this sample.
  (void)argc;
  (void)argv;

  // Terminate the notice with a newline so it does not run into the shell prompt.
  printf("USER REPLAY Mode is not yet Supported!\n");
#if 0
  int* gpuMem;
  prepare();
  // Initialize the tools
  CHECK_ROCPROFILER(rocprofiler_initialize());

  // Creating the session with given replay mode
  rocprofiler_session_id_t session_id;
  CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_USER_REPLAY_MODE, &session_id));

  // Creating Output Buffer for the data
  rocprofiler_buffer_id_t buffer_id;
  CHECK_ROCPROFILER(rocprofiler_create_buffer(
      session_id,
      [](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
         rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
        WriteBufferRecords(record, end_record, session_id, buffer_id);
      },
      0x9999, &buffer_id));

  // Counter Collection Filter
  std::vector<const char*> counters;
  counters.emplace_back("GRBM_COUNT");
  rocprofiler_filter_id_t filter_id;
  [[maybe_unused]] rocprofiler_filter_property_t property = {};
  CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, ROCPROFILER_COUNTERS_COLLECTION,
                                              rocprofiler_filter_data_t{.counters_names = &counters[0]},
                                              counters.size(), &filter_id, property));
  CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
  // NOTE(review): `filter_ids` is not declared in this function; presumably it
  // comes from common.h - confirm before enabling this code.
  filter_ids.emplace_back(filter_id);

  // Normal HIP Calls
  hipDeviceProp_t devProp;
  HIP_CALL(hipGetDeviceProperties(&devProp, 0));
  HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int)));

  // KernelA and KernelB won't be profiled
  kernelCalls('A');
  kernelCalls('B');

  // Activating Profiling Session to profile whatever kernel launches occurs up
  // till the next terminate session
  CHECK_ROCPROFILER(rocprofiler_start_session(session_id));

  // Replay Pass Start point
  CHECK_ROCPROFILER(rocprofiler_start_replay_pass());

  // KernelC, KernelD, KernelE and KernelF to be profiled as part of the session
  kernelCalls('C');
  kernelCalls('D');
  kernelCalls('E');
  kernelCalls('F');
  // Normal HIP Calls
  HIP_CALL(hipFree(gpuMem));

  // Deactivating session
  CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));

  // Manual Flush user buffer request
  CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));

  // Replay Pass End point
  CHECK_ROCPROFILER(rocprofiler_end_replay_pass());

  // Destroy sessions
  CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));

  // Destroy all profiling related objects(User buffer, sessions, filters,
  // etc..)
  CHECK_ROCPROFILER(rocprofiler_finalize());
#endif

  return 0;
}
|
||||
Executable
+28
@@ -0,0 +1,28 @@
|
||||
#!/bin/bash
# Runs the sample binaries that live in the same directory as this script.
# Samples that are currently disabled are kept below as comments.

# Directory containing this script (and the sample binaries).
CURRENT_DIR="$(dirname -- "$0")"

echo -e "Running Samples"

# Counter definitions used by the profiler samples, resolved relative to the
# script location. Quoted so a path containing spaces survives intact.
export ROCPROFILER_METRICS_PATH="${CURRENT_DIR}/../counters/derived_counters.xml"

echo -e "\tProfiler Samples:"

# echo -e "\t\tApplication Replay Sample:"
# "${CURRENT_DIR}/profiler_application_replay"

# Run the binaries directly instead of through `eval`: eval re-parses the
# expanded path, which breaks on spaces and shell metacharacters.
echo -e "\t\tKernel Replay Sample:"
"${CURRENT_DIR}/profiler_kernel_replay"

# echo -e "\t\tUser Replay Sample:"
# "${CURRENT_DIR}/profiler_user_replay"

echo -e "\t\tDevice Profiling Sample:"
"${CURRENT_DIR}/profiler_device_profiling"

# echo -e "\tTracer Samples:"

# echo -e "\t\tHIP/HSA Trace Sample:"
# "${CURRENT_DIR}/tracer_hip_hsa"
|
||||
@@ -0,0 +1,82 @@
|
||||
#include "../common/common.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int* gpuMem;
|
||||
prepare();
|
||||
// Initialize the tools
|
||||
CHECK_ROCPROFILER(rocprofiler_initialize());
|
||||
|
||||
// Creating the session with given replay mode
|
||||
rocprofiler_session_id_t session_id;
|
||||
CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id));
|
||||
|
||||
// Creating Output Buffer for the data
|
||||
rocprofiler_buffer_id_t buffer_id;
|
||||
CHECK_ROCPROFILER(rocprofiler_create_buffer(
|
||||
session_id,
|
||||
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
WriteBufferRecords(record, end_record, session_id, buffer_id);
|
||||
},
|
||||
0x9999, &buffer_id));
|
||||
|
||||
// Tracing Filter
|
||||
std::vector<rocprofiler_tracer_activity_domain_t> apis_requested;
|
||||
apis_requested.emplace_back(ACTIVITY_DOMAIN_HIP_API);
|
||||
apis_requested.emplace_back(ACTIVITY_DOMAIN_HIP_OPS);
|
||||
apis_requested.emplace_back(ACTIVITY_DOMAIN_HSA_API);
|
||||
apis_requested.emplace_back(ACTIVITY_DOMAIN_HSA_OPS);
|
||||
apis_requested.emplace_back(ACTIVITY_DOMAIN_ROCTX);
|
||||
rocprofiler_filter_id_t api_tracing_filter_id;
|
||||
CHECK_ROCPROFILER(rocprofiler_create_filter(
|
||||
session_id, ROCPROFILER_API_TRACE, rocprofiler_filter_data_t{&apis_requested[0]},
|
||||
apis_requested.size(), &api_tracing_filter_id, rocprofiler_filter_property_t{}));
|
||||
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, api_tracing_filter_id, buffer_id));
|
||||
CHECK_ROCPROFILER(rocprofiler_set_api_trace_sync_callback(
|
||||
session_id, api_tracing_filter_id,
|
||||
[](rocprofiler_record_tracer_t record, rocprofiler_session_id_t session_id) {
|
||||
FlushTracerRecord(record, session_id);
|
||||
}));
|
||||
|
||||
// Kernel Tracing
|
||||
rocprofiler_filter_id_t kernel_tracing_filter_id;
|
||||
CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION,
|
||||
rocprofiler_filter_data_t{}, 0, &kernel_tracing_filter_id,
|
||||
rocprofiler_filter_property_t{}));
|
||||
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, kernel_tracing_filter_id, buffer_id));
|
||||
|
||||
// Normal HIP Calls won't be traced
|
||||
hipDeviceProp_t devProp;
|
||||
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
|
||||
HIP_CALL(hipMalloc((void**)&gpuMem, 1 * sizeof(int)));
|
||||
// KernelA and KernelB won't be traced
|
||||
kernelCalls('A');
|
||||
kernelCalls('B');
|
||||
|
||||
// Activating Profiling Session to profile whatever kernel launches occurs up
|
||||
// till the next terminate session
|
||||
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
|
||||
|
||||
// KernelC, KernelD, KernelE and KernelF to be traced as part of the session
|
||||
kernelCalls('C');
|
||||
kernelCalls('D');
|
||||
kernelCalls('E');
|
||||
kernelCalls('F');
|
||||
// Normal HIP Calls that will be traced
|
||||
HIP_CALL(hipFree(gpuMem));
|
||||
|
||||
// Deactivating session
|
||||
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
|
||||
|
||||
// Manual Flush user buffer request
|
||||
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
|
||||
|
||||
// Destroy sessions
|
||||
CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));
|
||||
|
||||
// Destroy all profiling related objects(User buffer, sessions, filters,
|
||||
// etc..)
|
||||
CHECK_ROCPROFILER(rocprofiler_finalize());
|
||||
|
||||
return 0;
|
||||
}
|
||||
+242
@@ -0,0 +1,242 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
import os, sys, re
|
||||
import CppHeaderParser
|
||||
import argparse
|
||||
import string
|
||||
|
||||
# License banner (a C block comment) written at the top of the generated header.
LICENSE = ''.join((
    '/*\n',
    'Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.\n',
    '\n',
    'Permission is hereby granted, free of charge, to any person obtaining a copy\n',
    'of this software and associated documentation files (the "Software"), to deal\n',
    'in the Software without restriction, including without limitation the rights\n',
    'to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n',
    'copies of the Software, and to permit persons to whom the Software is\n',
    'furnished to do so, subject to the following conditions:\n',
    '\n',
    'The above copyright notice and this permission notice shall be included in\n',
    'all copies or substantial portions of the Software.\n',
    '\n',
    'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n',
    'IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n',
    'FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n',
    'AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n',
    'LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n',
    'OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n',
    'THE SOFTWARE.\n',
    '*/\n',
))
|
||||
|
||||
|
||||
# C++ text emitted once per generated header: opens `namespace detail` and
# defines the fallback templated operator<< plus the `unsigned char` and `char`
# overloads used by the per-struct operators.
header_basic = (
    'namespace detail {\n'
    'template <typename T>\n'
    ' inline static std::ostream& operator<<(std::ostream& out, const T& v) {\n'
    ' using std::operator<<;\n'
    ' static bool recursion = false;\n'
    ' if (recursion == false) { recursion = true; out << v; recursion = false; }\n'
    ' return out;\n }\n'
    '\n'
    ' inline static std::ostream &operator<<(std::ostream &out, const unsigned char &v) {\n'
    ' out << (unsigned int)v;\n'
    ' return out;\n }\n'
    '\n'
    ' inline static std::ostream &operator<<(std::ostream &out, const char &v) {\n'
    ' out << (unsigned char)v;\n'
    ' return out;\n }\n'
)
|
||||
|
||||
# Module-level state shared by process_struct() and gen_cppheader().
structs_analyzed = dict()   # struct names already processed (name -> 1)
global_ops = ""             # operator<< wrappers accumulated for the file epilogue
global_str = ""             # per-struct field-printing code being accumulated
output_filename_h = None    # open handle of the header being generated
apiname = ''                # API name derived from the output file name
|
||||
|
||||
# process_struct traverses recursively all structs to extract all fields
def process_struct(file_handle, cppHeader_struct, cppHeader, parent_hier_name, apiname):
    """Append C++ code that prints every public field of one struct to global_str.

    file_handle:      handle for output file {api_name}_ostream_ops.h to be
                      generated (only forwarded to recursive calls).
    cppHeader_struct: name of the cppHeader struct being processed.
    cppHeader:        cppHeader object created by CppHeaderParser.CppHeader(...).
    parent_hier_name: parent hierarchical name used for nested structs/enums.
    apiname:          for example hip.

    Structs already listed in structs_analyzed, structs unknown to cppHeader
    and 'max_align_t' are ignored. Fields are visited last to first.
    """
    global global_str

    if cppHeader_struct == 'max_align_t':  # function pointers not working in cppheaderparser
        return
    if cppHeader_struct not in cppHeader.classes:
        return
    if cppHeader_struct in structs_analyzed:
        return
    structs_analyzed[cppHeader_struct] = 1

    # Call prefix shared by the three statements generated for each field.
    stream_op = "  roctracer::" + apiname.lower() + "_support::detail::operator<<"

    for member in reversed(cppHeader.classes[cppHeader_struct]["properties"]["public"]):
        # Fields lacking a name or a type cannot be printed.
        if 'name' not in member:
            continue
        if parent_hier_name != '':
            name = parent_hier_name + '.' + member['name']
        else:
            name = member['name']
        if name == '':
            continue

        mtype = member['type'] if 'type' in member else ""
        if mtype == '':
            continue
        prop = member['property_of_class'] if 'property_of_class' in member else ""

        if "union" in mtype:
            # Recurse into the anonymous/nested definitions, keeping the field
            # name as the hierarchical prefix.
            if prop != '':
                process_struct(file_handle, prop + "::", cppHeader, name, apiname)
                process_struct(file_handle, prop + "::" + mtype + " ", cppHeader, name, apiname)
            process_struct(file_handle, cppHeader_struct + "::", cppHeader, name, apiname)
            continue

        # Fields whose type mentions void get no printing code.
        if "void" in mtype:
            continue

        global_str += (
            ' if (std::string("' + cppHeader_struct + '::' + name + '").find('
            + apiname.upper() + '_structs_regex) != std::string::npos) {\n'
            + stream_op + '(out, "' + name + '=");\n'
            + stream_op + '(out, v.' + name + ');\n'
            + stream_op + '(out, ", ");\n'
            + ' }\n'
        )
|
||||
|
||||
# Parses API header file and generates ostream ops files ostream_ops.h
def gen_cppheader(infilepath, outfilepath, rank):
    """Parse one API header and write its ostream operators to the output file.

    infilepath:  API header file to be parsed.
    outfilepath: output file where the ostream operators are written.
    rank:        role of this header in the input list: 0 opens the output and
                 writes the prologue, 1 writes the epilogue and closes the
                 output, 2 does both, any other value does neither.

    Exits the process with status 1 if CppHeaderParser cannot parse the header.
    """
    global global_ops
    global output_filename_h
    global apiname
    global global_str
    try:
        cppHeader = CppHeaderParser.CppHeader(infilepath)
    except CppHeaderParser.CppParseError as e:
        print(e)
        sys.exit(1)

    if rank == 0 or rank == 2:
        # The API name is the output file name without its suffix, upper-cased.
        # basename() is used instead of stripping "<dirname>/" textually, which
        # misbehaves for paths such as "a//x_ostream_ops.h".
        apiname = os.path.basename(outfilepath)
        output_filename_h = open(outfilepath, "w+")
        apiname = apiname.replace("_ostream_ops.h", "")
        apiname = apiname.upper()
        output_filename_h.write("// automatically generated\n")
        output_filename_h.write(LICENSE + '\n')
        header_s = (
            '#ifndef INC_' + apiname + '_OSTREAM_OPS_H_\n'
            + '#define INC_' + apiname + '_OSTREAM_OPS_H_\n'
            + '\n'
            + '#include "src/core/session/tracer/src/roctracer.h"\n'
            + '\n'
            + '#ifdef __cplusplus\n'
            + '#include <iostream>\n'
            + '#include <string>\n'
        )

        output_filename_h.write(header_s)
        output_filename_h.write('\n')
        output_filename_h.write('namespace roctracer {\n')
        output_filename_h.write('namespace ' + apiname.lower() + '_support {\n')
        output_filename_h.write('static int ' + apiname.upper() + '_depth_max = 1;\n')
        output_filename_h.write('static int ' + apiname.upper() + '_depth_max_cnt = 0;\n')
        output_filename_h.write('static std::string ' + apiname.upper() + '_structs_regex = "";\n')
        output_filename_h.write('// begin ostream ops for ' + apiname + ' \n')
        output_filename_h.write("// basic ostream ops\n")
        output_filename_h.write(header_basic)
        output_filename_h.write("// End of basic ostream ops\n\n")

    for c in cppHeader.classes:
        # ostream operator cannot be overloaded for anonymous struct therefore it is skipped
        # (endswith also copes with names shorter than two characters).
        if c.endswith('::'):
            continue
        if "union" in c:
            continue
        if c in structs_analyzed:
            continue
        if c == 'max_align_t' or c == '__fsid_t':  # Skipping as it is defined in multiple domains
            continue
        if len(cppHeader.classes[c]["properties"]["public"]) != 0:
            output_filename_h.write("inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n")
            output_filename_h.write("{\n")
            output_filename_h.write(" std::operator<<(out, '{');\n")
            output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt++;\n")
            output_filename_h.write(" if (" + apiname.upper() + "_depth_max == -1 || " + apiname.upper() + "_depth_max_cnt <= " + apiname.upper() + "_depth_max" + ") {\n")
            process_struct(output_filename_h, c, cppHeader, "", apiname)
            # Drop the last three newline-separated pieces (the final ", "
            # separator statement, its closing brace and the trailing empty
            # piece), then restore the closing brace.
            global_str = "\n".join(global_str.split("\n")[0:-3])
            if global_str != '':
                global_str += "\n }\n"
            output_filename_h.write(global_str)
            output_filename_h.write(" };\n")
            output_filename_h.write(" " + apiname.upper() + "_depth_max_cnt--;\n")
            output_filename_h.write(" std::operator<<(out, '}');\n")
            output_filename_h.write(" return out;\n")
            output_filename_h.write("}\n")
            global_str = ''
            # Wrapper emitted after the namespaces are closed; it forwards to
            # the operator generated above.
            global_ops += "inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n" + "{\n" + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, v);\n" + " return out;\n" + "}\n\n"

    if rank == 1 or rank == 2:
        footer = '// end ostream ops for ' + apiname + ' \n'
        footer += '};};};\n\n'
        output_filename_h.write(footer)
        output_filename_h.write(global_ops)
        footer = (
            '#endif //__cplusplus\n'
            + '#endif // INC_' + apiname + '_OSTREAM_OPS_H_\n'
            + ' \n'
        )
        output_filename_h.write(footer)
        output_filename_h.write('#include <hip/amd_detail/hip_prof_str.h>')
        output_filename_h.close()
        print('File ' + outfilepath + ' generated')
|
||||
|
||||
# Command line: -in <comma separated header list> -out <generated header>.
parser = argparse.ArgumentParser(
    description='genOstreamOps.py: generates ostream operators for all typedefs in provided input file.')
requiredNamed = parser.add_argument_group('Required arguments')
requiredNamed.add_argument(
    '-in', metavar='fileList', required=True,
    help='Comma separated list of header files to be parsed')
requiredNamed.add_argument(
    '-out', metavar='file', required=True,
    help='Output file with ostream operators')

args = vars(parser.parse_args())

if __name__ == '__main__':
    flist = args['in'].split(',')
    last = len(flist) - 1
    for position, header in enumerate(flist):
        # rank passed to gen_cppheader: 2 = only header, 0 = first, 1 = last,
        # -1 = any header in between.
        if last == 0:
            rank = 2
        elif position == 0:
            rank = 0
        elif position == last:
            rank = 1
        else:
            rank = -1
        gen_cppheader(header, args['out'], rank)
|
||||
Executable
+581
@@ -0,0 +1,581 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
from __future__ import print_function
|
||||
import os, sys, re
|
||||
|
||||
# Names of the two generated files.
H_OUT = "hsa_prof_str.h"
CPP_OUT = "hsa_prof_str.inline.h"

# Header that declares the API tables.
API_TABLES_H = "hsa_api_trace.h"

# (table name, header) pairs.
API_HEADERS_H = (
    ("CoreApi", "hsa.h"),
    ("AmdExt", "hsa_ext_amd.h"),
    ("ImageExt", "hsa_ext_image.h"),
    ("AmdExt", API_TABLES_H),
)
|
||||
|
||||
# License banner (a C block comment) placed at the top of the generated files.
LICENSE = ''.join((
    '/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.\n',
    '\n',
    ' Permission is hereby granted, free of charge, to any person obtaining a copy\n',
    ' of this software and associated documentation files (the "Software"), to deal\n',
    ' in the Software without restriction, including without limitation the rights\n',
    ' to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n',
    ' copies of the Software, and to permit persons to whom the Software is\n',
    ' furnished to do so, subject to the following conditions:\n',
    '\n',
    ' The above copyright notice and this permission notice shall be included in\n',
    ' all copies or substantial portions of the Software.\n',
    '\n',
    ' THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n',
    ' IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n',
    ' FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n',
    ' AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n',
    ' LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n',
    ' OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n',
    ' THE SOFTWARE. */\n',
))
|
||||
|
||||
#############################################################
|
||||
# Error handler
def fatal(module, msg):
    """Write '<module> Error: "<msg>"' to stderr and exit with status 1."""
    message = module + ' Error: "' + msg + '"'
    sys.stderr.write(message + '\n')
    sys.exit(1)
|
||||
|
||||
# Patterns used by NextBlock, compiled once instead of on every call.
_NEXTBLOCK_SPACE_PATTERN = re.compile(r'(\s+)')
_NEXTBLOCK_WORD_PATTERN = re.compile(r'([\w\*]+)')

# Get next text block
def NextBlock(pos, record):
    """Return the index just past the text block that starts at record[pos].

    A block is one of:
      * a run of whitespace,
      * a run of word characters and '*',
      * a balanced parenthesised group, when record[pos] is '('.

    An empty record returns pos unchanged. A position outside the record, a
    character that starts no block, or an unbalanced '(' is reported through
    fatal(), which terminates the program.
    """
    if len(record) == 0:
        return pos

    # Fail with a readable message instead of a bare IndexError below.
    if pos < 0 or pos >= len(record):
        fatal('NextBlock', "bad record '" + record + "' pos(" + str(pos) + ")")

    if record[pos] != '(':
        m = _NEXTBLOCK_SPACE_PATTERN.match(record, pos)
        if not m:
            m = _NEXTBLOCK_WORD_PATTERN.match(record, pos)
        if m:
            return pos + len(m.group(1))
        fatal('NextBlock', "bad record '" + record + "' pos(" + str(pos) + ")")
    else:
        # Walk to the ')' that closes the '(' at pos, tracking nesting depth.
        count = 0
        for index in range(pos, len(record)):
            if record[index] == '(':
                count = count + 1
            elif record[index] == ')':
                count = count - 1
                if count == 0:
                    return index + 1
        # The loop only falls through when the group was never closed.
        fatal('NextBlock', "count is not zero (" + str(count) + ")")
|
||||
|
||||
#############################################################
|
||||
# API table parser class
|
||||
class API_TableParser:
    """Collects the API names listed in one `struct <name>Table { ... };` block.

    After construction, `array` holds the text found inside each
    `decltype(...)` entry of the table, in file order.
    """

    def fatal(self, msg):
        """Report an error on behalf of this parser and terminate."""
        fatal('API_TableParser', msg)

    def __init__(self, header, name):
        """Parse `header` looking for the table called `<name>Table`.

        header: path of the header file that declares the API tables.
        name:   table name prefix, e.g. 'CoreApi' for 'struct CoreApiTable {'.
        """
        self.name = name

        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        # Raw strings keep the regex escapes from being treated as (invalid)
        # string escapes.
        self.beg_pattern = re.compile(r'^\s*struct\s+' + name + r'Table\s*\{\s*$')
        self.end_pattern = re.compile(r'^\s*\};\s*$')
        self.array = []

        # The file is only needed while parsing; close it afterwards.
        with open(header, 'r') as self.inp:
            self.parse()

    # normalizing a line
    def norm_line(self, line):
        """Strip the line terminator and collapse leading whitespace to one space."""
        # rstrip('\n') instead of line[:-1]: a final line without a newline
        # must not lose its last real character.
        return re.sub(r'^\s+', r' ', line.rstrip('\n'))

    # check for start record
    def is_start(self, record):
        """Match object if `record` opens the requested table, else None."""
        return self.beg_pattern.match(record)

    # check for end record
    def is_end(self, record):
        """Match object if `record` is a closing '};' line, else None."""
        return self.end_pattern.match(record)

    # check for declaration entry record
    def is_entry(self, record):
        """Match object whose group(1) is the text inside 'decltype(...)', else None."""
        return re.match(r'^\s*decltype\(([^\)]*)\)', record)

    # parse method
    def parse(self):
        """Scan the header and append every table entry to `self.array`."""
        active = 0
        for line in self.inp.readlines():
            record = self.norm_line(line)
            if self.is_start(record):
                active = 1
            if active != 0:
                # Stop at the first closing line after the table start.
                if self.is_end(record):
                    return
                m = self.is_entry(record)
                if m:
                    self.array.append(m.group(1))
|
||||
|
||||
#############################################################
|
||||
# API declaration parser class
|
||||
class API_DeclParser:
    """Finds the declaration of each requested API call in a header file.

    For every call name in `array` the parsed declaration (see get_args) is
    stored in the caller-supplied `data` dict under that name. Calls that are
    not found in the header are left out of `data`.
    """

    def fatal(self, msg):
        """Report an error on behalf of this parser and terminate."""
        fatal('API_DeclParser', msg)

    def __init__(self, header, array, data):
        """Parse `header` for every call in `array`, filling `data`.

        header: path of the API header file.
        array:  iterable of API call names to look up.
        data:   dict updated in place; a call already present is a fatal error.
        """
        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        # Read the header once and close it; parse() works on the cached lines
        # instead of re-reading the file for every call.
        with open(header, 'r') as self.inp:
            self.lines = self.inp.readlines()

        self.end_pattern = re.compile(r'\);\s*$')
        self.data = data
        for call in array:
            if call in data:
                self.fatal(call + ' is already found')
            self.parse(call)

    # api record filter
    def api_filter(self, record):
        """Remove the HSA_API and HSA_DEPRECATED markers from a record."""
        record = re.sub(r'\sHSA_API\s', r' ', record)
        record = re.sub(r'\sHSA_DEPRECATED\s', r' ', record)
        return record

    # check for start record
    def is_start(self, call, record):
        """Match object if `record` contains '<call>(' preceded by whitespace."""
        return re.search(r'\s' + call + r'\s*\(', record)

    # check for API method record
    def is_api(self, call, record):
        """Match object if the filtered record starts with '<ret type> <call>('."""
        record = self.api_filter(record)
        return re.match(r'\s+\S+\s+' + call + r'\s*\(', record)

    # check for end record
    def is_end(self, record):
        """Match object if `record` ends with ');' (optionally followed by spaces)."""
        return self.end_pattern.search(record)

    # parse method args
    def get_args(self, record):
        """Split a complete declaration record into return type and arguments.

        Returns a dict with:
          'ret'  - the first text block of the record (the return type),
          'args' - the argument list as text, separated by ', ',
          'astr' - argument name -> full argument text,
          'alst' - argument names in order,
          'tlst' - full argument texts in order.
        """
        struct = {'ret': '', 'args': '', 'astr': {}, 'alst': [], 'tlst': []}
        record = re.sub(r'^\s+', r'', record)
        # Attach '*' to the type: 'int *p' / 'int * p' become 'int* p'.
        record = re.sub(r'\s*(\*+)\s*', r'\1 ', record)
        rind = NextBlock(0, record)
        struct['ret'] = record[0:rind]
        pos = record.find('(')
        end = NextBlock(pos, record)
        args = record[pos:end]
        args = re.sub(r'^\(\s*', r'', args)
        args = re.sub(r'\s*\)$', r'', args)
        args = re.sub(r'\s*,\s*', r',', args)
        struct['args'] = re.sub(r',', r', ', args)
        if len(args) == 0:
            return struct

        pos = 0
        # Trailing comma so every argument, including the last, ends with ','.
        args = args + ','
        while pos < len(args):
            ind1 = NextBlock(pos, args)   # type
            ind2 = NextBlock(ind1, args)  # space
            if args[ind2] != '(':
                # Plain argument: the name is the last block before the comma.
                while ind2 < len(args):
                    end = NextBlock(ind2, args)
                    if args[end] == ',':
                        break
                    else:
                        ind2 = end
                name = args[ind2:end]
            else:
                # Function-pointer argument: the name sits inside '(*name)'.
                ind3 = NextBlock(ind2, args)  # field
                m = re.match(r'\(\s*\*\s*(\S+)\s*\)', args[ind2:ind3])
                if not m:
                    self.fatal("bad block3 '" + args + "' : '" + args[ind2:ind3] + "'")
                name = m.group(1)
                end = NextBlock(ind3, args)  # the rest
            item = args[pos:end]
            struct['astr'][name] = item
            struct['alst'].append(name)
            struct['tlst'].append(item)
            if args[end] != ',':
                self.fatal("no comma '" + args + "'")
            pos = end + 1

        return struct

    # parse given api
    def parse(self, call):
        """Locate the declaration of `call` and store its parsed form in `data`."""
        record = ''
        active = 0
        prev_line = ''

        for line in self.lines:
            # rstrip('\n') instead of line[:-1]: a declaration on a final line
            # without a newline must keep its closing ';'.
            record += ' ' + line.rstrip('\n')
            record = re.sub(r'^\s*', r' ', record)

            if active == 0:
                if self.is_start(call, record):
                    active = 1
                    m = self.is_api(call, record)
                    if not m:
                        # The return type may be on the previous line.
                        record = ' ' + prev_line + ' ' + record
                        m = self.is_api(call, record)
                        if not m:
                            self.fatal("bad api '" + line + "'")

            if active == 1:
                if self.is_end(record):
                    self.data[call] = self.get_args(record)
                    active = 0

            # Outside a declaration the accumulated record is discarded.
            if active == 0:
                record = ''
            prev_line = line
|
||||
|
||||
#############################################################
|
||||
# API description parser class
|
||||
class API_DescrParser:
|
||||
def fatal(self, msg):
|
||||
fatal('API_DescrParser', msg)
|
||||
|
||||
def __init__(self, out_h_file, hsa_dir, api_table_h, api_headers, license):
|
||||
out_macro = re.sub(r'[\/\.]', r'_', out_h_file.upper()) + '_'
|
||||
|
||||
self.h_content = ''
|
||||
self.cpp_content = ''
|
||||
self.api_names = []
|
||||
self.api_calls = {}
|
||||
self.api_rettypes = set()
|
||||
self.api_id = {}
|
||||
|
||||
api_data = {}
|
||||
api_list = []
|
||||
ns_calls = []
|
||||
|
||||
for i in range(0, len(api_headers)):
|
||||
(name, header) = api_headers[i]
|
||||
|
||||
if i < len(api_headers) - 1:
|
||||
api = API_TableParser(hsa_dir + api_table_h, name)
|
||||
api_list = api.array
|
||||
self.api_names.append(name)
|
||||
self.api_calls[name] = api_list
|
||||
else:
|
||||
api_list = ns_calls
|
||||
ns_calls = []
|
||||
|
||||
for call in api_list:
|
||||
if call in api_data:
|
||||
self.fatal("call '" + call + "' is already found")
|
||||
|
||||
API_DeclParser(hsa_dir + header, api_list, api_data)
|
||||
|
||||
for call in api_list:
|
||||
if not call in api_data:
|
||||
# Not-supported functions
|
||||
ns_calls.append(call)
|
||||
else:
|
||||
# API ID map
|
||||
self.api_id[call] = 'HSA_API_ID_' + call
|
||||
# Return types
|
||||
self.api_rettypes.add(api_data[call]['ret'])
|
||||
|
||||
self.api_rettypes.discard('void')
|
||||
self.api_data = api_data
|
||||
self.ns_calls = ns_calls
|
||||
|
||||
self.h_content += "/* Generated by " + os.path.basename(__file__) + " */\n" + license + "\n\n"
|
||||
|
||||
self.h_content += "/* HSA API tracing primitives\n"
|
||||
for (name, header) in api_headers:
|
||||
self.h_content += " '" + name + "', header '" + header + "', " + str(len(self.api_calls[name])) + ' funcs\n'
|
||||
for call in self.ns_calls:
|
||||
self.h_content += ' ' + call + ' was not parsed\n'
|
||||
self.h_content += " */\n"
|
||||
self.h_content += '\n'
|
||||
self.h_content += '#ifndef ' + out_macro + '\n'
|
||||
self.h_content += '#define ' + out_macro + '\n'
|
||||
|
||||
self.h_content += self.add_section('API ID enumeration', ' ', self.gen_id_enum)
|
||||
|
||||
self.h_content += '/* Declarations of APIs intended for use only by tools. */\n'
|
||||
self.h_content += 'typedef void (*hsa_amd_queue_intercept_packet_writer)(const void*, uint64_t);\n'
|
||||
self.h_content += 'typedef void (*hsa_amd_queue_intercept_handler)(const void*, uint64_t, uint64_t, void*,\n'
|
||||
self.h_content += ' hsa_amd_queue_intercept_packet_writer);\n'
|
||||
self.h_content += 'typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t*, hsa_agent_t, void*);\n'
|
||||
|
||||
self.h_content += self.add_section('API arg structure', ' ', self.gen_arg_struct)
|
||||
self.h_content += self.add_section('API output stream', ' ', self.gen_out_stream)
|
||||
self.h_content += '#endif /* ' + out_macro + ' */\n'
|
||||
|
||||
self.cpp_content += "/* Generated by " + os.path.basename(__file__) + " */\n" + license + "\n\n"
|
||||
|
||||
self.cpp_content += '#include <hsa/hsa_api_trace.h>\n'
|
||||
self.cpp_content += '#include <atomic>\n'
|
||||
self.cpp_content += 'namespace roctracer::hsa_support::detail {\n'
|
||||
|
||||
self.cpp_content += 'static CoreApiTable CoreApi_saved_before_cb;\n'
|
||||
self.cpp_content += 'static AmdExtTable AmdExt_saved_before_cb;\n'
|
||||
self.cpp_content += 'static ImageExtTable ImageExt_saved_before_cb;\n\n'
|
||||
|
||||
self.cpp_content += self.add_section('API callback functions', '', self.gen_callbacks)
|
||||
self.cpp_content += self.add_section('API intercepting code', '', self.gen_intercept)
|
||||
self.cpp_content += self.add_section('API get_name function', ' ', self.gen_get_name)
|
||||
self.cpp_content += self.add_section('API get_code function', ' ', self.gen_get_code)
|
||||
self.cpp_content += '\n};\n'
|
||||
|
||||
# add code section
|
||||
def add_section(self, title, gap, fun):
  """Build one generated-code section by driving a per-call generator.

  `fun(n, name, call, data)` is invoked:
    * once with n == -1 and '-' placeholders before any API call,
    * once per API call, with a running call counter `n`,
    * when `gap` is the empty string, once per API table after the first
      with call == '-' (a table-boundary notification),
    * once more at the end with '-' placeholders and the final counter.

  Returns the concatenation of the section banner, the per-table block
  banners (prefixed with `gap`) and everything `fun` returned.
  """
  pieces = ['\n/* section: ' + title + ' */\n\n', fun(-1, '-', '-', {})]
  call_index = 0
  for table_name in self.api_names:
    if call_index != 0:
      # A new table starts after at least one call has been emitted.
      if gap == '':
        pieces.append(fun(call_index, table_name, '-', {}))
      pieces.append('\n')
    pieces.append(gap + '/* block: ' + table_name + ' API */\n')
    for call in self.api_calls[table_name]:
      pieces.append(fun(call_index, table_name, call, self.api_data[call]))
      call_index += 1
  pieces.append(fun(call_index, '-', '-', {}))
  return ''.join(pieces)
|
||||
|
||||
# generate API ID enumeration
|
||||
def gen_id_enum(self, n, name, call, data):
  """Emit one fragment of the `hsa_api_id_t` enumeration.

  n == -1      -> the opening line of the enum.
  call != '-'  -> one enumerator, `<api id> = <n>`.
  otherwise    -> the trailing HSA_API_ID_DISPATCH / HSA_API_ID_NUMBER
                  enumerators (valued n and n + 1) and the closing brace.
  """
  if n == -1:
    return 'enum hsa_api_id_t {\n'
  if call != '-':
    return ' ' + self.api_id[call] + ' = ' + str(n) + ',\n'
  closing = [
    '\n',
    ' HSA_API_ID_DISPATCH = ' + str(n) + ',\n',
    ' HSA_API_ID_NUMBER = ' + str(n + 1) + ',\n',
    '};\n',
  ]
  return ''.join(closing)
|
||||
|
||||
# generate API args structure
|
||||
def gen_arg_struct(self, n, name, call, struct):
  """Emit one fragment of the `hsa_api_data_t` structure.

  n == -1      -> struct header, the anonymous union holding one
                  `<type>_retval` member per entry of self.api_rettypes, and
                  the opening of the arguments union.
  call != '-'  -> a nested struct named after the call with one member per
                  entry of struct['astr'].
  otherwise    -> the closing of the arguments union and of the struct.
  """
  if n == -1:
    head = [
      'struct hsa_api_data_t {\n',
      ' uint64_t correlation_id;\n',
      ' uint32_t phase;\n',
      ' union {\n',
    ]
    for ret_type in self.api_rettypes:
      head.append(' ' + ret_type + ' ' + ret_type + '_retval;\n')
    head.append(' };\n')
    head.append(' union {\n')
    return ''.join(head)

  if call == '-':
    return ' } args;\n' + ' uint64_t *phase_data;\n' + '};\n'

  members = [' struct {\n']
  for (var, item) in struct['astr'].items():
    members.append(' ' + item + ';\n')
    # The rect copy also stores the pointed-to range by value.
    if call == "hsa_amd_memory_async_copy_rect" and item == "const hsa_dim3_t* range":
      members.append(' hsa_dim3_t range__val;\n')
  members.append(' } ' + call + ';\n')
  return ''.join(members)
|
||||
|
||||
# generate API callbacks
|
||||
def gen_callbacks(self, n, name, call, struct):
  """Emit the C++ `<call>_callback` wrapper for one HSA API call.

  n == -1 adds the "Static declarations" banner. For a real call the emitted
  wrapper captures the arguments into `trace_data`, invokes the enter phase,
  forwards to the saved `<name>_saved_before_cb.<call>_fn`, invokes the exit
  phase and, for non-void calls, returns the stored return value. For the
  '-' placeholder nothing beyond the optional banner is produced.
  """
  pieces = []
  if n == -1:
    pieces.append('/* section: Static declarations */\n')
    pieces.append('\n')
  if call == '-':
    return ''.join(pieces)

  call_id = self.api_id[call]
  ret_type = struct['ret']
  has_retval = ret_type != 'void'
  arg_slot = ' trace_data.api_data.args.' + call + '.'

  pieces.append('static ' + ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n')
  pieces.append(' hsa_trace_data_t trace_data;\n')
  pieces.append(' bool enabled{false};\n')
  pieces.append('\n')
  pieces.append(' if (auto function = report_activity.load(std::memory_order_relaxed); function &&\n')
  pieces.append(' (enabled =\n')
  pieces.append(' function(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &trace_data) == 0)) {\n')
  pieces.append(' if (trace_data.phase_enter != nullptr) {\n')

  for var in struct['alst']:
    item = struct['astr'][var]
    if re.search(r'char\* ', item):
      # FIXME: we should not strdup the char* arguments here, as the callback will not outlive
      # the scope of this function. Instead, we should generate a helper function to capture the
      # content of the arguments similar to hipApiArgsInit for HIP. We also need a helper to
      # free the memory that is allocated to capture the content.
      pieces.append(arg_slot + var + ' = (' + var + ' != NULL) ? strdup(' + var + ') : NULL;\n')
    else:
      pieces.append(arg_slot + var + ' = ' + var + ';\n')
      if call == 'hsa_amd_memory_async_copy_rect' and var == 'range':
        pieces.append(arg_slot + var + '__val = *(' + var + ');\n')

  pieces.append(' trace_data.phase_enter(' + call_id + ', &trace_data);\n')
  pieces.append(' }\n')
  pieces.append(' }\n')
  pieces.append('\n')

  # The forwarded call; non-void calls assign its result to the retval slot.
  if has_retval:
    pieces.append(' trace_data.api_data.' + ret_type + '_retval = ')
  pieces.append(' ' + name + '_saved_before_cb.' + call + '_fn(' + ', '.join(struct['alst']) + ');\n')

  pieces.append('\n')
  pieces.append(' if (enabled && trace_data.phase_exit != nullptr)\n')
  pieces.append(' trace_data.phase_exit(' + call_id + ', &trace_data);\n')

  if has_retval:
    pieces.append(' return trace_data.api_data.' + ret_type + '_retval;\n')
  pieces.append('}\n')
  return ''.join(pieces)
|
||||
|
||||
# generate API intercepting code
|
||||
def gen_intercept(self, n, name, call, struct):
  """Emit one fragment of the `Install<name>Wrappers` functions.

  A '-' call with n > 0 closes the previous install function. The first call
  (n == 0) or a table-boundary notification (call '-' with a real table name)
  opens a new install function that first saves the incoming table. Every
  real call except hsa_shut_down is redirected to its `_callback`; for
  hsa_shut_down the callback is only referenced, not installed.
  """
  is_placeholder = call == '-'
  pieces = []
  if is_placeholder and n > 0:
    pieces.append('};\n')
  if n == 0 or (is_placeholder and name != '-'):
    pieces.append('static void Install' + name + 'Wrappers(' + name + 'Table* table) {\n')
    pieces.append(' ' + name + '_saved_before_cb = *table;\n')
  if not is_placeholder:
    if call == 'hsa_shut_down':
      pieces.append(' { void* p = (void*)' + call + '_callback; (void)p; }\n')
    else:
      pieces.append(' table->' + call + '_fn = ' + call + '_callback;\n')
  return ''.join(pieces)
|
||||
|
||||
# generate API name function
|
||||
def gen_get_name(self, n, name, call, struct):
  """Emit one fragment of the generated `GetApiName` function.

  n == -1 opens the function and its switch, a real call adds one `case`
  returning the call name, and the '-' placeholder closes the switch with the
  "unknown" fallback.
  """
  if n == -1:
    return 'static const char* GetApiName(uint32_t id) {\n' + ' switch (id) {\n'
  if call != '-':
    return ' case ' + self.api_id[call] + ': return "' + call + '";\n'
  return ''.join([' }\n', ' return "unknown";\n', '}\n'])
|
||||
|
||||
# generate API code function
|
||||
def gen_get_code(self, n, name, call, struct):
  """Emit one fragment of the generated `GetApiCode` function.

  n == -1 opens the function, a real call adds one `strcmp` test returning
  the call's API id, and the '-' placeholder emits the HSA_API_ID_NUMBER
  fallback and closes the function.
  """
  if n == -1:
    return 'static uint32_t GetApiCode(const char* str) {\n'
  if call != '-':
    return ' if (strcmp("' + call + '", str) == 0) return ' + self.api_id[call] + ';\n'
  return ' return HSA_API_ID_NUMBER;\n' + '}\n'
|
||||
|
||||
# generate stream operator
|
||||
def gen_out_stream(self, n, name, call, struct):
  """Emit one fragment of the generated `operator<<` for API call records.

  n == -1 opens the C++-only operator and its switch over the call id. A real
  call adds a `case` that streams the call name, each argument (char*
  arguments are streamed as a hexadecimal address) and the return value or
  "void". The '-' placeholder emits the aborting `default` label and closes
  the switch, the function and the `#ifdef`.
  """
  if n == -1:
    return ''.join([
      '#ifdef __cplusplus\n',
      '#include "hsa_ostream_ops.h"\n',
      'typedef std::pair<uint32_t, hsa_api_data_t> hsa_api_data_pair_t;\n',
      'inline std::ostream& operator<< (std::ostream& out, const hsa_api_data_pair_t& data_pair) {\n',
      ' const uint32_t cid = data_pair.first;\n',
      ' const hsa_api_data_t& api_data = data_pair.second;\n',
      ' switch(cid) {\n',
    ])

  if call == '-':
    return ''.join([
      ' default:\n',
      ' out << "ERROR: unknown API";\n',
      ' abort();\n',
      ' }\n',
      ' return out;\n',
      '}\n',
      '#endif\n',
    ])

  pieces = [
    ' case ' + self.api_id[call] + ': {\n',
    ' out << "' + call + '(";\n',
  ]
  arg_list = struct['alst']
  final_pos = len(arg_list) - 1
  for pos, arg_var in enumerate(arg_list):
    arg_val = 'api_data.args.' + call + '.' + arg_var
    if re.search(r'char\* ', struct['astr'][arg_var]):
      pieces.append(' out << "0x" << std::hex << (uint64_t)' + arg_val)
    else:
      pieces.append(' out << ' + arg_val)
      # The rect copy additionally prints the captured by-value range.
      if call == "hsa_amd_memory_async_copy_rect" and arg_var == "range":
        pieces.append(' << ", ";\n')
        pieces.append(' out << ' + arg_val + '__val')
    # Separate arguments with a comma; terminate the statement after the last.
    pieces.append(' << ", ";\n' if pos < final_pos else ';\n')

  if struct['ret'] != 'void':
    pieces.append(' out << ") = " << api_data.' + struct['ret'] + '_retval;\n')
  else:
    pieces.append(' out << ") = void";\n')
  pieces.append(' break;\n')
  pieces.append(' }\n')
  return ''.join(pieces)
|
||||
|
||||
#############################################################
|
||||
# main
|
||||
# Usage
|
||||
# Exactly two arguments are required: the output directory prefix and the
# HSA runtime include directory.
if len(sys.argv) != 3:
  print ("Usage:", sys.argv[0], " <OUT prefix> <HSA runtime include path>", file=sys.stderr)
  sys.exit(1)
else:
  PREFIX = sys.argv[1] + '/'
  HSA_DIR = sys.argv[2] + '/'

descr = API_DescrParser(H_OUT, HSA_DIR, API_TABLES_H, API_HEADERS_H, LICENSE)

# Write the generated header. The last character of the generated content is
# dropped before writing. The context manager closes the file even when the
# write raises.
out_file = PREFIX + H_OUT
print ('Generating "' + out_file + '"')
with open(out_file, 'w') as f:
  f.write(descr.h_content[:-1])

# Write the generated implementation file the same way.
out_file = PREFIX + CPP_OUT
print ('Generating "' + out_file + '"')
with open(out_file, 'w') as f:
  f.write(descr.cpp_content[:-1])
|
||||
#############################################################
|
||||
@@ -20,20 +20,5 @@
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
#
# Build the dynamic library object
#
set(TARGET_LIB "${TARGET_NAME}")

# Core and utility sources compiled into the library.
set(LIB_SRC
    ${LIB_DIR}/core/rocprofiler.cpp
    ${LIB_DIR}/core/gpu_command.cpp
    ${LIB_DIR}/core/proxy_queue.cpp
    ${LIB_DIR}/core/simple_proxy_queue.cpp
    ${LIB_DIR}/core/intercept_queue.cpp
    ${LIB_DIR}/core/metrics.cpp
    ${LIB_DIR}/core/activity.cpp
    ${LIB_DIR}/util/hsa_rsrc_factory.cpp)

add_library(${TARGET_LIB} SHARED ${LIB_SRC})
target_include_directories(${TARGET_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR})
target_link_libraries(${TARGET_LIB} PRIVATE hsa-runtime64::hsa-runtime64 c stdc++)

# Sub-projects: the API library and the tools.
add_subdirectory(api)
add_subdirectory(tools)
|
||||
|
||||
@@ -0,0 +1,258 @@
|
||||
# ############################################################################################################################################
|
||||
# ROCMTOOL General Requirements
|
||||
# ############################################################################################################################################
|
||||
find_package(Python3 COMPONENTS Interpreter REQUIRED)

# The build relies on the "lxml" and "CppHeaderParser" Python modules. Probe
# each one by importing it; a non-zero exit status means it is missing.
execute_process(
  COMMAND ${Python3_EXECUTABLE} -c "import lxml"
  RESULT_VARIABLE LXML_IMPORT_RESULT
  OUTPUT_QUIET)

# if() dereferences the variable name itself, so no ${} expansion is needed.
if(NOT LXML_IMPORT_RESULT EQUAL 0)
  message(FATAL_ERROR "\
The \"lxml\" Python3 package is not installed. \
Please install it using the following command: \"${Python3_EXECUTABLE} -m pip install lxml\".\
")
endif()

execute_process(
  COMMAND ${Python3_EXECUTABLE} -c "import CppHeaderParser"
  RESULT_VARIABLE CPP_HEADER_PARSER
  OUTPUT_QUIET)

if(NOT CPP_HEADER_PARSER EQUAL 0)
  message(FATAL_ERROR "\
The \"CppHeaderParser\" Python3 package is not installed. \
Please install it using the following command: \"${Python3_EXECUTABLE} -m pip install CppHeaderParser\".\
")
endif()
|
||||
|
||||
# Shared libraries are written to the top of the project build tree.
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})

# Locate hsa.h below the include directories exported by the HSA runtime
# target and remember the directory that contains it.
get_property(
  HSA_RUNTIME_INCLUDE_DIRECTORIES
  TARGET hsa-runtime64::hsa-runtime64
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(
  HSA_H hsa.h
  PATHS ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
  PATH_SUFFIXES hsa
  NO_DEFAULT_PATH REQUIRED)
get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY)

# The AQL profile library is mandatory.
find_library(
  AQLPROFILE_LIB "libhsa-amd-aqlprofile64.so"
  HINTS ${CMAKE_PREFIX_PATH}
  PATHS ${ROCM_PATH}
  PATH_SUFFIXES lib)
if(NOT AQLPROFILE_LIB)
  message(FATAL_ERROR "AQL_PROFILE not installed. Please install hsa-amd-aqlprofile!")
endif()
|
||||
|
||||
# ############################################################################################################################################
|
||||
# Adding Old Library Files
|
||||
# ############################################################################################################################################
|
||||
# Sources of the original library; they are added to the API library target.
set(OLD_LIB_SRC
    ${LIB_DIR}/core/rocprofiler.cpp
    ${LIB_DIR}/core/gpu_command.cpp
    ${LIB_DIR}/core/proxy_queue.cpp
    ${LIB_DIR}/core/simple_proxy_queue.cpp
    ${LIB_DIR}/core/intercept_queue.cpp
    ${LIB_DIR}/core/metrics.cpp
    ${LIB_DIR}/core/activity.cpp
    ${LIB_DIR}/util/hsa_rsrc_factory.cpp)
|
||||
|
||||
# ############################################################################################################################################
|
||||
# Configuring Basic/Derived Counters
|
||||
# ############################################################################################################################################
|
||||
set(COUNTERS_DIR ${PROJECT_SOURCE_DIR}/src/core/counters)

# Generate basic_counter.cpp at configure time. execute_process() has no
# COMMENT keyword: anything after the command would be handed to the script as
# extra arguments, so the progress text is printed with message() instead.
message(STATUS "Generating basic_counter.cpp...")
execute_process(
  COMMAND ${Python3_EXECUTABLE} ${COUNTERS_DIR}/basic/xml_parser_basic.py
          ${COUNTERS_DIR}/basic ${CMAKE_CURRENT_BINARY_DIR}/basic_counter.cpp
  RESULT_VARIABLE BASIC_COUNTER_GEN_RESULT)

# Surface a failing generator instead of silently continuing.
if(NOT BASIC_COUNTER_GEN_RESULT EQUAL 0)
  message(WARNING "Generating basic_counter.cpp failed (result: ${BASIC_COUNTER_GEN_RESULT}).")
endif()
|
||||
|
||||
# execute_process(
|
||||
# COMMAND ${Python3_EXECUTABLE} ${COUNTERS_DIR}/derived/xml_parser_derived.py ${COUNTERS_DIR}/derived ${CMAKE_CURRENT_BINARY_DIR}/derived_counter.cpp
|
||||
# COMMENT "Generating derived_counter.cpp...")
|
||||
|
||||
# ############################################################################################################################################
|
||||
# ROCMTOOL Tracer HIP/HSA Parsing
|
||||
# ############################################################################################################################################
|
||||
# Locate hip_runtime_api.h below the include directories exported by the HIP
# runtime target.
get_property(
  HIP_INCLUDE_DIRECTORIES
  TARGET hip::amdhip64
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(
  HIP_RUNTIME_API_H hip_runtime_api.h
  PATHS ${HIP_INCLUDE_DIRECTORIES}
  PATH_SUFFIXES hip
  NO_DEFAULT_PATH REQUIRED)
|
||||
|
||||
# Generate the HSA wrapper functions header and its inline companion by
# running script/hsaap.py against the HSA runtime headers.
add_custom_command(
  OUTPUT hsa_prof_str.h hsa_prof_str.inline.h
  DEPENDS
    ${PROJECT_SOURCE_DIR}/script/hsaap.py
    "${HSA_RUNTIME_INC_PATH}/hsa.h"
    "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
    "${HSA_RUNTIME_INC_PATH}/hsa_ext_image.h"
    "${HSA_RUNTIME_INC_PATH}/hsa_api_trace.h"
  COMMAND
    ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/hsaap.py
    ${CMAKE_CURRENT_BINARY_DIR} "${HSA_RUNTIME_INC_PATH}" > /dev/null
  COMMENT "Generating hsa_prof_str.h,hsa_prof_str.inline.h...")
|
||||
|
||||
# Generate the HSA pretty printers: preprocess the HSA headers with the C
# compiler, then feed the preprocessed files to script/gen_ostream_ops.py.
add_custom_command(
  OUTPUT hsa_ostream_ops.h
  BYPRODUCTS hsa.h.i hsa_ext_amd.h.i
  DEPENDS
    ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
    "${HSA_RUNTIME_INC_PATH}/hsa.h"
    "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
  COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa.h" -o hsa.h.i
  COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h" -o hsa_ext_amd.h.i
  COMMAND
    ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
    -in hsa.h.i,hsa_ext_amd.h.i -out hsa_ostream_ops.h > /dev/null
  COMMENT "Generating hsa_ostream_ops.h...")
|
||||
|
||||
# Resolve the HIP include directories and hip_runtime_api.h for the HIP
# pretty-printer rule that follows.
get_property(
  HIP_INCLUDE_DIRECTORIES
  TARGET hip::amdhip64
  PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(
  HIP_RUNTIME_API_H hip_runtime_api.h
  PATHS ${HIP_INCLUDE_DIRECTORIES}
  PATH_SUFFIXES hip
  NO_DEFAULT_PATH REQUIRED)
|
||||
|
||||
# Generate the HIP pretty printers: preprocess hip_runtime_api.h with the HIP
# include directories on the command line, then run script/gen_ostream_ops.py
# on the preprocessed file.
add_custom_command(
  OUTPUT hip_ostream_ops.h
  BYPRODUCTS hip_runtime_api.h.i
  DEPENDS ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py "${HIP_RUNTIME_API_H}"
  COMMAND
    ${CMAKE_C_COMPILER}
    "$<$<BOOL:${HIP_INCLUDE_DIRECTORIES}>:-I$<JOIN:${HIP_INCLUDE_DIRECTORIES},$<SEMICOLON>-I>>"
    -E "${HIP_RUNTIME_API_H}" -D__HIP_PLATFORM_HCC__=1 -D__HIP_ROCclr__=1 -o hip_runtime_api.h.i
  COMMAND
    ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
    -in hip_runtime_api.h.i -out hip_ostream_ops.h > /dev/null
  COMMENT "Generating hip_ostream_ops.h..."
  COMMAND_EXPAND_LISTS)
|
||||
|
||||
# Headers produced by the custom commands above; listing them as sources of
# the library makes the build run those commands.
set(GENERATED_SOURCES
    hip_ostream_ops.h
    hsa_prof_str.h
    hsa_ostream_ops.h
    hsa_prof_str.inline.h)
|
||||
|
||||
# ############################################################################################################################################
|
||||
# ROCMTOOL API
|
||||
# ############################################################################################################################################
|
||||
# PC sampling uses libpciaccess as a fallback if the debugfs ioctl is
# unavailable, so both its header and its library must be present.
find_path(PCIACCESS_INCLUDE_DIR pciaccess.h REQUIRED)
find_library(PCIACCESS_LIBRARIES pciaccess REQUIRED)

# Public API headers, installed from inc/ into the "dev" component.
set(PUBLIC_HEADERS rocprofiler_plugin.h rocprofiler.h)

foreach(public_header IN LISTS PUBLIC_HEADERS)
  install(
    FILES ${PROJECT_SOURCE_DIR}/inc/${public_header}
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
    COMPONENT dev)
endforeach()
|
||||
|
||||
# Source directories referenced by the globs below.
set(CORE_HARDWARE_DIR ${PROJECT_SOURCE_DIR}/src/core/hardware)
set(CORE_HSA_QUEUES_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/queues)
set(CORE_HSA_PACKETS_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa/packets)
set(CORE_HSA_DIR ${PROJECT_SOURCE_DIR}/src/core/hsa)
set(CORE_MEMORY_DIR ${PROJECT_SOURCE_DIR}/src/core/memory)
set(CORE_SESSION_DIR ${PROJECT_SOURCE_DIR}/src/core/session)
set(CORE_UTILS_DIR ${PROJECT_SOURCE_DIR}/src/utils)
set(CORE_PC_SAMPLING_DIR ${PROJECT_SOURCE_DIR}/src/pcsampler)

# API entry points and the rocmtool class that live in this directory.
file(GLOB ROCPROFILER_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
file(GLOB ROCMTOOL_CLASS_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/rocmtool.cpp)

# Session components: profiler, tracer, roctracer and SPM.
file(GLOB ROCPROFILER_PROFILER_SRC_FILES
     ${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp)
file(GLOB ROCPROFILER_TRACER_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp)
file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES
     ${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp)
file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp)

# Hardware, HSA, memory and session core sources.
file(GLOB CORE_HARDWARE_SRC_FILES ${CORE_HARDWARE_DIR}/*.cpp)
file(GLOB CORE_HSA_QUEUES_SRC_FILES ${CORE_HSA_QUEUES_DIR}/queue.cpp)
file(GLOB CORE_HSA_PACKETS_SRC_FILES ${CORE_HSA_PACKETS_DIR}/packets_generator.cpp)
file(GLOB CORE_HSA_SRC_FILES ${CORE_HSA_DIR}/*.cpp)
file(GLOB CORE_MEMORY_SRC_FILES ${CORE_MEMORY_DIR}/*.cpp)
file(GLOB CORE_SESSION_SRC_FILES ${CORE_SESSION_DIR}/session.cpp)
file(GLOB CORE_FILTER_SRC_FILES ${CORE_SESSION_DIR}/filter.cpp)
file(GLOB CORE_DEVICE_PROFILING_SRC_FILES ${CORE_SESSION_DIR}/device_profiling.cpp)

# Counter sources: the *_counter.cpp files found in the build tree plus the
# hand-written counter and metrics sources.
file(GLOB CORE_COUNTERS_SRC_FILES ${PROJECT_BINARY_DIR}/src/api/*_counter.cpp)
file(GLOB CORE_COUNTERS_PARENT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/*.cpp)
file(GLOB CORE_COUNTERS_METRICS_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/*.cpp)

# Utilities and PC sampling.
file(GLOB CORE_UTILS_SRC_FILES ${CORE_UTILS_DIR}/*.cpp)
file(GLOB CORE_PC_SAMPLING_FILES
     ${CORE_PC_SAMPLING_DIR}/core/*.cpp
     ${CORE_PC_SAMPLING_DIR}/gfxip/*.cpp
     ${CORE_PC_SAMPLING_DIR}/session/*.cpp)
|
||||
|
||||
# Build the ROCProfiler API shared library from every source group collected
# above, the generated headers and the original library sources.
add_library(
  ${ROCPROFILER_TARGET} SHARED
  ${ROCPROFILER_SRC_FILES}
  ${ROCMTOOL_CLASS_SRC_FILES}
  ${ROCPROFILER_PROFILER_SRC_FILES}
  ${CORE_HARDWARE_SRC_FILES}
  ${CORE_HSA_SRC_FILES}
  ${ROCPROFILER_SPM_SRC_FILES}
  ${CORE_MEMORY_SRC_FILES}
  ${CORE_SESSION_SRC_FILES}
  ${CORE_FILTER_SRC_FILES}
  ${CORE_DEVICE_PROFILING_SRC_FILES}
  ${CORE_COUNTERS_PARENT_SRC_FILES}
  ${CORE_COUNTERS_METRICS_SRC_FILES}
  ${CORE_UTILS_SRC_FILES}
  ${CORE_HSA_PACKETS_SRC_FILES}
  ${CORE_HSA_QUEUES_SRC_FILES}
  ${ROCPROFILER_TRACER_SRC_FILES}
  ${ROCPROFILER_ROCTRACER_SRC_FILES}
  ${GENERATED_SOURCES}
  ${CORE_COUNTERS_SRC_FILES}
  ${CORE_PC_SAMPLING_FILES}
  ${OLD_LIB_SRC})
|
||||
|
||||
# Hide C++ symbols by default, relink when the export map changes, and
# version the shared object after the project version.
set_target_properties(
  ${ROCPROFILER_TARGET}
  PROPERTIES CXX_VISIBILITY_PRESET hidden
             DEFINE_SYMBOL "ROCPROFILER_EXPORTS"
             LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/exportmap
             VERSION ${PROJECT_VERSION}
             SOVERSION ${PROJECT_VERSION_MAJOR})

# AMD_INTERNAL_BUILD is propagated to consumers; the remaining definitions
# apply only while compiling the library itself.
target_compile_definitions(
  ${ROCPROFILER_TARGET}
  PUBLIC AMD_INTERNAL_BUILD
  PRIVATE PROF_API_IMPL
          HIP_PROF_HIP_API_STRING=1
          __HIP_PLATFORM_AMD__=1)
|
||||
|
||||
# Include paths: the PUBLIC entries are also seen by consumers (the
# build-tree-only ones are wrapped in BUILD_INTERFACE); the PRIVATE entries
# are used only when compiling the library.
target_include_directories(
  ${ROCPROFILER_TARGET}
  PUBLIC ${ROCM_PATH}/include
         ${HIP_INCLUDE_DIRECTORIES}
         ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
         $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
         $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/tools>
         $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/inc>
  PRIVATE ${LIB_DIR}
          ${ROOT_DIR}
          ${CMAKE_CURRENT_BINARY_DIR}
          ${PROJECT_SOURCE_DIR}
          ${PROJECT_SOURCE_DIR}/tools
          ${PROJECT_SOURCE_DIR}/inc)
|
||||
|
||||
# Link setup. The export map restricts the exported symbols and
# --no-undefined rejects unresolved references at link time.
if(ASAN)
  target_compile_options(${ROCPROFILER_TARGET} PRIVATE -fsanitize=address)
  # -fsanitize=address is a compiler-driver option. It must not be appended to
  # a -Wl, list: everything after -Wl, is forwarded to the linker, which would
  # read "-fsanitize=address" as "-f sanitize=address".
  target_link_options(
    ${ROCPROFILER_TARGET}
    PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exportmap
            -Wl,--no-undefined
            -fsanitize=address)
  target_link_libraries(
    ${ROCPROFILER_TARGET}
    PRIVATE ${AQLPROFILE_LIB}
            hsa-runtime64::hsa-runtime64
            Threads::Threads
            atomic
            asan
            dl
            c
            stdc++
            stdc++fs
            amd_comgr
            ${PCIACCESS_LIBRARIES})
else()
  target_link_options(
    ${ROCPROFILER_TARGET}
    PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exportmap
            -Wl,--no-undefined)
  target_link_libraries(
    ${ROCPROFILER_TARGET}
    PRIVATE ${AQLPROFILE_LIB}
            hsa-runtime64::hsa-runtime64
            Threads::Threads
            atomic
            dl
            c
            stdc++
            stdc++fs
            amd_comgr
            ${PCIACCESS_LIBRARIES})
endif()
|
||||
|
||||
# Install the library: the unversioned name link goes into the "dev"
# component, the versioned library files into the "runtime" component.
install(
  TARGETS ${ROCPROFILER_TARGET}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
          COMPONENT dev
          NAMELINK_ONLY)
install(
  TARGETS ${ROCPROFILER_TARGET}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
          COMPONENT runtime
          NAMELINK_SKIP)

# Copy the counter definition files into the build tree unchanged, then
# install the resulting directory with the "runtime" component.
foreach(counters_xml basic_counters.xml derived_counters.xml)
  configure_file(
    ${PROJECT_SOURCE_DIR}/src/core/counters/metrics/${counters_xml}
    ${PROJECT_BINARY_DIR}/counters/${counters_xml} COPYONLY)
endforeach()

install(
  DIRECTORY ${PROJECT_BINARY_DIR}/counters
  DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/${PROJECT_NAME}
  USE_SOURCE_PERMISSIONS
  COMPONENT runtime)
|
||||
|
||||
# ############################################################################################################################################
|
||||
@@ -0,0 +1,88 @@
|
||||
# Version node ROCPROFILER_1.0: the symbols listed under "global" are
# exported, everything else is made local by the catch-all pattern.
ROCPROFILER_1.0 {
  global:
    OnLoad;
    OnUnload;
    rocprofiler_version_major;
    rocprofiler_version_minor;
    rocprofiler_error_string;
    rocprofiler_open;
    rocprofiler_add_feature;
    rocprofiler_features_set_open;
    rocprofiler_close;
    rocprofiler_reset;
    rocprofiler_get_agent;
    rocprofiler_get_time;
    rocprofiler_set_queue_callbacks;
    rocprofiler_remove_queue_callbacks;
    rocprofiler_start_queue_callbacks;
    rocprofiler_stop_queue_callbacks;
    rocprofiler_start;
    rocprofiler_stop;
    rocprofiler_read;
    rocprofiler_get_data;
    rocprofiler_group_count;
    rocprofiler_get_group;
    rocprofiler_group_start;
    rocprofiler_group_stop;
    rocprofiler_group_read;
    rocprofiler_group_get_data;
    rocprofiler_get_metrics;
    rocprofiler_iterate_trace_data;
    rocprofiler_get_info;
    rocprofiler_iterate_info;
    rocprofiler_query_info;
    rocprofiler_queue_create_profiled;
    rocprofiler_pool_open;
    rocprofiler_pool_close;
    rocprofiler_pool_fetch;
    rocprofiler_pool_release;
    rocprofiler_pool_iterate;
    rocprofiler_pool_flush;
    rocprofiler_set_hsa_callbacks;

  local:
    *;
};
|
||||
|
||||
# Version node ROCPROFILER_2.0, declared as depending on ROCPROFILER_1.0.
ROCPROFILER_2.0 {
  global:
    HSA_AMD_TOOL_PRIORITY;
    rocprofiler_error_str;
    rocprofiler_initialize;
    rocprofiler_finalize;
    rocprofiler_get_timestamp;
    rocprofiler_iterate_counters;
    rocprofiler_query_agent_info_size;
    rocprofiler_query_agent_info;
    rocprofiler_query_queue_info_size;
    rocprofiler_query_queue_info;
    rocprofiler_query_kernel_info_size;
    rocprofiler_query_kernel_info;
    rocprofiler_query_counter_info_size;
    rocprofiler_query_counter_info;
    rocprofiler_query_roctx_tracer_api_data_info_size;
    rocprofiler_query_roctx_tracer_api_data_info;
    rocprofiler_query_hsa_tracer_api_data_info_size;
    rocprofiler_query_hsa_tracer_api_data_info;
    rocprofiler_query_hip_tracer_api_data_info_size;
    rocprofiler_query_hip_tracer_api_data_info;
    rocprofiler_flush_data;
    rocprofiler_next_record;
    rocprofiler_create_session;
    rocprofiler_create_ready_session;
    rocprofiler_create_filter;
    rocprofiler_create_buffer;
    rocprofiler_destroy_session;
    rocprofiler_destroy_filter;
    rocprofiler_destroy_buffer;
    rocprofiler_set_filter_buffer;
    rocprofiler_set_api_trace_sync_callback;
    rocprofiler_set_buffer_properties;
    rocprofiler_start_session;
    rocprofiler_terminate_session;
    rocprofiler_push_range;
    rocprofiler_pop_range;
    rocprofiler_start_replay_pass;
    rocprofiler_end_replay_pass;
    rocprofiler_device_profiling_session_create;
    rocprofiler_device_profiling_session_start;
    rocprofiler_device_profiling_session_poll;
    rocprofiler_device_profiling_session_stop;
    rocprofiler_device_profiling_session_destroy;
} ROCPROFILER_1.0;
|
||||
@@ -0,0 +1,242 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "rocmtool.h"
|
||||
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <optional>
|
||||
#include <thread>
|
||||
|
||||
#include "src/core/hardware/hsa_info.h"
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
#include "src/core/hsa/queues/queue.h"
|
||||
#include "src/utils/helper.h"
|
||||
#include "src/utils/logger.h"
|
||||
#include "src/core/memory/generic_buffer.h"
|
||||
|
||||
#define ASSERTM(exp, msg) assert(((void)msg, exp))
|
||||
|
||||
extern std::mutex sessions_pending_signal_lock;
|
||||
|
||||
// Returns the result of the gettid system call for the calling thread,
// converted to uint32_t.
static inline uint32_t GetTid() {
  const long tid = syscall(__NR_gettid);
  return static_cast<uint32_t>(tid);
}
|
||||
|
||||
namespace rocmtools {
|
||||
|
||||
rocmtool* rocmtool_obj;
|
||||
|
||||
// Constructor of rocmtool
|
||||
// Takes the buffer size, a buffer callback function and a buffer flush
|
||||
// interval to allocate a buffer pool using GenericStorage Also takes the
|
||||
// replay mode (application replay/kernel replay/user replay) to set the replay
|
||||
// mode for the rocmtool class object
|
||||
rocmtool::rocmtool() : current_session_id_(rocprofiler_session_id_t{0}) {}
|
||||
|
||||
// Destructor of rocmtool
|
||||
// deletes the buffer pool
|
||||
// Iterates over its session map and resets each session in its internal
|
||||
// session map and clears them from the map. Pops labels from the range stack
|
||||
// and deletes the stack.
|
||||
rocmtool::~rocmtool() {
|
||||
// {
|
||||
// std::lock_guard<std::mutex> lock(session_map_lock_);
|
||||
// if (!sessions_.empty()) {
|
||||
// // TODO(aelwazir): throw an exception user need to destroy all created
|
||||
// // session (document)
|
||||
// // fatal("Error: Sessions are not destroyed yet!");
|
||||
// sessions_.clear();
|
||||
// }
|
||||
// }
|
||||
Counter::ClearBasicCounters();
|
||||
}
|
||||
|
||||
// Stub: no lookup is performed; every agent id is reported as found.
bool rocmtool::FindAgent(rocprofiler_agent_id_t agent_id) {
  return true;
}
|
||||
// Stub: always reports a size of 0, regardless of the requested kind or
// agent.
size_t rocmtool::GetAgentInfoSize(rocprofiler_agent_info_kind_t kind,
                                  rocprofiler_agent_id_t agent_id) {
  return 0;
}
|
||||
// Stub: always returns an empty string, regardless of the requested kind or
// agent.
const char* rocmtool::GetAgentInfo(rocprofiler_agent_info_kind_t kind,
                                   rocprofiler_agent_id_t agent_id) {
  return "";
}
|
||||
|
||||
// TODO(aelwazir): Implement Queue Query
|
||||
// Stub: no lookup is performed; every queue id is reported as found.
bool rocmtool::FindQueue(rocprofiler_queue_id_t queue_id) {
  return true;
}
|
||||
// Stub: always reports a size of 0, regardless of the requested kind or
// queue.
size_t rocmtool::GetQueueInfoSize(rocprofiler_queue_info_kind_t kind,
                                  rocprofiler_queue_id_t queue_id) {
  return 0;
}
|
||||
// Stub: always returns an empty string, regardless of the requested kind or
// queue.
const char* rocmtool::GetQueueInfo(rocprofiler_queue_info_kind_t kind,
                                   rocprofiler_queue_id_t queue_id) {
  return "";
}
|
||||
|
||||
// Returns true when a session with the given id is registered in the session
// map. The map is read while holding session_map_lock_.
bool rocmtool::FindSession(rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> guard(session_map_lock_);
  const auto entry = sessions_.find(session_id.handle);
  const bool found = entry != sessions_.end();
  return found;
}
|
||||
|
||||
// Creates a new Session for the given replay mode under a freshly generated
// id, registers it in the session map while holding session_map_lock_, and
// returns the id.
rocprofiler_session_id_t rocmtool::CreateSession(rocprofiler_replay_mode_t replay_mode) {
  rocprofiler_session_id_t new_id = rocprofiler_session_id_t{GenerateUniqueSessionId()};

  std::unique_lock<std::mutex> guard(session_map_lock_);
  sessions_.emplace(new_id.handle, new Session(replay_mode, new_id));
  guard.unlock();

  return new_id;
}
|
||||
|
||||
// Destroys the session registered under `session_id`.
//
// First waits until GetCurrentActiveInterruptSignalsCount() reports zero,
// then deletes the Session object and removes its entry from the session map
// while holding session_map_lock_. The id must belong to a registered
// session: this is asserted in debug builds, and sessions_.at() is still
// called when assertions are compiled out.
void rocmtool::DestroySession(rocprofiler_session_id_t session_id) {
  // Give up the time slice while waiting instead of spinning on an empty loop
  // body, so the waiting thread does not monopolize a core.
  while (GetCurrentActiveInterruptSignalsCount() != 0) {
    std::this_thread::yield();
  }

  // Disabled tracer/buffer teardown, kept for reference:
  //   if (GetSession(session_id)->GetTracer()) {
  //     GetSession(session_id)->GetTracer().reset();
  //     GetSession(session_id)
  //         ->GetBuffer(
  //             GetSession(session_id)
  //                 ->GetFilter(GetSession(session_id)->GetFilterIdWithKind(ROCPROFILER_API_TRACE))
  //                 .GetBufferId())
  //         .reset();
  //   }

  {
    std::lock_guard<std::mutex> lock(session_map_lock_);
    ASSERTM(sessions_.find(session_id.handle) != sessions_.end(),
            "Error: Couldn't find a created session with given id");
    delete sessions_.at(session_id.handle);
    sessions_.erase(session_id.handle);
  }
}
|
||||
|
||||
// Returns true when a device profiling session is registered under
// session_id.handle. The lookup runs with device_profiling_session_map_lock_
// held.
bool rocmtool::FindDeviceProfilingSession(rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> guard(device_profiling_session_map_lock_);
  const bool is_registered = dev_profiling_sessions_.count(session_id.handle) != 0;
  return is_registered;
}
|
||||
|
||||
// Creates a device profiling session for the given counters on the CPU/GPU
// agents selected by index, registers it, and returns its id.
//
// Parameters:
//   counters         - counter names forwarded to DeviceProfileSession.
//   cpu_agent_index  - index passed to find_hsa_agent_cpu().
//   gpu_agent_index  - index passed to find_hsa_agent_gpu().
//
// The DeviceProfileSession constructor receives the address of
// session_id.handle (presumably so it can publish the id it generates --
// confirm against its definition). The session is therefore constructed
// *before* the handle is read as the map key: the previous single-expression
// form left the order of "read key" vs. "run constructor" unspecified and read
// an uninitialized handle. The id is also zero-initialized so it never carries
// an indeterminate value.
rocprofiler_session_id_t rocmtool::CreateDeviceProfilingSession(std::vector<std::string> counters,
                                                                int cpu_agent_index,
                                                                int gpu_agent_index) {
  rocprofiler_session_id_t session_id{0};
  {
    std::lock_guard<std::mutex> lock(device_profiling_session_map_lock_);

    hsa_agent_t cpu_agent;
    hsa_agent_t gpu_agent;
    find_hsa_agent_cpu(cpu_agent_index, &cpu_agent);
    find_hsa_agent_gpu(gpu_agent_index, &gpu_agent);

    // Construct first so that any write through &session_id.handle has
    // happened before the handle is used as the key.
    DeviceProfileSession* session =
        new DeviceProfileSession(counters, cpu_agent, gpu_agent, &session_id.handle);
    dev_profiling_sessions_.emplace(session_id.handle, session);
  }
  return session_id;
}
|
||||
|
||||
// Destroys the device profiling session registered under session_id.
//
// With device_profiling_session_map_lock_ held, the DeviceProfileSession
// object is deleted and its map entry erased. ASSERTM (defined elsewhere) is
// handed the "id is registered" condition before the entry is accessed.
void rocmtool::DestroyDeviceProfilingSession(rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> guard(device_profiling_session_map_lock_);
  ASSERTM(dev_profiling_sessions_.find(session_id.handle) != dev_profiling_sessions_.end(),
          "Error: Couldn't find a created session with given id");
  delete dev_profiling_sessions_.at(session_id.handle);
  dev_profiling_sessions_.erase(session_id.handle);
}
|
||||
|
||||
// Looks up the device profiling session stored under session_id.handle.
// The lookup runs with device_profiling_session_map_lock_ held; an unknown id
// trips the assert (when asserts are enabled) and otherwise map::at throws.
DeviceProfileSession* rocmtool::GetDeviceProfilingSession(rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> guard(device_profiling_session_map_lock_);
  assert(dev_profiling_sessions_.find(session_id.handle) != dev_profiling_sessions_.end() &&
         "Error: Can't find the session!");
  DeviceProfileSession* found = dev_profiling_sessions_.at(session_id.handle);
  return found;
}
|
||||
|
||||
bool rocmtool::HasActiveSession() { return GetCurrentSessionId().handle > 0; }
|
||||
// True when session_id carries the same handle as the current session id.
bool rocmtool::IsActiveSession(rocprofiler_session_id_t session_id) {
  const rocprofiler_session_id_t active = GetCurrentSessionId();
  const bool same_handle = (active.handle == session_id.handle);
  return same_handle;
}
|
||||
|
||||
// Get the session by its id
|
||||
// Looks up the session object for an input session id in the internal map.
|
||||
// If a given session id doesn't exist, it throws an assertion.
|
||||
// If a session object exists for the given session id, the session object is
|
||||
// returned.
|
||||
Session* rocmtool::GetSession(rocprofiler_session_id_t session_id) {
|
||||
std::lock_guard<std::mutex> lock(session_map_lock_);
|
||||
assert(sessions_.find(session_id.handle) != sessions_.end() && "Error: Can't find the session!");
|
||||
return sessions_.at(session_id.handle);
|
||||
}
|
||||
|
||||
// Get Current Session ID
|
||||
rocprofiler_session_id_t rocmtool::GetCurrentSessionId() { return current_session_id_; }
|
||||
|
||||
// Records session_id as the current session id. No validation is performed
// and no lock is taken here.
void rocmtool::SetCurrentActiveSession(rocprofiler_session_id_t session_id) {
  current_session_id_ = session_id;
}
|
||||
|
||||
// Hands out the next record id: atomically post-increments records_counter_
// and returns the value it held before the increment.
uint64_t rocmtool::GetUniqueRecordId() {
  return records_counter_++;
}
|
||||
|
||||
uint64_t rocmtool::GetUniqueKernelDispatchId() {
|
||||
return kernel_dispatch_counter_.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// Size of the information GetKernelInfo would return for (kind, kernel_id).
//
// Only ROCPROFILER_KERNEL_NAME is handled: the result is the length of the
// name obtained from GetKernelNameFromKsymbols(kernel_id.handle). Any other
// kind emits a warning and yields 0.
size_t rocmtool::GetKernelInfoSize(rocprofiler_kernel_info_kind_t kind,
                                   rocprofiler_kernel_id_t kernel_id) {
  if (kind != ROCPROFILER_KERNEL_NAME) {
    warning("The provided Kernel Kind is not yet supported!");
    return 0;
  }
  return GetKernelNameFromKsymbols(kernel_id.handle).size();
}
|
||||
// Returns the requested piece of kernel information as a C string.
//
// Only ROCPROFILER_KERNEL_NAME is handled: the name obtained from
// GetKernelNameFromKsymbols(kernel_id.handle) is duplicated with strdup(), so
// that result is heap-allocated. Any other kind emits a warning and yields an
// empty string literal (which must not be freed).
// NOTE(review): callers cannot tell the two ownership cases apart from the
// pointer alone -- confirm who is expected to free the strdup'd name.
const char* rocmtool::GetKernelInfo(rocprofiler_kernel_info_kind_t kind,
                                    rocprofiler_kernel_id_t kernel_id) {
  if (kind != ROCPROFILER_KERNEL_NAME) {
    warning("The provided Kernel Kind is not yet supported!");
    return "";
  }
  return strdup(GetKernelNameFromKsymbols(kernel_id.handle).c_str());
}
|
||||
|
||||
// TODO(aelwazir): To be implemented
|
||||
// Filter data validation hook. Neither argument is examined yet; every
// (filter_kind, filter_data) pair is accepted.
bool rocmtool::CheckFilterData(rocprofiler_filter_kind_t filter_kind,
                               rocprofiler_filter_data_t filter_data) {
  const bool accepted = true;
  return accepted;
}
|
||||
|
||||
// End of ROCMTool Class
|
||||
|
||||
// Accessor for the global rocmtool instance pointer (rocmtool_obj).
rocmtool* GetROCMToolObj() {
  return rocmtool_obj;
}
|
||||
|
||||
void InitROCMToolObj() { rocmtool_obj = new rocmtool; }
|
||||
// Destroys the global rocmtool instance.
//
// After the object is deleted the global pointer is cleared, so
// GetROCMToolObj() yields nullptr instead of a dangling pointer and a repeated
// reset is a harmless `delete nullptr`.
void ResetROCMToolObj() {
  delete rocmtool_obj;
  rocmtool_obj = nullptr;
  // if (rocmtool_obj) rocmtool_obj.reset();
}
|
||||
|
||||
// Current timestamp as reported by hsa_support::GetCurrentTimestampNS().
rocprofiler_timestamp_t GetCurrentTimestamp() {
  return hsa_support::GetCurrentTimestampNS();
}
|
||||
|
||||
// Forwards the callback to hsa_support::IterateCounters() and maps its
// truthiness to a status: ROCPROFILER_STATUS_SUCCESS when it evaluates true,
// ROCPROFILER_STATUS_ERROR otherwise.
rocprofiler_status_t IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback) {
  return hsa_support::IterateCounters(counters_info_callback) ? ROCPROFILER_STATUS_SUCCESS
                                                              : ROCPROFILER_STATUS_ERROR;
}
|
||||
|
||||
} // namespace rocmtools
|
||||
@@ -0,0 +1,122 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SRC_TOOLS_ROCMTOOL_H_
|
||||
#define SRC_TOOLS_ROCMTOOL_H_
|
||||
|
||||
#include <hsa/hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <stack>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "src/core/session/session.h"
|
||||
#include "src/core/session/device_profiling.h"
|
||||
|
||||
namespace rocmtools {
|
||||
|
||||
class rocmtool {
|
||||
public:
|
||||
rocmtool();
|
||||
~rocmtool();
|
||||
|
||||
bool FindAgent(rocprofiler_agent_id_t agent_id);
|
||||
size_t GetAgentInfoSize(rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id);
|
||||
const char* GetAgentInfo(rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id);
|
||||
|
||||
bool FindQueue(rocprofiler_queue_id_t queue_id);
|
||||
size_t GetQueueInfoSize(rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id);
|
||||
const char* GetQueueInfo(rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id);
|
||||
|
||||
bool FindKernel(rocprofiler_kernel_id_t kernel_id);
|
||||
size_t GetKernelInfoSize(rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id);
|
||||
const char* GetKernelInfo(rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id);
|
||||
|
||||
// Session
|
||||
rocprofiler_session_id_t CreateSession(rocprofiler_replay_mode_t replay_mode);
|
||||
void DestroySession(rocprofiler_session_id_t session_id);
|
||||
bool HasActiveSession();
|
||||
rocprofiler_session_id_t GetCurrentSessionId();
|
||||
void SetCurrentActiveSession(rocprofiler_session_id_t session_id);
|
||||
bool FindSession(rocprofiler_session_id_t session_id);
|
||||
bool IsActiveSession(rocprofiler_session_id_t session_id);
|
||||
Session* GetSession(rocprofiler_session_id_t session_id);
|
||||
|
||||
// Device Profiling Session
|
||||
bool FindDeviceProfilingSession(rocprofiler_session_id_t session_id);
|
||||
rocprofiler_session_id_t CreateDeviceProfilingSession(std::vector<std::string> counters,
|
||||
int cpu_agent_index, int gpu_agent_index);
|
||||
void DestroyDeviceProfilingSession(rocprofiler_session_id_t session_id);
|
||||
DeviceProfileSession* GetDeviceProfilingSession(rocprofiler_session_id_t session_id);
|
||||
|
||||
|
||||
// Generic
|
||||
bool CheckFilterData(rocprofiler_filter_kind_t filter_kind, rocprofiler_filter_data_t filter_data);
|
||||
uint64_t GetUniqueRecordId();
|
||||
uint64_t GetUniqueKernelDispatchId();
|
||||
|
||||
private:
|
||||
rocprofiler_session_id_t current_session_id_{0};
|
||||
std::mutex session_map_lock_;
|
||||
std::map<uint64_t, Session*> sessions_;
|
||||
std::atomic<uint64_t> records_counter_{1};
|
||||
std::mutex device_profiling_session_map_lock_;
|
||||
std::map<uint64_t, DeviceProfileSession*> dev_profiling_sessions_;
|
||||
/*
|
||||
* XXX: Associating PC samples with a running kernel requires an identifier
|
||||
* that will be unique across all kernel executions. It is not enough to use
|
||||
* the name of a kernel or the address of a kernel object, as these will be
|
||||
* identical if the same kernel is dispatched twice. Currently, this
|
||||
* identifier is written to the `reserved2` field of the dispatch packet when
|
||||
* its launch is intercepted, but this could change: a future version of
|
||||
* ROCmtools may instead attempt to identify a kernel by a key with high
|
||||
* _probability_ of uniqueness: for example, a combination of the kernel's
|
||||
* name, the queue ID to which it was dispatched, and the offset of the queue
|
||||
* write pointer is likely sufficient to associate PC samples with a running
|
||||
* kernel and have the PC sample records consumed by the user-provided async
|
||||
* callback before the write pointer wraps to the same position in the ring
|
||||
* buffer.
|
||||
*/
|
||||
std::atomic<uint64_t> kernel_dispatch_counter_{1};
|
||||
};
|
||||
|
||||
void InitROCMToolObj();
|
||||
void ResetROCMToolObj();
|
||||
rocmtool* GetROCMToolObj();
|
||||
|
||||
rocprofiler_timestamp_t GetCurrentTimestamp();
|
||||
|
||||
rocprofiler_status_t IterateCounters(rocprofiler_counters_info_callback_t counters_info_callback);
|
||||
|
||||
} // namespace rocmtools
|
||||
|
||||
#endif // SRC_TOOLS_ROCMTOOL_H_
|
||||
@@ -0,0 +1,755 @@
|
||||
#include <atomic>
|
||||
|
||||
#include "src/core/hsa/hsa_support.h"
|
||||
#include "src/api/rocmtool.h"
|
||||
#include "src/utils/helper.h"
|
||||
#include "rocprofiler.h"
|
||||
|
||||
// TODO(aelwazir): change that to adapt with our own Exception
// What about outside exceptions and callbacks exceptions!!

// API_METHOD_PREFIX / API_METHOD_SUFFIX bracket the body of every public API
// function:
//   * PREFIX declares the status variable `err` (initially
//     ROCPROFILER_STATUS_SUCCESS) and opens a try block.
//   * SUFFIX closes the try block, catches rocmtools::Exception, prints
//     "<function>(), <what>" to stdout, marks the call as failed and returns
//     `err`.
// The handler sets `err` to the generic ROCPROFILER_STATUS_ERROR. Previously
// `err` was never assigned, so a call that threw still returned
// ROCPROFILER_STATUS_SUCCESS.
// NOTE(review): the specific status passed to rocmtools::Exception is not
// propagated because its accessor is not visible from this file -- switch to
// it once confirmed.
#define API_METHOD_PREFIX                                                                          \
  rocprofiler_status_t err = ROCPROFILER_STATUS_SUCCESS;                                           \
  try {
#define API_METHOD_SUFFIX                                                                          \
  }                                                                                                \
  catch (rocmtools::Exception & e) {                                                               \
    std::cout << __FUNCTION__ << "(), " << e.what();                                               \
    err = ROCPROFILER_STATUS_ERROR;                                                                \
  }                                                                                                \
  return err;

// API_INIT_CHECKER is API_METHOD_PREFIX plus a guard that throws
// ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED when the API has not been started.
#define API_INIT_CHECKER                                                                           \
  API_METHOD_PREFIX                                                                                \
  if (!api_started.load(std::memory_order_relaxed))                                                \
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED);

// Set by rocprofiler_initialize(), cleared by rocprofiler_finalize().
std::atomic<bool> api_started{false};
|
||||
|
||||
// Returns library version
|
||||
ROCPROFILER_API uint32_t rocprofiler_version_major() { return ROCPROFILER_VERSION_MAJOR; }
|
||||
// Library version: minor component (ROCPROFILER_VERSION_MINOR).
ROCPROFILER_API uint32_t rocprofiler_version_minor() {
  return ROCPROFILER_VERSION_MINOR;
}
|
||||
|
||||
// Return the error string representing the status
|
||||
ROCPROFILER_API const char* rocprofiler_error_str(rocprofiler_status_t status) {
|
||||
switch (status) {
|
||||
case ROCPROFILER_STATUS_ERROR_ALREADY_INITIALIZED:
|
||||
return "ROCMTool is already initialized\n";
|
||||
case ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED:
|
||||
return "ROCMTool is not initialized or already destroyed\n";
|
||||
case ROCPROFILER_STATUS_ERROR_SESSION_MISSING_BUFFER:
|
||||
return "Missing Buffer for a session\n";
|
||||
case ROCPROFILER_STATUS_ERROR_TIMESTAMP_NOT_APPLICABLE:
|
||||
return "Timestamps can't be collected\n";
|
||||
case ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND:
|
||||
return "Agent is not found with given identifier\n";
|
||||
case ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING:
|
||||
return "Agent information is missing for the given identifier\n";
|
||||
case ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND:
|
||||
return "Queue is not found for the given identifier\n";
|
||||
case ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING:
|
||||
return "The requested information about the queue is not found\n";
|
||||
case ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND:
|
||||
return "Kernel is not found with given identifier\n";
|
||||
case ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING:
|
||||
return "The requested information about the kernel is not found\n";
|
||||
case ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND:
|
||||
return "Counter is not found with the given identifier\n";
|
||||
case ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING:
|
||||
return "The requested Counter information for the given kernel is "
|
||||
"missing\n";
|
||||
case ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND:
|
||||
return "The requested Tracing API Data for the given data identifier is "
|
||||
"missing\n";
|
||||
case ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING:
|
||||
return "The requested information for the tracing API Data is missing\n";
|
||||
case ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN:
|
||||
return "The given Domain is incorrect\n";
|
||||
case ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND:
|
||||
return "The requested Session given the session identifier is not "
|
||||
"found\n";
|
||||
case ROCPROFILER_STATUS_ERROR_CORRUPTED_SESSION_BUFFER:
|
||||
return "The requested Session Buffer given the session identifier is "
|
||||
"corrupted or deleted\n";
|
||||
case ROCPROFILER_STATUS_ERROR_RECORD_CORRUPTED:
|
||||
return "The requested record given the record identifier is corrupted "
|
||||
"or deleted\n";
|
||||
case ROCPROFILER_STATUS_ERROR_INCORRECT_REPLAY_MODE:
|
||||
return "Incorrect Replay mode\n";
|
||||
case ROCPROFILER_STATUS_ERROR_SESSION_MISSING_FILTER:
|
||||
return "Missing Filter for a session\n";
|
||||
case ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE:
|
||||
return "The size given for the buffer is not applicable\n";
|
||||
case ROCPROFILER_STATUS_ERROR_INCORRECT_FLUSH_INTERVAL:
|
||||
return "Incorrect Flush interval\n";
|
||||
case ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH:
|
||||
return "The session filter can't accept the given data\n";
|
||||
case ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED:
|
||||
return "The given filter data is corrupted\n";
|
||||
case ROCPROFILER_STATUS_ERROR_CORRUPTED_LABEL_DATA:
|
||||
return "The given label is corrupted\n";
|
||||
case ROCPROFILER_STATUS_ERROR_RANGE_STACK_IS_EMPTY:
|
||||
return "There is no label in the labels stack to be popped\n";
|
||||
case ROCPROFILER_STATUS_ERROR_PASS_NOT_STARTED:
|
||||
return "There is no pass that started\n";
|
||||
case ROCPROFILER_STATUS_ERROR_HAS_ACTIVE_SESSION:
|
||||
return "There is already Active session, Can't activate two session at "
|
||||
"the same time\n";
|
||||
case ROCPROFILER_STATUS_ERROR_SESSION_NOT_ACTIVE:
|
||||
return "Can't terminate a non active session\n";
|
||||
case ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND:
|
||||
return "The required filter is not found for the given session\n";
|
||||
case ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND:
|
||||
return "The required buffer is not found for the given session\n";
|
||||
case ROCPROFILER_STATUS_ERROR_FILTER_NOT_SUPPORTED:
|
||||
return "The required filter is not supported\n";
|
||||
default:
|
||||
return "Unkown error has occurred\n";
|
||||
}
|
||||
return "\n";
|
||||
}
|
||||
|
||||
// Initialize the API
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_initialize() {
|
||||
API_METHOD_PREFIX
|
||||
if (api_started.load(std::memory_order_relaxed))
|
||||
throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_ALREADY_INITIALIZED);
|
||||
rocmtools::InitROCMToolObj();
|
||||
api_started.exchange(true, std::memory_order_release);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Finalize the API
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_finalize() {
|
||||
API_INIT_CHECKER
|
||||
rocmtools::ResetROCMToolObj();
|
||||
api_started.exchange(false, std::memory_order_release);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Writes the current timestamp to *timestamp.
// A timestamp whose value is <= 0 is treated as unusable and raises
// ROCPROFILER_STATUS_ERROR_TIMESTAMP_NOT_APPLICABLE. The pointer itself is not
// checked before it is written through.
ROCPROFILER_API rocprofiler_status_t rocprofiler_get_timestamp(rocprofiler_timestamp_t* timestamp) {
  API_INIT_CHECKER
  *timestamp = rocmtools::GetCurrentTimestamp();
  const bool unusable = timestamp->value <= 0;
  if (unusable) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TIMESTAMP_NOT_APPLICABLE);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Passes counters_info_callback on to rocmtools::IterateCounters() and reports
// that call's status. The status is stored in `err` (declared by the method
// prefix) and returned by the method suffix.
ROCPROFILER_API rocprofiler_status_t
rocprofiler_iterate_counters(rocprofiler_counters_info_callback_t counters_info_callback) {
  API_INIT_CHECKER
  err = rocmtools::IterateCounters(counters_info_callback);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data_size the size of the requested agent information.
// Raises ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND for an unknown agent and
// ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING when the size is zero.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info_size(
    rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id, size_t* data_size) {
  API_INIT_CHECKER
  rocmtools::rocmtool* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindAgent(agent_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND);
  }
  *data_size = tool->GetAgentInfoSize(kind, agent_id);
  if (*data_size == 0) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data the requested agent information string.
// Raises ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND for an unknown agent and
// ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING when the lookup yields a
// null pointer.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_agent_info(
    rocprofiler_agent_info_kind_t kind, rocprofiler_agent_id_t agent_id, const char** data) {
  API_INIT_CHECKER
  rocmtools::rocmtool* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindAgent(agent_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND);
  }
  *data = tool->GetAgentInfo(kind, agent_id);
  if (*data == nullptr) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_AGENT_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data_size the size of the requested queue information.
// Raises ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND for an unknown queue and
// ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING when the size is zero.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info_size(
    rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id, size_t* data_size) {
  API_INIT_CHECKER
  rocmtools::rocmtool* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindQueue(queue_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND);
  }
  *data_size = tool->GetQueueInfoSize(kind, queue_id);
  if (*data_size == 0) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data the requested queue information string.
// Raises ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND for an unknown queue and
// ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING when the lookup yields a
// null pointer.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_queue_info(
    rocprofiler_queue_info_kind_t kind, rocprofiler_queue_id_t queue_id, const char** data) {
  API_INIT_CHECKER
  rocmtools::rocmtool* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindQueue(queue_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_NOT_FOUND);
  }
  *data = tool->GetQueueInfo(kind, queue_id);
  if (*data == nullptr) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_QUEUE_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data_size the size of the requested kernel information.
// Raises ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING when the size is
// zero. The kernel-existence check is currently disabled (see below).
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info_size(
    rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id, size_t* data_size) {
  API_INIT_CHECKER
  rocmtools::rocmtool* tool = rocmtools::GetROCMToolObj();
  // Disabled lookup, kept for reference:
  // if (!tool->FindKernel(kernel_id))
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND);
  *data_size = tool->GetKernelInfoSize(kind, kernel_id);
  if (*data_size == 0) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data the requested kernel information string.
// Raises ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING when the lookup
// yields a null pointer. The kernel-existence check is currently disabled
// (see below).
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_kernel_info(
    rocprofiler_kernel_info_kind_t kind, rocprofiler_kernel_id_t kernel_id, const char** data) {
  API_INIT_CHECKER
  rocmtools::rocmtool* tool = rocmtools::GetROCMToolObj();
  // Disabled lookup, kept for reference:
  // if (!tool->FindKernel(kernel_id))
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_NOT_FOUND);
  *data = tool->GetKernelInfo(kind, kernel_id);
  if (*data == nullptr) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_KERNEL_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data_size the size of the requested counter information, as
// reported by the profiler of the given session.
// Raises ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND when the profiler does not
// know the counter and ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING
// when the size is zero.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_counter_info_size(
    rocprofiler_session_id_t session_id, rocprofiler_counter_info_kind_t kind,
    rocprofiler_counter_id_t counter_id, size_t* data_size) {
  API_INIT_CHECKER
  rocmtools::rocmtool* tool = rocmtools::GetROCMToolObj();
  if (!tool->GetSession(session_id)->GetProfiler()->FindCounter(counter_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND);
  }
  *data_size = tool->GetSession(session_id)->GetProfiler()->GetCounterInfoSize(kind, counter_id);
  if (*data_size == 0) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data the requested counter information string, as reported by
// the profiler of the given session.
// Raises ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND when the profiler does not
// know the counter and ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING
// when the lookup yields a null pointer.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_counter_info(
    rocprofiler_session_id_t session_id, rocprofiler_counter_info_kind_t kind,
    rocprofiler_counter_id_t counter_id, const char** data) {
  API_INIT_CHECKER
  rocmtools::rocmtool* tool = rocmtools::GetROCMToolObj();
  if (!tool->GetSession(session_id)->GetProfiler()->FindCounter(counter_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND);
  }
  *data = tool->GetSession(session_id)->GetProfiler()->GetCounterInfo(kind, counter_id);
  if (*data == nullptr) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_COUNTER_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data_size the size of the requested ROCTx tracer API data, as
// reported by the tracer of the given session. No validation of api_data_id
// and no zero-size check are active at the moment.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_roctx_tracer_api_data_info_size(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_roctx_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id, rocprofiler_tracer_operation_id_t operation_id,
    size_t* data_size) {
  API_INIT_CHECKER
  // TODO(aelwazir): To be implemented -- disabled validation, kept for
  // reference:
  // if (!tracer->FindROCTxApiData(api_data_id)) {
  //   if (tracer->FindHSAApiData(api_data_id) || tracer->FindHIPApiData(api_data_id)) {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN);
  //   } else {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND);
  //   }
  // }
  rocmtools::Session* session = rocmtools::GetROCMToolObj()->GetSession(session_id);
  *data_size = session->GetTracer()->GetROCTxApiDataInfoSize(kind, api_data_id, operation_id);
  // Disabled zero-size check, kept for reference:
  // if (*data_size <= 0)
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data the requested ROCTx tracer API data, as reported by the
// tracer of the given session. Raises
// ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING when the tracer
// yields a null pointer. No validation of api_data_id is active at the moment.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_roctx_tracer_api_data_info(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_roctx_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id, rocprofiler_tracer_operation_id_t operation_id,
    char** data) {
  API_INIT_CHECKER
  // TODO(aelwazir): To be implemented -- disabled validation, kept for
  // reference:
  // if (!tracer->FindROCTxApiData(api_data_id)) {
  //   if (tracer->FindHSAApiData(api_data_id) || tracer->FindHIPApiData(api_data_id)) {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN);
  //   } else {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND);
  //   }
  // }
  rocmtools::Session* session = rocmtools::GetROCMToolObj()->GetSession(session_id);
  *data = session->GetTracer()->GetROCTxApiDataInfo(kind, api_data_id, operation_id);
  if (*data == nullptr) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data_size the size of the requested HSA tracer API data, as
// reported by the tracer of the given session. Raises
// ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING when the size
// is zero. No validation of api_data_id is active at the moment.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_hsa_tracer_api_data_info_size(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_hsa_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id, rocprofiler_tracer_operation_id_t operation_id,
    size_t* data_size) {
  API_INIT_CHECKER
  // TODO(aelwazir): To be implemented -- disabled validation, kept for
  // reference:
  // if (!tracer->FindHSAApiData(api_data_id)) {
  //   if (tracer->FindROCTxApiData(api_data_id) || tracer->FindHIPApiData(api_data_id)) {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN);
  //   } else {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND);
  //   }
  // }
  rocmtools::Session* session = rocmtools::GetROCMToolObj()->GetSession(session_id);
  *data_size = session->GetTracer()->GetHSAApiDataInfoSize(kind, api_data_id, operation_id);
  if (*data_size == 0) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data the requested HSA tracer API data, as reported by the tracer
// of the given session. Raises
// ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING when the tracer
// yields a null pointer. No validation of api_data_id is active at the moment.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_hsa_tracer_api_data_info(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_hsa_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id, rocprofiler_tracer_operation_id_t operation_id,
    char** data) {
  API_INIT_CHECKER
  // TODO(aelwazir): To be implemented -- disabled validation, kept for
  // reference:
  // if (!tracer->FindHSAApiData(api_data_id)) {
  //   if (tracer->FindROCTxApiData(api_data_id) || tracer->FindHIPApiData(api_data_id)) {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN);
  //   } else {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND);
  //   }
  // }
  rocmtools::Session* session = rocmtools::GetROCMToolObj()->GetSession(session_id);
  *data = session->GetTracer()->GetHSAApiDataInfo(kind, api_data_id, operation_id);
  if (*data == nullptr) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data_size the size of the requested HIP tracer API data, as
// reported by the tracer of the given session. No validation of api_data_id
// and no zero-size check are active at the moment.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_hip_tracer_api_data_info_size(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_hip_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id, rocprofiler_tracer_operation_id_t operation_id,
    size_t* data_size) {
  API_INIT_CHECKER
  // TODO(aelwazir): To be implemented -- disabled validation, kept for
  // reference:
  // if (!tracer->FindHIPApiData(api_data_id)) {
  //   if (tracer->FindHSAApiData(api_data_id) || tracer->FindROCTxApiData(api_data_id)) {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN);
  //   } else {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND);
  //   }
  // }
  rocmtools::Session* session = rocmtools::GetROCMToolObj()->GetSession(session_id);
  *data_size = session->GetTracer()->GetHIPApiDataInfoSize(kind, api_data_id, operation_id);
  // Disabled zero-size check, kept for reference:
  // if (*data_size <= 0)
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Stores in *data the requested HIP tracer API data, as reported by the tracer
// of the given session. Neither the validation of api_data_id nor the
// null-result check is active at the moment, so *data may be left null.
ROCPROFILER_API rocprofiler_status_t rocprofiler_query_hip_tracer_api_data_info(
    rocprofiler_session_id_t session_id, rocprofiler_tracer_hip_api_data_info_t kind,
    rocprofiler_tracer_api_data_handle_t api_data_id, rocprofiler_tracer_operation_id_t operation_id,
    char** data) {
  API_INIT_CHECKER
  // TODO(aelwazir): To be implemented -- disabled validation, kept for
  // reference:
  // if (!tracer->FindHIPApiData(api_data_id)) {
  //   if (tracer->FindHSAApiData(api_data_id) || tracer->FindROCTxApiData(api_data_id)) {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_DOMAIN);
  //   } else {
  //     throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_NOT_FOUND);
  //   }
  // }
  rocmtools::Session* session = rocmtools::GetROCMToolObj()->GetSession(session_id);
  *data = session->GetTracer()->GetHIPApiDataInfo(kind, api_data_id, operation_id);
  // Disabled null-result check, kept for reference:
  // if (!*data)
  //   throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_TRACER_API_DATA_INFORMATION_MISSING);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Flushes one buffer of one session.
// Raises, in this order: ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND for an
// unknown session, ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND for an unknown
// buffer, and ROCPROFILER_STATUS_ERROR_CORRUPTED_SESSION_BUFFER when the
// buffer's Flush() reports failure.
ROCPROFILER_API rocprofiler_status_t rocprofiler_flush_data(rocprofiler_session_id_t session_id,
                                                            rocprofiler_buffer_id_t buffer_id) {
  API_INIT_CHECKER
  rocmtools::rocmtool* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->FindBuffer(buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->GetBuffer(buffer_id)->Flush()) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_CORRUPTED_SESSION_BUFFER);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
#include "src/core/memory/generic_buffer.h"
|
||||
|
||||
// Advances from `record` to the following record, stored through `next`.
// The session and buffer ids are only used for existence checks; the actual
// step is delegated to Memory::GetNextRecord(record, next).
// Raises, in this order: ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND,
// ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND, and
// ROCPROFILER_STATUS_ERROR_RECORD_CORRUPTED when GetNextRecord reports failure.
ROCPROFILER_API rocprofiler_status_t rocprofiler_next_record(
    const rocprofiler_record_header_t* record, const rocprofiler_record_header_t** next,
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  API_INIT_CHECKER
  rocmtools::rocmtool* tool = rocmtools::GetROCMToolObj();
  if (!tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  if (!tool->GetSession(session_id)->FindBuffer(buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
  }
  const bool advanced = Memory::GetNextRecord(record, next);
  if (!advanced) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_RECORD_CORRUPTED);
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// API to create a session with a given profiling mode and input data
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_create_session(rocprofiler_replay_mode_t replay_mode,
|
||||
rocprofiler_session_id_t* session_id) {
|
||||
API_INIT_CHECKER
|
||||
*session_id = rocmtools::GetROCMToolObj()->CreateSession(replay_mode);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Creates a filter inside session `session_id` and writes the id returned by
// Session::CreateFilter(filter_kind, filter_data, data_count, property) to
// `*filter_id`.
//
// Raises rocmtools::Exception carrying
// ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND if the session is unknown.
// `filter_id` is dereferenced without a null check.
ROCPROFILER_API rocprofiler_status_t rocprofiler_create_filter(
    rocprofiler_session_id_t session_id, rocprofiler_filter_kind_t filter_kind,
    rocprofiler_filter_data_t filter_data, uint64_t data_count,
    rocprofiler_filter_id_t* filter_id, rocprofiler_filter_property_t property) {
  API_INIT_CHECKER
  // TODO(aelwazir): CheckFilterData to be implemented. Planned validation:
  //   int error_code = rocmtools::GetROCMToolObj()->CheckFilterData(filter_kind, filter_data);
  //   error_code == -1 -> ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED
  //   error_code ==  0 -> ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH
  auto&& rocm_tool = rocmtools::GetROCMToolObj();
  if (!rocm_tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }

  auto&& target_session = rocm_tool->GetSession(session_id);
  *filter_id = target_session->CreateFilter(filter_kind, filter_data, data_count, property);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Destroys filter `filter_id` of session `session_id`.
//
// Raises rocmtools::Exception carrying:
//   - ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND if the session is unknown,
//   - ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND if the session has no such filter.
ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_filter(
    rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id) {
  API_INIT_CHECKER
  auto&& rocm_tool = rocmtools::GetROCMToolObj();
  if (!rocm_tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }

  auto&& target_session = rocm_tool->GetSession(session_id);
  if (!target_session->FindFilter(filter_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
  }

  target_session->DestroyFilter(filter_id);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Creates a buffer inside session `session_id` and writes the id returned by
// Session::CreateBuffer(buffer_callback, buffer_size) to `*buffer_id`.
//
// Raises rocmtools::Exception carrying
// ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND if the session is unknown.
// `buffer_id` is dereferenced without a null check.
ROCPROFILER_API rocprofiler_status_t rocprofiler_create_buffer(
    rocprofiler_session_id_t session_id, rocprofiler_buffer_callback_t buffer_callback,
    size_t buffer_size, rocprofiler_buffer_id_t* buffer_id) {
  API_INIT_CHECKER
  auto&& rocm_tool = rocmtools::GetROCMToolObj();
  if (!rocm_tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }

  auto&& target_session = rocm_tool->GetSession(session_id);
  *buffer_id = target_session->CreateBuffer(buffer_callback, buffer_size);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Forwards `buffer_properties` / `buffer_properties_count` to SetProperties()
// on buffer `buffer_id` of session `session_id`.
//
// Raises rocmtools::Exception carrying:
//   - ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND if the session is unknown,
//   - ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND if the session has no such buffer.
ROCPROFILER_API rocprofiler_status_t rocprofiler_set_buffer_properties(
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id,
    rocprofiler_buffer_property_t* buffer_properties, uint32_t buffer_properties_count) {
  API_INIT_CHECKER
  auto&& rocm_tool = rocmtools::GetROCMToolObj();
  if (!rocm_tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }

  auto&& target_session = rocm_tool->GetSession(session_id);
  if (!target_session->FindBuffer(buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
  }

  target_session->GetBuffer(buffer_id)->SetProperties(buffer_properties, buffer_properties_count);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Destroys buffer `buffer_id` of session `session_id`.
//
// Raises rocmtools::Exception carrying:
//   - ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND if the session is unknown,
//   - ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND if the session has no such buffer.
ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_buffer(
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  API_INIT_CHECKER
  auto&& rocm_tool = rocmtools::GetROCMToolObj();
  if (!rocm_tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }

  auto&& target_session = rocm_tool->GetSession(session_id);
  if (!target_session->FindBuffer(buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
  }

  target_session->DestroyBuffer(buffer_id);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Associates buffer `buffer_id` with filter `filter_id` in session
// `session_id` by calling SetBufferId(buffer_id) on the filter.
//
// Checks run in this order, each raising rocmtools::Exception on failure:
//   - session exists        -> ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND
//   - buffer exists         -> ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND
//   - filter exists         -> ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND
//   - CheckFilterBufferSize -> ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE
ROCPROFILER_API rocprofiler_status_t rocprofiler_set_filter_buffer(
    rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id,
    rocprofiler_buffer_id_t buffer_id) {
  API_INIT_CHECKER
  auto&& rocm_tool = rocmtools::GetROCMToolObj();
  if (!rocm_tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }

  auto&& target_session = rocm_tool->GetSession(session_id);
  if (!target_session->FindBuffer(buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_BUFFER_NOT_FOUND);
  }
  if (!target_session->FindFilter(filter_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
  }
  if (!target_session->CheckFilterBufferSize(filter_id, buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE);
  }

  target_session->GetFilter(filter_id)->SetBufferId(buffer_id);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Installs `callback` on filter `filter_id` of session `session_id` via
// SetCallback(callback). Only filters whose GetKind() equals
// ROCPROFILER_API_TRACE are accepted.
//
// Raises rocmtools::Exception carrying:
//   - ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND if the session is unknown,
//   - ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND if the session has no such filter,
//   - ROCPROFILER_STATUS_ERROR_FILTER_NOT_SUPPORTED if the filter kind is not
//     ROCPROFILER_API_TRACE.
ROCPROFILER_API rocprofiler_status_t rocprofiler_set_api_trace_sync_callback(
    rocprofiler_session_id_t session_id, rocprofiler_filter_id_t filter_id,
    rocprofiler_sync_callback_t callback) {
  API_INIT_CHECKER
  auto&& rocm_tool = rocmtools::GetROCMToolObj();
  if (!rocm_tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }

  auto&& target_session = rocm_tool->GetSession(session_id);
  if (!target_session->FindFilter(filter_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_FOUND);
  }

  auto&& target_filter = target_session->GetFilter(filter_id);
  if (target_filter->GetKind() != ROCPROFILER_API_TRACE) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_FILTER_NOT_SUPPORTED);
  }

  target_filter->SetCallback(callback);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Convenience entry point that builds a session together with one filter and
// one buffer, then links the buffer to the filter.
//
// Steps, in order:
//   1. CreateSession(replay_mode); the new id is written to `*session_id`.
//   2. CreateFilter(filter_kind, filter_data, data_count, property).
//   3. CreateBuffer(buffer_callback, buffer_size).
//   4. If filter_kind is ROCPROFILER_API_TRACE, SetCallback(callback) on the filter.
//   5. CheckFilterBufferSize(filter, buffer); failure raises rocmtools::Exception
//      carrying ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE.
//   6. SetBufferId(buffer) on the filter.
//
// When step 5 throws, `*session_id` has already been written and nothing in
// this function destroys the session.
ROCPROFILER_API rocprofiler_status_t rocprofiler_create_ready_session(
    rocprofiler_replay_mode_t replay_mode, rocprofiler_filter_kind_t filter_kind,
    rocprofiler_filter_data_t filter_data, uint64_t data_count, size_t buffer_size,
    rocprofiler_buffer_callback_t buffer_callback, rocprofiler_session_id_t* session_id,
    rocprofiler_filter_property_t property, rocprofiler_sync_callback_t callback) {
  API_INIT_CHECKER
  // TODO(aelwazir): CheckFilterData to be implemented. Planned validation:
  //   int error_code = rocmtools::GetROCMToolObj()->CheckFilterData(filter_kind, filter_data);
  //   error_code == -1 -> ROCPROFILER_STATUS_ERROR_FILTER_DATA_CORRUPTED
  //   error_code ==  0 -> ROCPROFILER_STATUS_ERROR_SESSION_FILTER_DATA_MISMATCH
  auto&& rocm_tool = rocmtools::GetROCMToolObj();
  *session_id = rocm_tool->CreateSession(replay_mode);

  auto&& new_session = rocm_tool->GetSession(*session_id);
  rocprofiler_filter_id_t filter_id =
      new_session->CreateFilter(filter_kind, filter_data, data_count, property);
  rocprofiler_buffer_id_t buffer_id = new_session->CreateBuffer(buffer_callback, buffer_size);

  if (filter_kind == ROCPROFILER_API_TRACE) {
    new_session->GetFilter(filter_id)->SetCallback(callback);
  }

  if (!new_session->CheckFilterBufferSize(filter_id, buffer_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_INCORRECT_SIZE);
  }

  new_session->GetFilter(filter_id)->SetBufferId(buffer_id);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// API to destroy a session by id
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_destroy_session(rocprofiler_session_id_t session_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocmtools::GetROCMToolObj()->FindSession(session_id))
|
||||
throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
rocmtools::GetROCMToolObj()->DestroySession(session_id);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// API to activate a session by id
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_start_session(rocprofiler_session_id_t session_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocmtools::GetROCMToolObj()->FindSession(session_id))
|
||||
throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
if (!rocmtools::GetROCMToolObj()->GetSession(session_id)->HasFilter())
|
||||
throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_MISSING_FILTER);
|
||||
if (!rocmtools::GetROCMToolObj()->GetSession(session_id)->HasBuffer())
|
||||
throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_MISSING_BUFFER);
|
||||
if (rocmtools::GetROCMToolObj()->HasActiveSession())
|
||||
throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_HAS_ACTIVE_SESSION);
|
||||
rocmtools::GetROCMToolObj()->GetSession(session_id)->Start();
|
||||
rocmtools::GetROCMToolObj()->SetCurrentActiveSession(session_id);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// API to deactivate a session by id
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_terminate_session(rocprofiler_session_id_t session_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocmtools::GetROCMToolObj()->FindSession(session_id))
|
||||
throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
|
||||
if (!rocmtools::GetROCMToolObj()->IsActiveSession(session_id))
|
||||
throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_ACTIVE);
|
||||
rocmtools::GetROCMToolObj()->GetSession(session_id)->Terminate();
|
||||
rocmtools::GetROCMToolObj()->SetCurrentActiveSession(rocprofiler_session_id_t{0});
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
|
||||
// API to push a custom label for defining a code section
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_push_range(rocprofiler_session_id_t session_id,
|
||||
const char* label) {
|
||||
API_INIT_CHECKER
|
||||
if (!label) throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_CORRUPTED_LABEL_DATA);
|
||||
rocmtools::GetROCMToolObj()->GetSession(session_id)->PushRangeLabels(label);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// API to pop a custom label defined for a code section
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_pop_range(rocprofiler_session_id_t session_id) {
|
||||
API_INIT_CHECKER
|
||||
if (!rocmtools::GetROCMToolObj()->GetSession(session_id)->PopRangeLabels())
|
||||
throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_RANGE_STACK_IS_EMPTY);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Starts a replay pass by calling StartReplayPass(session_id) on the profiler
// of session `session_id`.
//
// Raises rocmtools::Exception carrying
// ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND if the session is unknown.
ROCPROFILER_API rocprofiler_status_t rocprofiler_start_replay_pass(
    rocprofiler_session_id_t session_id) {
  API_INIT_CHECKER
  auto&& rocm_tool = rocmtools::GetROCMToolObj();
  if (!rocm_tool->FindSession(session_id)) {
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  }
  rocm_tool->GetSession(session_id)->GetProfiler()->StartReplayPass(session_id);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Ends the current replay pass by calling EndReplayPass() on the profiler of
// session `session_id`.
//
// Raises rocmtools::Exception carrying:
//   - ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND if the session is unknown,
//   - ROCPROFILER_STATUS_ERROR_PASS_NOT_STARTED if the profiler's
//     HasActivePass() is false.
ROCPROFILER_API rocprofiler_status_t rocprofiler_end_replay_pass(
    rocprofiler_session_id_t session_id) {
  API_INIT_CHECKER
  // Validate the session id before GetSession(), mirroring
  // rocprofiler_start_replay_pass, so an unknown id is reported as an error
  // status instead of being passed to GetSession() unchecked.
  if (!rocmtools::GetROCMToolObj()->FindSession(session_id))
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_SESSION_NOT_FOUND);
  if (!rocmtools::GetROCMToolObj()->GetSession(session_id)->GetProfiler()->HasActivePass())
    throw rocmtools::Exception(ROCPROFILER_STATUS_ERROR_PASS_NOT_STARTED);
  rocmtools::GetROCMToolObj()->GetSession(session_id)->GetProfiler()->EndReplayPass();
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Creates a device profiling session for the given counters and writes the id
// returned by CreateDeviceProfilingSession(counters, cpu_index, gpu_index) to
// `*session_id`.
//
// `counter_names` is read as an array of `num_counters` C strings, each of
// which is copied into a std::string. `session_id` is dereferenced without a
// null check.
ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_create(
    const char** counter_names, uint64_t num_counters, rocprofiler_session_id_t* session_id,
    int cpu_index, int gpu_index) {
  API_METHOD_PREFIX
  const char** const names_begin = counter_names;
  const char** const names_end = counter_names + num_counters;
  std::vector<std::string> counters(names_begin, names_end);

  auto&& rocm_tool = rocmtools::GetROCMToolObj();
  *session_id = rocm_tool->CreateDeviceProfilingSession(counters, cpu_index, gpu_index);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// API to start a device profiling session
|
||||
ROCPROFILER_API rocprofiler_status_t
|
||||
rocprofiler_device_profiling_session_start(rocprofiler_session_id_t session_id) {
|
||||
API_METHOD_PREFIX
|
||||
rocmtools::GetROCMToolObj()->GetDeviceProfilingSession(session_id)->StartSession();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// API to poll a device profiling session
|
||||
ROCPROFILER_API rocprofiler_status_t rocprofiler_device_profiling_session_poll(
|
||||
rocprofiler_session_id_t session_id, rocprofiler_device_profile_metric_t* data) {
|
||||
API_METHOD_PREFIX
|
||||
rocmtools::GetROCMToolObj()->GetDeviceProfilingSession(session_id)->PollMetrics(data);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// API to stop a device profiling session
|
||||
ROCPROFILER_API rocprofiler_status_t
|
||||
rocprofiler_device_profiling_session_stop(rocprofiler_session_id_t session_id) {
|
||||
API_METHOD_PREFIX
|
||||
rocmtools::GetROCMToolObj()->GetDeviceProfilingSession(session_id)->StopSession();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// API to destroy a device profiling session
|
||||
ROCPROFILER_API rocprofiler_status_t
|
||||
rocprofiler_device_profiling_session_destroy(rocprofiler_session_id_t session_id) {
|
||||
API_METHOD_PREFIX
|
||||
rocmtools::GetROCMToolObj()->DestroyDeviceProfilingSession(session_id);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
|
||||
// static bool started{false};
|
||||
|
||||
extern "C" {
|
||||
|
||||
// TODO(aelwazir): To be enabled if old API is deprecated
|
||||
|
||||
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type
|
||||
// initialized by the loader itself, not by code during _init. 'extern const'
|
||||
// seems to do that, although that is not a guarantee.
|
||||
// ROCPROFILER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 25;
|
||||
|
||||
/**
|
||||
* @brief Callback function called upon loading the HSA.
|
||||
* The function updates the core api table function pointers to point to the
|
||||
* interceptor functions in this file.
|
||||
*/
|
||||
// ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
// uint64_t failed_tool_count, const char* const* failed_tool_names) {
|
||||
// if (started) rocmtools::fatal("HSA Tool started already!");
|
||||
// started = true;
|
||||
// rocmtools::hsa_support::Initialize(table);
|
||||
// return true;
|
||||
// }
|
||||
|
||||
/**
|
||||
* @brief Callback function upon unloading the HSA.
|
||||
*/
|
||||
// ROCPROFILER_EXPORT void OnUnload() {
|
||||
// if (!started) rocmtools::fatal("HSA Tool hasn't started yet!");
|
||||
// rocmtools::hsa_support::Finalize();
|
||||
// }
|
||||
|
||||
} // extern "C"
|
||||
@@ -32,10 +32,10 @@ THE SOFTWARE.
|
||||
#include <string>
|
||||
|
||||
// Tracer messages protocol
|
||||
#define USE_PROF_API
|
||||
#include <prof_protocol.h>
|
||||
|
||||
#include "core/context.h"
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#ifndef _SRC_CORE_ACTIVITY_H
|
||||
#define _SRC_CORE_ACTIVITY_H
|
||||
|
||||
#define ROCPROFILER_V1
|
||||
|
||||
#ifdef ROCP_INTERNAL_BUILD
|
||||
#include "inc/rocprofiler.h"
|
||||
#else
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SRC_CORE_COUNTERS_BASIC_BASIC_COUNTER_H_
|
||||
#define SRC_CORE_COUNTERS_BASIC_BASIC_COUNTER_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "src/core/counters/counter.h"
|
||||
|
||||
#define ASSERTM(exp, msg) assert(((void)msg, exp))
|
||||
|
||||
namespace Counter {
|
||||
|
||||
class BasicCounter : Counter {
|
||||
public:
|
||||
BasicCounter(uint64_t event_id, std::string block_id, std::string name, std::string description,
|
||||
std::string gpu_name);
|
||||
~BasicCounter();
|
||||
|
||||
uint64_t GetEventId();
|
||||
std::string GetBlockId();
|
||||
std::string GetName();
|
||||
uint64_t GetBasicCounterID();
|
||||
bool GetValue(uint64_t* value, int64_t instance_id);
|
||||
uint64_t GetValue(int64_t instance_id = -1);
|
||||
|
||||
uint64_t avr(int64_t instances_count);
|
||||
uint64_t max(int64_t instances_count);
|
||||
uint64_t min(int64_t instances_count);
|
||||
uint64_t sum(int64_t instances_count);
|
||||
|
||||
private:
|
||||
void* counter_hw_info;
|
||||
std::unordered_map<int64_t, uint64_t> instances_values_;
|
||||
uint64_t event_id_;
|
||||
std::string block_id_;
|
||||
};
|
||||
|
||||
uint64_t operator+(BasicCounter counter, const uint64_t number);
|
||||
uint64_t operator*(BasicCounter counter, const uint64_t number);
|
||||
uint64_t operator/(BasicCounter counter, const uint64_t number);
|
||||
uint64_t operator-(BasicCounter counter, const uint64_t number);
|
||||
uint64_t operator^(BasicCounter counter, const uint64_t number);
|
||||
|
||||
uint64_t operator+(BasicCounter counter1, BasicCounter counter2);
|
||||
uint64_t operator*(BasicCounter counter1, BasicCounter counter2);
|
||||
uint64_t operator/(BasicCounter counter1, BasicCounter counter2);
|
||||
uint64_t operator-(BasicCounter counter1, BasicCounter counter2);
|
||||
uint64_t operator^(BasicCounter counter1, BasicCounter counter2);
|
||||
|
||||
BasicCounter* GetGeneratedBasicCounter(uint64_t id);
|
||||
void ClearBasicCounters();
|
||||
|
||||
uint64_t GetBasicCounter(const char* name, const char* gpu_name);
|
||||
|
||||
} // namespace Counter
|
||||
|
||||
#endif // SRC_CORE_COUNTERS_BASIC_BASIC_COUNTER_H_
|
||||
Some files were not shown because too many files have changed in this diff Show More
Referens i nytt ärende
Block a user