Fixing rocprofilerv2 naming and CMake issues
Change-Id: Ib6d336349a056731e5c0f35151296d6fea671360
[ROCm/rocprofiler commit: dc69331379]
This commit is contained in:
@@ -22,6 +22,11 @@
|
||||
|
||||
cmake_minimum_required(VERSION 3.18.0)
|
||||
|
||||
# Build is not supported on the Windows platform
|
||||
if(WIN32)
|
||||
message(FATAL_ERROR "Windows build is not supported.")
|
||||
endif()
|
||||
|
||||
# Set module name and project name.
|
||||
set(ROCPROFILER_NAME "rocprofiler")
|
||||
set(ROCPROFILER_TARGET "${ROCPROFILER_NAME}64")
|
||||
@@ -37,11 +42,6 @@ if(NOT DEFINED ROCM_PATH)
|
||||
CACHE STRING "Default ROCM installation directory")
|
||||
endif()
|
||||
|
||||
# Build is not supported on the Windows platform
|
||||
if(WIN32)
|
||||
message(FATAL_ERROR "Windows build is not supported.")
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
add_compile_options(-Wall)
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
---
|
||||
Language: Cpp
|
||||
# BasedOnStyle: Google
|
||||
AccessModifierOffset: -1
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
AlignEscapedNewlinesLeft: false
|
||||
AlignTrailingComments: true
|
||||
AlignConsecutiveAssignments: false
|
||||
AlignOperands: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: false
|
||||
AllowShortIfStatementsOnASingleLine: true
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AlwaysBreakAfterDefinitionReturnType: false
|
||||
AlwaysBreakTemplateDeclarations: false
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
BreakBeforeBinaryOperators: false
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
BinPackParameters: true
|
||||
ColumnLimit: 100
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
IndentCaseLabels: true
|
||||
IndentWrappedFunctionNames: false
|
||||
IndentFunctionDeclarationAfterType: false
|
||||
MaxEmptyLinesToKeep: 2
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
NamespaceIndentation: None
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: false
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
DerivePointerAlignment: false
|
||||
PointerAlignment: Left
|
||||
SpacesBeforeTrailingComments: 2
|
||||
Cpp11BracedListStyle: true
|
||||
Standard: Auto
|
||||
IndentWidth: 2
|
||||
TabWidth: 8
|
||||
UseTab: Never
|
||||
BreakBeforeBraces: Attach
|
||||
SpacesInParentheses: false
|
||||
SpacesInAngles: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
ContinuationIndentWidth: 4
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
|
||||
SpaceBeforeParens: ControlStatements
|
||||
DisableFormat: false
|
||||
SortIncludes: false
|
||||
...
|
||||
@@ -36,13 +36,13 @@ usage() {
|
||||
echo -e "-o | --output-file For the output file name"
|
||||
echo -e "-d | --output-directory For adding output path where the output files will be saved"
|
||||
echo -e "-fi | --flush-interval For adding a flush interval in milliseconds, every \"flush interval\" the buffers will be flushed"
|
||||
echo -e "\n###ATT Plugin options: ###"
|
||||
if [ $RUN_FROM_BUILD == 1 ]; then
|
||||
ATT_PATH=$ROCPROFV2_DIR/build/plugin/att/att/att.py
|
||||
else
|
||||
ATT_PATH=$ROCPROFV2_DIR/../libexec/rocprofiler/att/att.py
|
||||
fi
|
||||
eval "python3 $ATT_PATH --help"
|
||||
# echo -e "\n###ATT Plugin options: ###"
|
||||
# if [ $RUN_FROM_BUILD == 1 ]; then
|
||||
# ATT_PATH=$ROCPROFV2_DIR/build/plugin/att/att/att.py
|
||||
# else
|
||||
# ATT_PATH=$ROCPROFV2_DIR/../libexec/rocprofiler/att/att.py
|
||||
# fi
|
||||
# eval "python3 $ATT_PATH --help"
|
||||
exit 1
|
||||
}
|
||||
|
||||
@@ -207,11 +207,11 @@ while [ 1 ] ; do
|
||||
exit 1
|
||||
fi
|
||||
if [ "$2" = "att" ] ; then
|
||||
#if [ $RUN_FROM_BUILD == 1 ]; then
|
||||
# export ROCMTOOLS_ATT_PARSER=build/libatt_parser.so
|
||||
#else
|
||||
# export ROCMTOOLS_ATT_PARSER=$ROCPROFV2_DIR/../lib/rocprofiler/libatt_parser.so
|
||||
#fi
|
||||
if [ $RUN_FROM_BUILD == 1 ]; then
|
||||
ATT_PATH=$ROCPROFV2_DIR/build/plugin/att/att/att.py
|
||||
else
|
||||
ATT_PATH=$ROCPROFV2_DIR/../libexec/rocprofiler/att/att.py
|
||||
fi
|
||||
ATT_ARGV=$3
|
||||
shift
|
||||
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
---
|
||||
Language: Cpp
|
||||
BasedOnStyle: Google
|
||||
AccessModifierOffset: -1
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
AlignEscapedNewlinesLeft: false
|
||||
AlignTrailingComments: true
|
||||
AlignConsecutiveAssignments: false
|
||||
AlignOperands: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: false
|
||||
AllowShortIfStatementsOnASingleLine: true
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AlwaysBreakAfterDefinitionReturnType: false
|
||||
AlwaysBreakTemplateDeclarations: false
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
BreakBeforeBinaryOperators: false
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
BinPackParameters: true
|
||||
ColumnLimit: 100
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
IndentCaseLabels: true
|
||||
IndentWrappedFunctionNames: false
|
||||
IndentFunctionDeclarationAfterType: false
|
||||
MaxEmptyLinesToKeep: 2
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
NamespaceIndentation: None
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: false
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
DerivePointerAlignment: false
|
||||
PointerAlignment: Left
|
||||
SpacesBeforeTrailingComments: 2
|
||||
Cpp11BracedListStyle: true
|
||||
Standard: Auto
|
||||
IndentWidth: 2
|
||||
TabWidth: 8
|
||||
UseTab: Never
|
||||
BreakBeforeBraces: Attach
|
||||
SpacesInParentheses: false
|
||||
SpacesInAngles: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
ContinuationIndentWidth: 4
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
|
||||
SpaceBeforeParens: ControlStatements
|
||||
DisableFormat: false
|
||||
SortIncludes: false
|
||||
...
|
||||
@@ -1,91 +0,0 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
SRC_DIR=$(dirname "$0")
|
||||
COMPONENT="rocmtools"
|
||||
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
|
||||
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
|
||||
|
||||
# Print the command-line help for this build script, then terminate the
# script with status 1 (also when help was requested explicitly).
# The flags listed here must match the ones accepted by the option loop.
usage() {
  echo -e "ROCMTools Build Script Usage:"
  echo -e "\nTo run ./run.sh PARAMs, PARAMs can be the following:"
  echo -e "-h | --help For showing this message"
  echo -e "-b | --build For compiling"
  echo -e "-cb | --clean-build For full clean build"
  # The short flag accepted by the parser is -acb, not -act.
  echo -e "-acb | --asan-clean-build For compiling with ASAN library attached"
  exit 1
}
|
||||
|
||||
# Consume leading option flags from the positional parameters.
# Parsing stops at the first argument that does not start with a dash
# (or when no arguments remain).
while true ; do
  case "$1" in
    -h|--help)
      usage
      exit 1
      ;;
    -b|--build)
      # Incremental build: keep the existing build directory.
      TO_CLEAN=no
      shift
      ;;
    -acb|--asan-clean-build)
      ASAN=True
      TO_CLEAN=yes
      shift
      ;;
    -cb|--clean-build)
      TO_CLEAN=yes
      shift
      ;;
    -*)
      # Any other dash-prefixed argument is an unknown option.
      echo -e "Wrong option \"$1\", Please use the following options:\n"
      usage
      exit 1
      ;;
    *)
      break
      ;;
  esac
done
|
||||
|
||||
umask 022
|
||||
|
||||
# Apply defaults to every setting that is unset or empty.
# Order matters: PACKAGE_ROOT falls back to ROCM_PATH, and PREFIX_PATH
# falls back to PACKAGE_ROOT.
: "${ROCPROFILER_ROOT:=$SRC_DIR}"
: "${BUILD_DIR:=build}"
: "${BUILD_TYPE:=RelWithDebInfo}"
: "${PACKAGE_ROOT:=$ROCM_PATH}"
: "${PREFIX_PATH:=$PACKAGE_ROOT}"
: "${HIP_VDI:=0}"

# An explicit ROCM_RPATH replaces the default run-path linker flags.
if [ -n "$ROCM_RPATH" ] ; then
  LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"
fi

: "${TO_CLEAN:=yes}"
: "${ASAN:=False}"
|
||||
|
||||
# Resolve the source tree to an absolute path before entering the build
# directory, so the cmake invocation below still finds it.
ROCPROFILER_ROOT=$(cd "$ROCPROFILER_ROOT" && echo "$PWD")

# All expansions are quoted so that paths containing spaces or glob
# characters are passed through as single arguments (this matters most
# for the recursive delete).
if [ "$TO_CLEAN" = "yes" ] ; then rm -rf "$BUILD_DIR"; fi
mkdir -p "$BUILD_DIR"
pushd "$BUILD_DIR"

# Configure the project.
cmake \
  -DCMAKE_EXPORT_COMPILE_COMMANDS=TRUE \
  -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
  -DCMAKE_MODULE_PATH="$ROCM_PATH/hip/cmake" \
  -DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
  -DCMAKE_INSTALL_PREFIX="$PACKAGE_ROOT" \
  -DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
  "$ROCPROFILER_ROOT"

# Build with make's unlimited parallel job mode.
make -j

exit 0
|
||||
File diff soppresso perché troppo grande
Carica Diff
@@ -1,92 +0,0 @@
|
||||
# rocmtools
|
||||
|
||||
|
||||
|
||||
## Getting started
|
||||
|
||||
To make it easy for you to get started with GitLab, here's a list of recommended next steps.
|
||||
|
||||
Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!
|
||||
|
||||
## Add your files
|
||||
|
||||
- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files
|
||||
- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command:
|
||||
|
||||
```
|
||||
cd existing_repo
|
||||
git remote add origin http://gitlab1.amd.com/vinodtipparaju/rocmtools.git
|
||||
git branch -M main
|
||||
git push -uf origin main
|
||||
```
|
||||
|
||||
## Integrate with your tools
|
||||
|
||||
- [ ] [Set up project integrations](http://gitlab1.amd.com/vinodtipparaju/rocmtools/-/settings/integrations)
|
||||
|
||||
## Collaborate with your team
|
||||
|
||||
- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/)
|
||||
- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html)
|
||||
- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically)
|
||||
- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/)
|
||||
- [ ] [Automatically merge when pipeline succeeds](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html)
|
||||
|
||||
## Test and Deploy
|
||||
|
||||
Use the built-in continuous integration in GitLab.
|
||||
|
||||
- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html)
|
||||
- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing(SAST)](https://docs.gitlab.com/ee/user/application_security/sast/)
|
||||
- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html)
|
||||
- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/)
|
||||
- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html)
|
||||
|
||||
***
|
||||
|
||||
# Editing this README
|
||||
|
||||
When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thank you to [makeareadme.com](https://www.makeareadme.com/) for this template.
|
||||
|
||||
## Suggestions for a good README
|
||||
Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information.
|
||||
|
||||
## Name
|
||||
Choose a self-explaining name for your project.
|
||||
|
||||
## Description
|
||||
Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors.
|
||||
|
||||
## Badges
|
||||
On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge.
|
||||
|
||||
## Visuals
|
||||
Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method.
|
||||
|
||||
## Installation
|
||||
Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection.
|
||||
|
||||
## Usage
|
||||
Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README.
|
||||
|
||||
## Support
|
||||
Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
|
||||
|
||||
## Roadmap
|
||||
If you have ideas for releases in the future, it is a good idea to list them in the README.
|
||||
|
||||
## Contributing
|
||||
State if you are open to contributions and what your requirements are for accepting them.
|
||||
|
||||
For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self.
|
||||
|
||||
You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser.
|
||||
|
||||
## Authors and acknowledgment
|
||||
Show your appreciation to those who have contributed to the project.
|
||||
|
||||
## License
|
||||
For open source projects, say how it is licensed.
|
||||
|
||||
## Project status
|
||||
If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
|
||||
File binario non mostrato.
@@ -1,25 +0,0 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
add_subdirectory(file)
|
||||
add_subdirectory(perfetto)
|
||||
add_subdirectory(ctf)
|
||||
@@ -1 +0,0 @@
|
||||
README.html
|
||||
@@ -1,161 +0,0 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
# Plugin shared object.
|
||||
add_library(ctf_plugin SHARED
|
||||
ctf.cpp
|
||||
plugin.cpp
|
||||
barectf.c "${CMAKE_CURRENT_BINARY_DIR}/barectf.h"
|
||||
${PROJECT_SOURCE_DIR}/src/utils/helper.cpp
|
||||
hsa_begin.cpp.i hsa_end.cpp.i
|
||||
hip_begin.cpp.i hip_end.cpp.i)
|
||||
set_target_properties(ctf_plugin PROPERTIES
|
||||
CXX_VISIBILITY_PRESET hidden
|
||||
LINK_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
|
||||
LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}")
|
||||
set(METADATA_STREAM_FILE_DIR "${CMAKE_INSTALL_DATADIR}/${PROJECT_NAME}/plugin/ctf")
|
||||
target_compile_definitions(ctf_plugin PRIVATE
|
||||
HIP_PROF_HIP_API_STRING=1
|
||||
__HIP_PLATFORM_HCC__=1
|
||||
CTF_PLUGIN_METADATA_FILE_PATH="${CMAKE_INSTALL_PREFIX}/${METADATA_STREAM_FILE_DIR}/metadata")
|
||||
target_include_directories(ctf_plugin PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}/inc"
|
||||
"${PROJECT_SOURCE_DIR}"
|
||||
"${CMAKE_BINARY_DIR}/src/api"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}")
|
||||
target_link_options(ctf_plugin PRIVATE
|
||||
"-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap"
|
||||
-Wl,--no-undefined)
|
||||
target_link_libraries(ctf_plugin PRIVATE
|
||||
${ROCPROFILER_TARGET}
|
||||
hsa-runtime64::hsa-runtime64
|
||||
systemd
|
||||
stdc++fs
|
||||
dl)
|
||||
install(TARGETS ctf_plugin LIBRARY
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}"
|
||||
COMPONENT runtime)
|
||||
|
||||
# `gen_api_files.py` and `gen_env_yaml.py` require Python 3,
|
||||
# CppHeaderParser, PyYAML, and barectf.
|
||||
# Locate the Python 3 interpreter used by the generator scripts below.
find_package(Python3 COMPONENTS Interpreter REQUIRED)

# Report which interpreter was selected.
message(STATUS "Python: ${Python3_EXECUTABLE}")
|
||||
|
||||
# check_py3_pkg(<pkg_name>)
#
# Verifies at configure time that the Python 3 interpreter found above can
# import the module `pkg_name`. Stops configuration with a fatal error when
# the import fails; otherwise prints a status line.
function(check_py3_pkg pkg_name)
  execute_process(COMMAND "${Python3_EXECUTABLE}" -c "import ${pkg_name}"
                  RESULT_VARIABLE PY3_IMPORT_RES
                  OUTPUT_QUIET)

  # Test the variable by name rather than by expansion: when the process
  # cannot be launched, the result is an error string that may contain
  # spaces, which would otherwise break the if() expression itself.
  if(NOT (PY3_IMPORT_RES EQUAL 0))
    message(FATAL_ERROR "Cannot find Python 3 package `${pkg_name}`")
  endif()

  message(STATUS "Found Python 3 package `${pkg_name}`")
endfunction()
|
||||
|
||||
check_py3_pkg(CppHeaderParser)
|
||||
check_py3_pkg(yaml)
|
||||
find_program(BARECTF_RES barectf REQUIRED)
|
||||
|
||||
# Generate barectf YAML and C++ files for HSA API.
|
||||
get_property(HSA_RUNTIME_INCLUDE_DIRS
|
||||
TARGET hsa-runtime64::hsa-runtime64
|
||||
PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
find_file(HSA_H hsa.h
|
||||
PATHS ${HSA_RUNTIME_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES hsa
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
get_filename_component(HSA_RUNTIME_INC_PATH "${HSA_H}" DIRECTORY)
|
||||
add_custom_command(
|
||||
OUTPUT hsa_erts.yaml hsa_begin.cpp.i hsa_end.cpp.i
|
||||
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa.h" -o hsa.h.i
|
||||
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
|
||||
-o hsa_ext_amd.h.i
|
||||
COMMAND ${CMAKE_COMMAND} -E cat hsa.h.i
|
||||
hsa_ext_amd.h.i
|
||||
"${CMAKE_BINARY_DIR}/src/api/hsa_prof_str.h"
|
||||
> hsa_input.h
|
||||
COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
|
||||
hsa hsa_input.h
|
||||
BYPRODUCTS hsa.h.i hsa_ext_amd.h.i hsa_input.h
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
|
||||
"${HSA_RUNTIME_INC_PATH}/hsa.h"
|
||||
"${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
|
||||
"${CMAKE_BINARY_DIR}/src/api/hsa_prof_str.h"
|
||||
COMMENT "Generating HSA API files for the `ctf` plugin...")
|
||||
|
||||
# Generate barectf YAML and C++ files for HIP API.
|
||||
get_property(HIP_INCLUDE_DIRS TARGET hip::amdhip64
|
||||
PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
find_file(HIP_RUNTIME_API_H hip_runtime_api.h
|
||||
PATHS ${HIP_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES hip
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
find_file(HIP_PROF_STR_H hip_prof_str.h
|
||||
PATHS ${HIP_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES hip hip/amd_detail
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
list(TRANSFORM HIP_INCLUDE_DIRS PREPEND -I)
|
||||
# Generates the barectf YAML and C++ fragments for the HIP API:
#   1. preprocess the HIP runtime API header,
#   2. concatenate it with the HIP profiling string header,
#   3. run `gen_api_files.py` on the combined input.
add_custom_command(
  OUTPUT hip_erts.yaml hip_begin.cpp.i hip_end.cpp.i
  COMMAND ${CMAKE_C_COMPILER} ${HIP_INCLUDE_DIRS}
          -E "${HIP_RUNTIME_API_H}"
          -D__HIP_PLATFORM_HCC__=1
          -D__HIP_ROCclr__=1
          -o hip_runtime_api.h.i
  # Use CMake's built-in `cat`, as the HSA command does, instead of
  # relying on an external `cat` executable.
  COMMAND ${CMAKE_COMMAND} -E cat hip_runtime_api.h.i
                                  "${HIP_PROF_STR_H}"
                                  > hip_input.h
  COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
          hip hip_input.h
  BYPRODUCTS hip_runtime_api.h.i hip_input.h
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/gen_api_files.py"
          "${HIP_RUNTIME_API_H}"
          "${HIP_PROF_STR_H}"
  COMMENT "Generating HIP API files for the `ctf` plugin...")
|
||||
|
||||
# Generate `env.yaml` (trace environment for barectf).
|
||||
add_custom_command(
|
||||
OUTPUT env.yaml
|
||||
COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/gen_env_yaml.py"
|
||||
${PROJECT_VERSION}
|
||||
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/gen_env_yaml.py"
|
||||
COMMENT "Generating `env.yaml`...")
|
||||
|
||||
# Generate raw CTF tracer with barectf.
|
||||
add_custom_command(
|
||||
OUTPUT barectf.c barectf.h barectf-bitfield.h metadata
|
||||
COMMAND "${BARECTF_RES}" gen "-I${CMAKE_CURRENT_BINARY_DIR}"
|
||||
"-I${CMAKE_CURRENT_SOURCE_DIR}"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/config.yaml"
|
||||
DEPENDS hsa_erts.yaml
|
||||
hip_erts.yaml
|
||||
env.yaml
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/config.yaml"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/dst_base.yaml"
|
||||
COMMENT "Generating raw CTF tracer with barectf...")
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/metadata"
|
||||
DESTINATION "${METADATA_STREAM_FILE_DIR}")
|
||||
@@ -1,260 +0,0 @@
|
||||
= CTF plugin for ROCMTools
|
||||
13 December 2022
|
||||
Philippe Proulx
|
||||
|
||||
This plugin writes the received ROCMTools tracer and profiler records to
|
||||
a https://diamon.org/ctf/[CTF] trace.
|
||||
|
||||
== Build requirements
|
||||
|
||||
* Python ≥ 3.10
|
||||
* barectf ≥ 3.1.1 (`pip3 install barectf`)
|
||||
* PyYAML (`apt-get install python3-yaml`)
|
||||
* CppHeaderParser (`pip3 install CppHeaderParser`)
|
||||
|
||||
== Usage
|
||||
|
||||
Once installed, you may load this plugin with `rocprofv2` using
|
||||
the `--plugin ctf` command-line arguments.
|
||||
|
||||
This plugin honours the `OUTPUT_PATH` environment variable which
|
||||
`rocprofv2` sets with the `-d` option. If you pass `-d my-dir` to
|
||||
`rocprofv2`, then the plugin will write the CTF trace to the
|
||||
`my-dir/trace` directory.
|
||||
|
||||
IMPORTANT: This plugin performs important cleanup tasks at finalization
|
||||
time, so the resulting CTF trace could be corrupted if the plugin is
|
||||
never finalized.
|
||||
|
||||
Once the plugin is finalized, open the resulting trace directory with
|
||||
either https://babeltrace.org/[Babeltrace{nbsp}2] or
|
||||
https://www.eclipse.org/tracecompass/[Trace Compass] to view or analyze
|
||||
it.
|
||||
|
||||
=== Event record types
|
||||
|
||||
This plugin writes to different CTF data streams having different types.
|
||||
On the file system, the prefix of a data stream file name indicates the
|
||||
data stream type, that is:
|
||||
|
||||
`roctx_`::
|
||||
rocTX messages.
|
||||
+
|
||||
Each CTF event record is named `roctx` and corresponds to a rocTX
|
||||
tracer record.
|
||||
+
|
||||
The fields are:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`id`::
|
||||
rocTX ID.
|
||||
|
||||
`msg`::
|
||||
rocTX message.
|
||||
--
|
||||
|
||||
`hsa_api_`::
|
||||
HSA API beginning and end function calls.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`agent_id`::
|
||||
Agent ID.
|
||||
|
||||
`correlation_id`::
|
||||
Correlation ID.
|
||||
--
|
||||
+
|
||||
For each ROCMTools HSA API tracer record for the HSA function named
|
||||
`__name__`, this plugin writes two event records:
|
||||
+
|
||||
`__name___begin`:::
|
||||
Beginning of the function call.
|
||||
+
|
||||
The event record contains fields which correspond to most of the
|
||||
parameters of the HSA function.
|
||||
|
||||
`__name___end`:::
|
||||
End of the function call.
|
||||
|
||||
`hip_api_`::
|
||||
HIP API beginning and end function calls.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`agent_id`::
|
||||
Agent ID.
|
||||
|
||||
`correlation_id`::
|
||||
Correlation ID.
|
||||
|
||||
`kernel_name`::
|
||||
Kernel name (empty string if not available).
|
||||
--
|
||||
+
|
||||
For each ROCMTools HIP API tracer record for the HIP function named
|
||||
`__name__`, this plugin writes two event records:
|
||||
+
|
||||
`__name__Begin`:::
|
||||
Beginning of the function call.
|
||||
+
|
||||
The event record contains fields which correspond to most of the
|
||||
parameters of the HIP function.
|
||||
|
||||
`__name__End`:::
|
||||
End of the function call.
|
||||
|
||||
`api_ops_`::
|
||||
HSA/HIP API beginning and end operations.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`agent_id`::
|
||||
Agent ID.
|
||||
|
||||
`correlation_id`::
|
||||
Correlation ID.
|
||||
--
|
||||
+
|
||||
The possible CTF event records are:
|
||||
+
|
||||
`hsa_op_begin`:::
|
||||
HSA API operation beginning.
|
||||
|
||||
`hsa_op_end`:::
|
||||
HSA API operation end.
|
||||
|
||||
`hip_op_begin`:::
|
||||
HIP API operation beginning.
|
||||
+
|
||||
Such an event record also has the field `kernel_name` which is the
|
||||
kernel name (empty string if not available).
|
||||
|
||||
`hip_op_end`:::
|
||||
HIP API operation end.
|
||||
|
||||
`profiler_`::
|
||||
Profiler records.
|
||||
+
|
||||
All CTF event records have the following common fields:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`dispatch`::
|
||||
Dispatch ID.
|
||||
|
||||
`gpu_id`::
|
||||
GPU ID.
|
||||
|
||||
`queue_id`::
|
||||
Queue ID.
|
||||
|
||||
`queue_index`::
|
||||
Queue index.
|
||||
|
||||
`process_id`::
|
||||
Process ID.
|
||||
|
||||
`thread_id`::
|
||||
Thread ID.
|
||||
|
||||
`kernel_id`::
|
||||
Kernel ID.
|
||||
|
||||
`kernel_name`::
|
||||
Kernel name (empty string if not available).
|
||||
|
||||
`counter_names`::
|
||||
Array of counter names, each one having a corresponding integral
|
||||
value in the `counter_values` field.
|
||||
|
||||
`counter_values`::
|
||||
Array of integers, each one being the value of a counter of which
|
||||
the name is a corresponding string in the `counter_names` field.
|
||||
--
|
||||
+
|
||||
The possible CTF event records are:
|
||||
+
|
||||
`profiler_record`:::
|
||||
Profiler record.
|
||||
|
||||
`profiler_record_with_kernel_properties`:::
|
||||
Profiler record with kernel properties.
|
||||
+
|
||||
Such an event record also has the following fields:
|
||||
+
|
||||
--
|
||||
`grid_size`::
|
||||
Grid size.
|
||||
|
||||
`workgroup_size`::
|
||||
Workgroup size.
|
||||
|
||||
`lds_size`::
|
||||
Local memory size.
|
||||
|
||||
`scratch_size`::
|
||||
Scratch size.
|
||||
|
||||
`arch_vgpr_count`::
|
||||
Architecture vector general purpose register count.
|
||||
|
||||
`accum_vgpr_count`::
|
||||
Accumulation vector general purpose register count.
|
||||
|
||||
`sgpr_count`::
|
||||
Scalar general purpose register count.
|
||||
|
||||
`wave_size`::
|
||||
Wavefront size.
|
||||
|
||||
`signal_handle`::
|
||||
Signal handle.
|
||||
--
|
||||
|
||||
`hsa_handles_`::
|
||||
HSA handle type mappings.
|
||||
+
|
||||
Each CTF event record is named `hsa_handle_type` and maps an HSA handle
|
||||
to a processor unit type (CPU or GPU).
|
||||
+
|
||||
The clock value of those event records is irrelevant (always{nbsp}0).
|
||||
+
|
||||
The fields are:
|
||||
+
|
||||
--
|
||||
[horizontal]
|
||||
`handle`::
|
||||
HSA handle.
|
||||
|
||||
`type`::
|
||||
Processor unit type (`CPU` or `GPU` enumeration label).
|
||||
--
|
||||
@@ -1,67 +0,0 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_EVENT_RECORD_H
|
||||
#define PLUGIN_CTF_BARECTF_EVENT_RECORD_H
|
||||
|
||||
#include <memory>
|
||||
#include <cstdint>
|
||||
|
||||
struct barectf_default_ctx;
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
// Abstract base class of any barectf event record.
|
||||
//
|
||||
// A concrete event record class must implement Write() which must call
|
||||
// a corresponding barectf tracing function.
|
||||
//
|
||||
// `CtxT` is the specific type of the barectf context which Write()
|
||||
// receives.
|
||||
// Common abstract base of all barectf event records.
//
// Every record carries the clock value it was built with. A concrete
// record type overrides Write(), which is expected to invoke the
// matching barectf tracing function on the context of type `CtxT`.
template <typename CtxT> class BarectfEventRecord {
 public:
  // Shared pointer to an immutable barectf event record.
  using SP = std::shared_ptr<const BarectfEventRecord>;

  // Event records are not copyable; this keeps the class simple.
  BarectfEventRecord(const BarectfEventRecord&) = delete;
  BarectfEventRecord& operator=(const BarectfEventRecord&) = delete;

  virtual ~BarectfEventRecord() = default;

  // Writes this event record through the barectf context `barectf_ctx`.
  virtual void Write(CtxT& barectf_ctx) const = 0;

  // Returns the clock value given at construction time.
  std::uint64_t GetClockVal() const noexcept { return clock_val_; }

 protected:
  // Only derived classes may build an event record; `clock_val` becomes
  // the value which GetClockVal() returns.
  explicit BarectfEventRecord(const std::uint64_t clock_val) noexcept : clock_val_{clock_val} {}

 private:
  // Clock value of this event record.
  std::uint64_t clock_val_;
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_EVENT_RECORD_H
|
||||
@@ -1,192 +0,0 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_PLATFORM_H
|
||||
#define PLUGIN_CTF_BARECTF_PLATFORM_H
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "barectf.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
template <typename> class BarectfWriter;
|
||||
|
||||
// A barectf platform for any barectf writer.
|
||||
//
|
||||
// The user doesn't deal directly with such an object: it's closely
|
||||
// coupled with a barectf writer.
|
||||
//
|
||||
// Each platform takes care of a single CTF data stream file.
|
||||
//
|
||||
// After building such a platform, get the raw barectf context with
|
||||
// GetCtx() to call tracing functions. The platform must still exist
|
||||
// when calling a tracing function.
|
||||
//
|
||||
// Such a platform opens the data stream file on construction and closes
|
||||
// it on destruction.
|
||||
//
|
||||
// `DescrT` is the specific barectf platform descriptor. It must be a
|
||||
// structure having:
|
||||
//
|
||||
// `Ctx`:
|
||||
// Specific barectf context type.
|
||||
//
|
||||
// `static void OpenPacket(Ctx&)`:
|
||||
// Packet opening function.
|
||||
//
|
||||
// `static void ClosePacket(Ctx&)`:
|
||||
// Packet closing function.
|
||||
template <typename DescrT> class BarectfPlatform final {
|
||||
friend class BarectfWriter<DescrT>;
|
||||
|
||||
private:
|
||||
// Builds a barectf platform.
|
||||
//
|
||||
// The platform writes CTF packets of size `packet_size` bytes to the
|
||||
// CTF data stream file `data_stream_file_path`.
|
||||
//
|
||||
// For each event record to write, the platform reads `clock_val` to
|
||||
// know the current timestamp.
|
||||
explicit BarectfPlatform(const std::size_t packet_size,
|
||||
const std::experimental::filesystem::path& data_stream_file_path,
|
||||
const std::uint64_t& clock_val)
|
||||
: clock_val_{&clock_val}, buffer_(packet_size) {
|
||||
// Initialize barectf callbacks.
|
||||
barectf_platform_callbacks callbacks;
|
||||
|
||||
callbacks.default_clock_get_value = GetClockCb;
|
||||
callbacks.is_backend_full = IsBackendFullCb;
|
||||
callbacks.open_packet = OpenPacketCb;
|
||||
callbacks.close_packet = ClosePacketCb;
|
||||
|
||||
// Configure exceptions so that stream operations throw instead of
|
||||
// just setting flags on error.
|
||||
output_.exceptions(std::ofstream::failbit | std::ofstream::badbit);
|
||||
|
||||
// Open CTF data stream output file in binary mode.
|
||||
output_.open(data_stream_file_path, std::ios_base::out | std::ios_base::binary);
|
||||
|
||||
// Initialize the raw barectf context.
|
||||
barectf_init(&ctx_, buffer_.data(), buffer_.size(), callbacks, this);
|
||||
|
||||
// Open the initial packet.
|
||||
OpenPacketCb();
|
||||
}
|
||||
|
||||
public:
|
||||
// Disabled copy operations to make this class simpler.
|
||||
BarectfPlatform(const BarectfPlatform&) = delete;
|
||||
BarectfPlatform& operator=(const BarectfPlatform&) = delete;
|
||||
|
||||
// Closes/writes any last CTF packet and closes the data stream file.
|
||||
~BarectfPlatform() {
|
||||
if (barectf_packet_is_open(&ctx_) && !barectf_packet_is_empty(&ctx_)) {
|
||||
// Close and write last CTF packet (not empty).
|
||||
ClosePacketCb();
|
||||
}
|
||||
|
||||
// Close data stream output file.
|
||||
output_.close();
|
||||
}
|
||||
|
||||
// Returns the raw barectf context of this platform.
|
||||
const typename DescrT::Ctx& GetCtx() const noexcept { return ctx_; }
|
||||
typename DescrT::Ctx& GetCtx() noexcept { return ctx_; }
|
||||
|
||||
private:
|
||||
static BarectfPlatform& AsPlatform(void* const data) noexcept {
|
||||
return *static_cast<BarectfPlatform*>(data);
|
||||
}
|
||||
|
||||
// Four callbacks for barectf.
|
||||
//
|
||||
// Those four functions receive an instance of this class as `data`.
|
||||
|
||||
static std::uint64_t GetClockCb(void* const data) noexcept {
|
||||
// Forward to instance method.
|
||||
return AsPlatform(data).GetClockCb();
|
||||
}
|
||||
|
||||
static int IsBackendFullCb(void* const data) noexcept {
|
||||
// Forward to instance method.
|
||||
return AsPlatform(data).IsBackendFullCb();
|
||||
}
|
||||
|
||||
static void OpenPacketCb(void* const data) {
|
||||
// Forward to instance method.
|
||||
AsPlatform(data).OpenPacketCb();
|
||||
}
|
||||
|
||||
static void ClosePacketCb(void* const data) {
|
||||
// Forward to instance method.
|
||||
AsPlatform(data).ClosePacketCb();
|
||||
}
|
||||
|
||||
// Instance version of the "get clock value" callback.
|
||||
std::uint64_t GetClockCb() noexcept { return *clock_val_; }
|
||||
|
||||
// Instance version of the "is the back end full?" callback.
|
||||
int IsBackendFullCb() noexcept {
|
||||
// Never full.
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Instance version of the "open packet" callback.
|
||||
void OpenPacketCb() {
|
||||
// Forward to user (descriptor) function.
|
||||
DescrT::OpenPacket(ctx_);
|
||||
}
|
||||
|
||||
// Instance version of the "close packet" callback.
|
||||
void ClosePacketCb() {
|
||||
// Forward to user (descriptor) function to finalize the packet.
|
||||
DescrT::ClosePacket(ctx_);
|
||||
|
||||
// Write to the data stream file.
|
||||
WriteCurrentPacket();
|
||||
}
|
||||
|
||||
// Writes the current CTF packet (`buffer_`) to the data stream file.
|
||||
void WriteCurrentPacket() {
|
||||
output_.write(reinterpret_cast<const char*>(buffer_.data()), buffer_.size());
|
||||
}
|
||||
|
||||
// Clock value pointer.
|
||||
const std::uint64_t* clock_val_;
|
||||
|
||||
// CTF data stream output file stream.
|
||||
std::ofstream output_;
|
||||
|
||||
// Raw barectf context.
|
||||
typename DescrT::Ctx ctx_;
|
||||
|
||||
// CTF packet buffer.
|
||||
std::vector<std::uint8_t> buffer_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_PLATFORM_H
|
||||
@@ -1,124 +0,0 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_TRACER_H
|
||||
#define PLUGIN_CTF_BARECTF_TRACER_H
|
||||
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "barectf_event_record.h"
|
||||
#include "barectf_writer.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
// A barectf tracer offers the AddEventRecord() method to add an event
|
||||
// record which it will ultimately write to some CTF data stream file
|
||||
// within some specified CTF trace directory.
|
||||
//
|
||||
// One important feature of such a tracer is that you don't need to add
|
||||
// event records in order of time. A barectf tracer manages one or more
|
||||
// barectf writers, each one managing a single barectf platform/context
|
||||
// (CTF data stream file).
|
||||
//
|
||||
// All the CTF data stream files which a barectf tracer indirectly
|
||||
// manages share a common specified prefix. You must not use the same
|
||||
// prefix for two barectf tracers writing to the same CTF trace
|
||||
// directory.
|
||||
//
|
||||
// `PlatformDescrT` is the specific barectf platform descriptor (see the
|
||||
// documentation of the `BarectfPlatform` class template).
|
||||
template <typename PlatformDescrT> class BarectfTracer final {
|
||||
public:
|
||||
// Specific barectf event record type.
|
||||
using EventRecord = typename BarectfWriter<PlatformDescrT>::EventRecord;
|
||||
|
||||
// Builds a barectf tracer to write CTF packets of size `packet_size`
|
||||
// bytes to CTF data stream files having the prefix
|
||||
// `data_stream_file_name_prefix` within the CTF trace directory
|
||||
// `trace_dir`.
|
||||
//
|
||||
// The internal barectf writers manage event record queues having a
|
||||
// maximum size of `max_writer_queue_size`. Increasing
|
||||
// `max_writer_queue_size` increases the memory footprint of the
|
||||
// tracer, but may reduce the number of required CTF data stream files
|
||||
// to ensure time-ordered event records.
|
||||
explicit BarectfTracer(const std::size_t packet_size,
|
||||
std::experimental::filesystem::path trace_dir,
|
||||
const char* const data_stream_file_name_prefix,
|
||||
const std::size_t max_writer_queue_size = 200)
|
||||
: packet_size_{packet_size},
|
||||
trace_dir_{std::move(trace_dir)},
|
||||
data_stream_file_name_prefix_{data_stream_file_name_prefix},
|
||||
max_writer_queue_size_{max_writer_queue_size} {}
|
||||
|
||||
// Disabled copy operations to make this class simpler.
|
||||
BarectfTracer(const BarectfTracer&) = delete;
|
||||
BarectfTracer& operator=(const BarectfTracer&) = delete;
|
||||
|
||||
// Adds the event record `event_record` to this tracer.
|
||||
//
|
||||
// The clock value of `event_record` may be less than the clock value
|
||||
// of previously added event records.
|
||||
void AddEventRecord(typename EventRecord::SP event_record) {
|
||||
// Try to find a barectf writer to accept `event_record`.
|
||||
for (auto& writer : writers_) {
|
||||
if (writer->MayAddEventRecord(*event_record)) {
|
||||
// Found: add the event record to this writer and return.
|
||||
writer->AddEventRecord(std::move(event_record));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// No barectf writer found: create a new one.
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << data_stream_file_name_prefix_ << writers_.size();
|
||||
writers_.emplace_back(new BarectfWriter<PlatformDescrT>{packet_size_, trace_dir_ / ss.str(),
|
||||
max_writer_queue_size_});
|
||||
|
||||
// Add the event record to this new barectf writer.
|
||||
assert(writers_.back()->MayAddEventRecord(*event_record));
|
||||
writers_.back()->AddEventRecord(std::move(event_record));
|
||||
}
|
||||
|
||||
private:
|
||||
// CTF packet size.
|
||||
std::size_t packet_size_;
|
||||
|
||||
// CTF trace directory.
|
||||
std::experimental::filesystem::path trace_dir_;
|
||||
|
||||
// CTF data stream file name prefix.
|
||||
std::string data_stream_file_name_prefix_;
|
||||
|
||||
// Maximum event record queue size of a barectf writer.
|
||||
std::size_t max_writer_queue_size_;
|
||||
|
||||
// barectf writers.
|
||||
std::vector<std::unique_ptr<BarectfWriter<PlatformDescrT>>> writers_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_TRACER_H
|
||||
@@ -1,178 +0,0 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_BARECTF_WRITER_H
|
||||
#define PLUGIN_CTF_BARECTF_WRITER_H
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <cstdint>
|
||||
#include <cassert>
|
||||
#include <queue>
|
||||
#include <utility>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "barectf_platform.h"
|
||||
#include "barectf_event_record.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
template <typename> class BarectfTracer;
|
||||
|
||||
// A barectf writer manages a queue of event records, writing them
|
||||
// through barectf when needed.
|
||||
//
|
||||
// Such an object makes it possible to add some event record with a
|
||||
// clock value V and then some other event record of which the clock
|
||||
// value is less than V. The barectf writer ensures that actual barectf
|
||||
// tracing functions are called chronologically, a requirement of CTF.
|
||||
//
|
||||
// A barectf writer keeps event records in memory until its queue is
|
||||
// full (you provide the maximum queue size at construction time), in
|
||||
// which case it writes the oldest event record to some current CTF
|
||||
// packet through a barectf tracing function.
|
||||
//
|
||||
// Call MayAddEventRecord() to check whether or not you may add an event
|
||||
// record to the barectf writer, and then AddEventRecord() if you may.
|
||||
//
|
||||
// A barectf writer writes all its remaining event records on
|
||||
// destruction.
|
||||
//
|
||||
// `PlatformDescrT` is the specific barectf platform descriptor (see the
|
||||
// documentation of the `BarectfPlatform` class template).
|
||||
template <typename PlatformDescrT> class BarectfWriter final {
|
||||
friend class BarectfTracer<PlatformDescrT>;
|
||||
|
||||
public:
|
||||
// Specific barectf event record type.
|
||||
using EventRecord = BarectfEventRecord<typename PlatformDescrT::Ctx>;
|
||||
|
||||
private:
|
||||
// Builds a barectf writer to write CTF packets of size `packet_size`
|
||||
// bytes to the CTF data stream file `data_stream_file_path`.
|
||||
//
|
||||
// The built barectf writer manages an event record queue having a
|
||||
// maximum size of `max_queue_size`.
|
||||
explicit BarectfWriter(const std::size_t packet_size,
|
||||
const std::experimental::filesystem::path& data_stream_file_path,
|
||||
const std::size_t max_queue_size)
|
||||
: platform_{packet_size, data_stream_file_path, clock_val_},
|
||||
max_queue_size_{max_queue_size} {}
|
||||
|
||||
public:
|
||||
// Writes all its remaining event records.
|
||||
~BarectfWriter() {
|
||||
// Write all the remaining event records from the oldest to the
|
||||
// newest.
|
||||
while (!queue_.empty()) {
|
||||
WriteOldestEventRecord();
|
||||
}
|
||||
}
|
||||
|
||||
// Disabled copy operations to make this class simpler.
|
||||
BarectfWriter(const BarectfWriter&) = delete;
|
||||
BarectfWriter& operator=(const BarectfWriter&) = delete;
|
||||
|
||||
// Whether or not you may add the event record `event_record` to this
|
||||
// writer with AddEventRecord().
|
||||
bool MayAddEventRecord(const EventRecord& event_record) const noexcept {
|
||||
if (queue_.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// One may only add an event record if its clock value is greater
|
||||
// than or equal to the clock value of the most recently written
|
||||
// event record.
|
||||
return event_record.GetClockVal() >= clock_val_;
|
||||
}
|
||||
|
||||
// Adds the event record `event_record` to this writer.
|
||||
//
|
||||
// `MayAddEventRecord(*event_record)` must return `true`.
|
||||
void AddEventRecord(typename EventRecord::SP event_record) {
|
||||
assert(MayAddEventRecord(*event_record) && "May add event record");
|
||||
|
||||
// Add event record to queue.
|
||||
queue_.emplace(std::move(event_record));
|
||||
|
||||
if (queue_.size() > max_queue_size_) {
|
||||
// Queue is too large: write the oldest event record now to
|
||||
// satisfy the requirement.
|
||||
WriteOldestEventRecord();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// Comparison type for `queue_`.
|
||||
struct EventRecordQueueCompare final {
|
||||
bool operator()(const typename EventRecord::SP& left,
|
||||
const typename EventRecord::SP& right) const noexcept {
|
||||
// "Greater than" so that the top element of the queue is the
|
||||
// oldest event record.
|
||||
return left->GetClockVal() > right->GetClockVal();
|
||||
}
|
||||
};
|
||||
|
||||
// Oldest event record within `queue_`.
|
||||
//
|
||||
// `queue_` must not be empty.
|
||||
const EventRecord& GetOldestEventRecord() const noexcept {
|
||||
assert(!queue_.empty() && "Queue isn't empty");
|
||||
return *queue_.top();
|
||||
}
|
||||
|
||||
// Writes the oldest event record through a barectf tracing function
|
||||
// and removes it from the event record queue.
|
||||
void WriteOldestEventRecord() {
|
||||
auto& oldest_event_record = GetOldestEventRecord();
|
||||
|
||||
// When calling a barectf tracing function, it calls the clock value
|
||||
// accessor callback of the platform, which itself reads from
|
||||
// `clock_val_`.
|
||||
clock_val_ = oldest_event_record.GetClockVal();
|
||||
|
||||
// Forward to a barectf tracing function.
|
||||
oldest_event_record.Write(platform_.GetCtx());
|
||||
|
||||
// Remove from queue.
|
||||
queue_.pop();
|
||||
}
|
||||
|
||||
// barectf platform (manages file I/O).
|
||||
BarectfPlatform<PlatformDescrT> platform_;
|
||||
|
||||
// Current clock value for `platform_`.
|
||||
//
|
||||
// This is also the clock value of the most recently written event
|
||||
// record, therefore that MayAddEventRecord() can rely on this.
|
||||
std::uint64_t clock_val_ = 0;
|
||||
|
||||
// Maximum size of `queue_` below.
|
||||
std::size_t max_queue_size_;
|
||||
|
||||
// Event record queue.
|
||||
std::priority_queue<typename EventRecord::SP, std::vector<typename EventRecord::SP>,
|
||||
EventRecordQueueCompare>
|
||||
queue_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_BARECTF_WRITER_H
|
||||
@@ -1,165 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
%YAML 1.2
|
||||
--- !<tag:barectf.org,2020/3/config>
|
||||
trace:
|
||||
$include:
|
||||
# Environment (generated file).
|
||||
- env.yaml
|
||||
type:
|
||||
$include:
|
||||
- stdint.yaml
|
||||
- stdmisc.yaml
|
||||
native-byte-order: little-endian
|
||||
clock-types:
|
||||
default:
|
||||
origin-is-unix-epoch: true
|
||||
$c-type: uint64_t
|
||||
data-stream-types:
|
||||
hsa_api:
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
- _queue_id: uint32
|
||||
- _agent_id: uint32
|
||||
- _correlation_id: uint64
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
|
||||
# HSA API event record types (generated file).
|
||||
- hsa_erts.yaml
|
||||
hip_api:
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
- _queue_id: uint32
|
||||
- _agent_id: uint32
|
||||
- _correlation_id: uint64
|
||||
- _kernel_name: str
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
|
||||
# HIP API event record types (generated file).
|
||||
- hip_erts.yaml
|
||||
roctx:
|
||||
$include:
|
||||
# Base
|
||||
- dst_base.yaml
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
event-record-types:
|
||||
roctx:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _id: sint64
|
||||
- _msg: str
|
||||
hsa_handles:
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
event-record-types:
|
||||
hsa_handle_type:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _handle: uint64
|
||||
- _type:
|
||||
field-type:
|
||||
class: uenum
|
||||
size: 8
|
||||
mappings:
|
||||
CPU: [0]
|
||||
GPU: [1]
|
||||
api_ops:
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _thread_id: uint32
|
||||
- _queue_id: uint32
|
||||
- _agent_id: uint32
|
||||
- _correlation_id: uint64
|
||||
event-record-types:
|
||||
hsa_op_begin:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
hsa_op_end:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
hip_op_begin:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _kernel_name: str
|
||||
hip_op_end:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
profiler:
|
||||
$include:
|
||||
# Base.
|
||||
- dst_base.yaml
|
||||
event-record-common-context-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _dispatch: uint64
|
||||
- _gpu_id: uint64
|
||||
- _queue_id: uint64
|
||||
- _queue_index: uint64
|
||||
- _process_id: uint32
|
||||
- _thread_id: uint32
|
||||
- _kernel_id: uint64
|
||||
- _kernel_name: str
|
||||
- _counter_names:
|
||||
field-type:
|
||||
class: dynamic-array
|
||||
element-field-type: str
|
||||
- _counter_values:
|
||||
field-type:
|
||||
class: dynamic-array
|
||||
element-field-type: uint64
|
||||
event-record-types:
|
||||
profiler_record:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
profiler_record_with_kernel_properties:
|
||||
payload-field-type:
|
||||
class: struct
|
||||
members:
|
||||
- _grid_size: uint64
|
||||
- _workgroup_size: uint64
|
||||
- _lds_size: uint64
|
||||
- _scratch_size: uint64
|
||||
- _arch_vgpr_count: uint64
|
||||
- _accum_vgpr_count: uint64
|
||||
- _sgpr_count: uint64
|
||||
- _wave_size: uint64
|
||||
- _signal_handle: uint64
|
||||
@@ -1,107 +0,0 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <cassert>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
|
||||
#include "plugin.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
// Global plugin instance
|
||||
rocm_ctf::Plugin* the_plugin = nullptr;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Creates the global plugin instance.
//
// Returns 0 on success, or -1 when:
//
//   * The major version differs from `ROCPROFILER_VERSION_MAJOR` or the
//     minor version is less than `ROCPROFILER_VERSION_MINOR`.
//   * A plugin instance already exists.
//   * The `OUTPUT_PATH` environment variable isn't set.
//   * Building the plugin instance throws (the message is printed to
//     the standard error stream).
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(const uint32_t rocprofiler_major_version,
                                                     const uint32_t rocprofiler_minor_version) {
  const bool major_matches = rocprofiler_major_version == ROCPROFILER_VERSION_MAJOR;
  const bool minor_is_recent_enough = rocprofiler_minor_version >= ROCPROFILER_VERSION_MINOR;

  if (!major_matches || !minor_is_recent_enough) {
    return -1;
  }

  // Refuse to initialize twice.
  if (the_plugin != nullptr) {
    return -1;
  }

  // The trace is written below the directory named by `OUTPUT_PATH`.
  const char* const output_dir = getenv("OUTPUT_PATH");

  if (output_dir == nullptr) {
    std::cerr << "rocprofiler_plugin_initialize(): "
              << "`OUTPUT_PATH` environment variable isn't set" << std::endl;
    return -1;
  }

  try {
    const auto trace_dir = fs::path{output_dir} / "trace";

    the_plugin = new rocm_ctf::Plugin{256 * 1024, trace_dir, CTF_PLUGIN_METADATA_FILE_PATH};
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_initialize(): " << exc.what() << std::endl;
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Destroys the global plugin instance, if any, and resets the global
// pointer so that the plugin may be initialized again.
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
  rocm_ctf::Plugin* const doomed = the_plugin;

  the_plugin = nullptr;
  delete doomed;
}
|
||||
|
||||
// Forwards the buffer records from `begin` to `end` to the global
// plugin instance, which must exist (checked with an assertion only).
//
// Returns 0 on success, or -1 if handling the records throws a
// `std::exception` (the message is printed to the standard error
// stream).
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* const begin, const rocprofiler_record_header_t* const end,
    const rocprofiler_session_id_t session_id, const rocprofiler_buffer_id_t buffer_id) {
  assert(the_plugin);

  int status = 0;

  try {
    the_plugin->HandleBufferRecords(begin, end, session_id, buffer_id);
  } catch (const std::exception& exc) {
    std::cerr << "rocprofiler_plugin_write_buffer_records(): " << exc.what() << std::endl;
    status = -1;
  }

  return status;
}
|
||||
|
||||
// Forwards the tracer record `record` to the global plugin instance,
// which must exist (checked with an assertion only).
//
// A record of which the header ID handle is zero is ignored (returns 0
// without forwarding).
//
// Returns 0 on success, or -1 if handling the record throws a
// `std::exception` (the message is printed to the standard error
// stream).
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(const rocprofiler_record_tracer_t record,
                                                       const rocprofiler_session_id_t session_id) {
  assert(the_plugin);

  int status = 0;

  if (record.header.id.handle != 0) {
    try {
      the_plugin->HandleTracerRecord(record, session_id);
    } catch (const std::exception& exc) {
      std::cerr << "rocprofiler_plugin_write_record(): " << exc.what() << std::endl;
      status = -1;
    }
  }

  return status;
}
|
||||
@@ -1,28 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
# Partial barectf data stream type settings.
# NOTE(review): presumably included by the CTF plugin's main barectf
# configuration -- confirm which file includes this one.

# Use the clock type named `default` as the default clock type.
$default-clock-type-name: default
$features:
  packet:
    # The three optional packet-level field types below are disabled.
    beginning-timestamp-field-type: false
    discarded-event-records-counter-snapshot-field-type: false
    end-timestamp-field-type: false
|
||||
@@ -1,645 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
import os
|
||||
import os.path
|
||||
import sys
|
||||
import re
|
||||
import yaml
|
||||
import CppHeaderParser
|
||||
|
||||
|
||||
# Numeric field type (abstract).
|
||||
class _NumericFt:
|
||||
# Returns the C++ expression to cast the expression `expr` to the C
|
||||
# type of this field type.
|
||||
def cast(self, expr):
|
||||
return f'static_cast<{self.c_type}>({expr})'
|
||||
|
||||
|
||||
# Abstract integer field type: a size (bits) and a preferred display
# base.
class _IntFt(_NumericFt):
    def __init__(self, size, pref_disp_base='dec'):
        self._pref_disp_base = pref_disp_base
        self._size = size

    # Size of the integer (bits).
    @property
    def size(self):
        return self._size

    # Preferred display base (`dec` or `hex`).
    @property
    def pref_disp_base(self):
        return self._pref_disp_base

    # Properties of the equivalent barectf field type (YAML) which are
    # common to all the integer field types; subclasses add `class`.
    @property
    def barectf_yaml(self):
        yaml_ft = {}
        yaml_ft['size'] = self._size
        yaml_ft['preferred-display-base'] = self._pref_disp_base
        return yaml_ft
|
||||
|
||||
|
||||
# Signed integer field type.
class _SIntFt(_IntFt):
    # Equivalent barectf field type in YAML: the common integer
    # properties with the `sint` class.
    @property
    def barectf_yaml(self):
        return {**super().barectf_yaml, 'class': 'sint'}

    # Equivalent C type (fixed-width signed integer).
    @property
    def c_type(self):
        return 'std::int{}_t'.format(self._size)
|
||||
|
||||
|
||||
# Unsigned integer field type.
class _UIntFt(_IntFt):
    # Equivalent barectf field type in YAML: the common integer
    # properties with the `uint` class.
    @property
    def barectf_yaml(self):
        return {**super().barectf_yaml, 'class': 'uint'}

    # Equivalent C type (fixed-width unsigned integer).
    @property
    def c_type(self):
        return 'std::uint{}_t'.format(self._size)
|
||||
|
||||
|
||||
# Pointer field type: a 64-bit unsigned integer displayed in hexadecimal.
class _PointerFt(_UIntFt):
    def __init__(self):
        super().__init__(size=64, pref_disp_base='hex')

    # Returns C++ source text which converts the pointer expression
    # `expr` to the C type of this field type, going through
    # `std::uintptr_t`.
    def cast(self, expr):
        as_uintptr = 'reinterpret_cast<std::uintptr_t>({})'.format(expr)
        return 'static_cast<{}>({})'.format(self.c_type, as_uintptr)
|
||||
|
||||
|
||||
# Abstract enumeration field type: an integer field type with named
# values.
class _EnumFt(_IntFt):
    def __init__(self, size, mappings):
        super().__init__(size)

        # Keep a private copy so that later changes to the caller's
        # dictionary don't affect this field type.
        self._mappings = mappings.copy()

    # Mappings (names to integers).
    @property
    def mappings(self):
        return self._mappings

    # Equivalent barectf field type in YAML: the integer properties plus
    # `mappings`, where each name maps to a one-element list containing
    # its value.
    @property
    def barectf_yaml(self):
        yaml_ft = super().barectf_yaml
        yaml_ft['mappings'] = {
            label: [value] for label, value in self._mappings.items()
        }
        return yaml_ft
|
||||
|
||||
|
||||
# Unsigned enumeration field type.
class _UEnumFt(_EnumFt, _UIntFt):
    # Equivalent barectf field type in YAML: same as the parent types,
    # but with the `uenum` class.
    @property
    def barectf_yaml(self):
        return {**super().barectf_yaml, 'class': 'uenum'}
|
||||
|
||||
|
||||
# Signed enumeration field type.
#
# Derives from `_SIntFt` (not `_UIntFt`) so that `c_type` is the signed
# `std::int<size>_t`: the generated `static_cast` then keeps the sign of
# enumerators having a negative value.
class _SEnumFt(_EnumFt, _SIntFt):
    # Equivalent barectf field type in YAML: same as the parent types,
    # but with the `senum` class.
    @property
    def barectf_yaml(self):
        ret = super().barectf_yaml
        ret['class'] = 'senum'
        return ret
|
||||
|
||||
|
||||
# Optional string field type.
|
||||
class _OptStrFt:
|
||||
# Equivalent barectf field type in YAML.
|
||||
@property
|
||||
def barectf_yaml(self):
|
||||
return {
|
||||
'class': 'str',
|
||||
}
|
||||
|
||||
|
||||
# String field type.
#
# Same barectf field type as `_OptStrFt`; the distinct class lets the
# C++ code generator tell both apart with an exact type check.
class _StrFt(_OptStrFt):
    pass
|
||||
|
||||
|
||||
# Floating-point number field type.
class _FloatFt(_NumericFt):
    def __init__(self, size):
        self._size = size

    # Size (bits): 32 or 64.
    @property
    def size(self):
        return self._size

    # Equivalent barectf field type in YAML.
    @property
    def barectf_yaml(self):
        yaml_ft = {}
        yaml_ft['class'] = 'real'
        yaml_ft['size'] = self._size
        return yaml_ft

    # Equivalent C type: `float` for 32 bits, `double` for 64 bits.
    @property
    def c_type(self):
        if self._size == 32:
            return 'float'

        # Only two sizes are supported.
        assert self._size == 64
        return 'double'
|
||||
|
||||
|
||||
# Event record type.
|
||||
class _Ert:
|
||||
def __init__(self, api_func_name, members):
|
||||
self._api_func_name = api_func_name
|
||||
self._members = members
|
||||
|
||||
# API function name
|
||||
@property
|
||||
def api_func_name(self):
|
||||
return self._api_func_name
|
||||
|
||||
# Parameters of function (list of `_ErtMember`).
|
||||
@property
|
||||
def members(self):
|
||||
return self._members
|
||||
|
||||
|
||||
# Beginning event record type.
class _BeginErt(_Ert):
    # Name of this event record type: the API function name followed by
    # `_begin` for the `hsa` API prefix, or by `Begin` for any other.
    def name(self, api_prefix):
        if api_prefix == 'hsa':
            suffix = '_begin'
        else:
            suffix = 'Begin'

        return '{}{}'.format(self._api_func_name, suffix)
|
||||
|
||||
|
||||
# End event record type.
class _EndErt(_Ert):
    # Name of this event record type: the API function name followed by
    # `_end` for the `hsa` API prefix, or by `End` for any other.
    def name(self, api_prefix):
        if api_prefix == 'hsa':
            suffix = '_end'
        else:
            suffix = 'End'

        return '{}{}'.format(self._api_func_name, suffix)
|
||||
|
||||
|
||||
# Event record type member.
|
||||
class _ErtMember:
|
||||
def __init__(self, access, member_names, ft):
|
||||
self._access = access
|
||||
self._member_names = member_names.copy()
|
||||
self._ft = ft
|
||||
|
||||
# C++ access expression.
|
||||
@property
|
||||
def access(self):
|
||||
return self._access
|
||||
|
||||
# List of member names.
|
||||
@property
|
||||
def member_names(self):
|
||||
return self._member_names
|
||||
|
||||
# Equivalent field type.
|
||||
@property
|
||||
def ft(self):
|
||||
return self._ft
|
||||
|
||||
|
||||
# Makes sure some condition is satisfied, or prints the error message
|
||||
# `error_msg` and quits with exit status 1 otherwise.
|
||||
#
|
||||
# This is an unconditional assertion.
|
||||
def _make_sure(cond, error_msg):
|
||||
if not cond:
|
||||
print(f'Error: {error_msg}', file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _enumerator_effective_val(enum_val):
|
||||
# Try the value, but this value may be a string (an
|
||||
# enumerator/definition).
|
||||
val = enum_val.get('value')
|
||||
|
||||
if type(val) is int:
|
||||
return val
|
||||
|
||||
# Try the raw value.
|
||||
val = enum_val.get('raw_value')
|
||||
|
||||
if val is not None:
|
||||
if type(val) is int:
|
||||
# Raw value is already an integer.
|
||||
return val
|
||||
else:
|
||||
# Try to parse the raw value string as an integer.
|
||||
try:
|
||||
return int(val, 0)
|
||||
except:
|
||||
pass
|
||||
|
||||
_make_sure(False,
|
||||
f'Cannot get the integral value of enumerator `{enum_val["name"]}`')
|
||||
|
||||
|
||||
# Returns the equivalent field type of the C type `c_type`.
#
# If the leading word of `c_type` (after an optional `enum` keyword) is
# the name of an enumeration of `cpp_header`, then this function returns
# a signed enumeration field type (32 bits, or 64 bits if some value
# doesn't fit in 32 signed bits), or a 64-bit signed integer field type
# if the enumeration is empty.
#
# Otherwise, the field type depends on the keywords found within
# `c_type`, falling back to a 32-bit integer.
def _number_ft_from_c_type(cpp_header, c_type):
    # Check for known enumeration.
    enum_match = re.match(r'(?:enum\s+)?(\w+)', c_type)

    if enum_match:
        wanted_name = enum_match.group(1)

        for enum_info in cpp_header.enums:
            if wanted_name != enum_info.get('name'):
                continue

            # Fill enumeration field type mappings.
            mappings = {
                str(enumerator['name']): _enumerator_effective_val(enumerator)
                for enumerator in enum_info['values']
            }

            if not mappings:
                return _SIntFt(64)

            values = mappings.values()
            needs_64_bits = max(values) >= 2**31 or min(values) < -2**31

            # Create corresponding enumeration field type.
            return _SEnumFt(64 if needs_64_bits else 32, mappings)

    # Find corresponding basic field type: the first keyword found
    # within `c_type` decides, in this specific order.
    int_ft_cls = _UIntFt if 'unsigned' in c_type else _SIntFt

    for keyword, size in (('long', 64), ('short', 16), ('char', 8)):
        if keyword in c_type:
            return int_ft_cls(size)

    if 'float' in c_type:
        return _FloatFt(32)

    if 'double' in c_type:
        return _FloatFt(64)

    # Assume `int` (often an unresolved C enumeration).
    return int_ft_cls(32)
|
||||
|
||||
|
||||
# Returns whether or not a property has a pointer type.
|
||||
def _prop_is_pointer(prop, c_type):
|
||||
if prop['pointer'] or prop['function_pointer']:
|
||||
return True
|
||||
|
||||
if prop['array'] and 'array_size' in prop:
|
||||
return True
|
||||
|
||||
if prop['unresolved']:
|
||||
# HSA API function pointers.
|
||||
if prop['name'] in ('callback', 'handler'):
|
||||
return True
|
||||
|
||||
# HIP API function pointers.
|
||||
if c_type.endswith('Fn_t'):
|
||||
return True
|
||||
|
||||
# Check the C type itself.
|
||||
if '*' in c_type or '*' in prop.get('raw_type', ''):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# Returns the flat list of event record type member objects for the
# structure `struct`, recursing into substructures.
#
# `access` is the C++ expression to access `struct` itself and
# `member_names` is the list of names leading to it (not modified).
def _get_ert_members_for_struct(cpp_header, struct, access, member_names):
    collected = []

    # Private copy of the names with one more slot for the name of the
    # property being visited.
    name_path = member_names.copy()
    name_path.append(None)

    for prop in struct['properties']['public']:
        # Name, member names, access, C type, and aliases.
        name = prop['name']
        name_path[-1] = str(name)
        this_access = '{}.{}'.format(access, name)
        c_type = prop['type']
        aliases = prop['aliases']

        # Skip no type.
        if c_type == '':
            continue

        # Skip unnamed or union.
        if name == '' or 'union' in name or re.match(r'\bunion\b', c_type):
            continue

        # Follow the chain of known C type aliases up to its end.
        alias = cpp_header.typedefs.get(c_type)

        while alias is not None:
            c_type = alias
            alias = cpp_header.typedefs.get(c_type)

        if re.match(r'^((const\s+char)|(char\s+const)|char)\s*\*$',
                    c_type.strip()):
            # C string.
            ft = _OptStrFt()
        elif _prop_is_pointer(prop, c_type):
            # Pointer: use numeric value.
            ft = _PointerFt()
        else:
            # Check for substructure, by C type and then by sole alias.
            sub_struct = cpp_header.classes.get(c_type)

            if sub_struct is None and len(aliases) == 1:
                sub_struct = cpp_header.classes.get(aliases[0])

            if sub_struct is not None:
                collected.extend(_get_ert_members_for_struct(cpp_header,
                                                             sub_struct,
                                                             this_access,
                                                             name_path))
                continue

            # Use a basic field type.
            ft = _number_ft_from_c_type(cpp_header, c_type)

        collected.append(_ErtMember(this_access, name_path, ft))

    return collected
|
||||
|
||||
|
||||
# Returns the beginning and end event record type objects (two lists)
# for the callback data structure `struct`.
#
# When `retval_info` isn't `None`, the first nested class of `struct` is
# the return value union and the second one is the `args` union;
# otherwise the first nested class is the `args` union.
def _erts_from_cb_data_struct(api_prefix, cpp_header, retval_info, struct):
    has_retvals = retval_info is not None
    retval_members = {}

    if has_retvals:
        # Create return value members (to be used later).
        outer_nested_classes = struct['nested_classes']
        _make_sure(len(outer_nested_classes) >= 1,
                   f"Return value union doesn't exist in `{struct['name']}`")

        for prop in outer_nested_classes[0]['properties']['public']:
            retval_members[prop['name']] = _ErtMember(
                'GetApiData().{}'.format(str(prop['name'])), ['retval'],
                _number_ft_from_c_type(cpp_header, prop['type']))

        # Make sure we have everything we need.
        for api_func_name, retval_name in retval_info.items():
            if retval_name is None:
                continue

            _make_sure(retval_name in retval_members,
                       f"Return value union member `{retval_name}` doesn't exist (function {api_func_name}())")

    # Locate the `args` union: one member and one nested structure per
    # API function.
    args_union = struct['nested_classes'][1 if has_retvals else 0]
    arg_structs = args_union['nested_classes']
    props = args_union['properties']['public']
    _make_sure(len(arg_structs) == len(props),
               f'Mismatch between nested structure and member count in `{struct["name"]}`')

    # Create beginning/end event record type objects.
    begin_erts = []
    end_erts = []

    for prop, arg_struct in zip(props, arg_structs):
        # API function name is the name of the member.
        api_func_name = str(prop['name'])

        # Beginning event record type: the parameters of the function.
        params = _get_ert_members_for_struct(cpp_header, arg_struct,
                                             f'GetApiData().args.{api_func_name}',
                                             [])
        begin_erts.append(_BeginErt(api_func_name, params))

        # End event record type: the return value, if any.
        ret_members = []

        if has_retvals:
            retval_type = retval_info.get(api_func_name)

            if retval_type is not None:
                ret_members.append(retval_members[retval_type])

        end_erts.append(_EndErt(api_func_name, ret_members))

    return begin_erts, end_erts
|
||||
|
||||
|
||||
# Creates and returns the return value information dictionary.
|
||||
#
|
||||
# This dictionary maps API function names to the member to get within
|
||||
# the callback data structure.
|
||||
#
|
||||
# This only applies to the HSA API: for other APIs, this function
|
||||
# returns `None`.
|
||||
def _get_retval_info(path):
|
||||
if 'hsa' not in os.path.basename(path):
|
||||
return
|
||||
|
||||
retval_info = {}
|
||||
cur_api_func_name = None
|
||||
|
||||
with open(path) as f:
|
||||
for line in f:
|
||||
if 'out << ")' in line and cur_api_func_name is not None:
|
||||
m = re.search(r'api_data.(\w+_retval)', line)
|
||||
retval_info[cur_api_func_name] = m.group(1) if m else None
|
||||
else:
|
||||
m = re.search(r'out << "(hsa_\w+)\(";', line)
|
||||
|
||||
if m:
|
||||
cur_api_func_name = m.group(1)
|
||||
|
||||
return retval_info
|
||||
|
||||
|
||||
# Returns a partial barectf data stream type in YAML (string) containing
# one event record type per item of `erts`, named with `api_prefix`.
def _yaml_dst_from_erts(api_prefix, erts):
    yaml_erts = {}

    for ert in erts:
        # One structure field type member per member.
        #
        # barectf doesn't support nested CTF structures, so join
        # individual member names with `__` to flatten.
        yaml_members = [
            {
                '_' + '__'.join(member.member_names): {
                    'field-type': member.ft.barectf_yaml,
                },
            }
            for member in ert.members
        ]

        # Add event record type.
        yaml_erts[ert.name(api_prefix)] = {
            'payload-field-type': {
                'class': 'struct',
                'members': yaml_members,
            },
        }

    # Convert to YAML.
    return yaml.dump({'event-record-types': yaml_erts})
|
||||
|
||||
|
||||
# Returns the C++ switch statement which calls the correct barectf
|
||||
# tracing function depending on the API function operation ID.
|
||||
def _cpp_switch_statement_from_erts(api_prefix, erts):
|
||||
lines = []
|
||||
lines.append('switch (GetOp()) {')
|
||||
|
||||
for ert in erts:
|
||||
lines.append(f' case {api_prefix.upper()}_API_ID_{ert.api_func_name}:')
|
||||
lines.append(f' barectf_{api_prefix}_api_trace_{ert.name(api_prefix)}(')
|
||||
lines.append(f' &barectf_ctx,')
|
||||
lines.append(f' GetThreadId(),')
|
||||
lines.append(f' GetQueueId(),')
|
||||
lines.append(f' GetAgentId(),')
|
||||
lines.append(f' GetCorrelationId(),')
|
||||
|
||||
if api_prefix == 'hip':
|
||||
lines.append(f' GetKernelName().c_str(),')
|
||||
|
||||
if len(ert.members) == 0:
|
||||
# Remove last comma.
|
||||
lines[-1] = lines[-1].replace(',', '')
|
||||
|
||||
for index, member in enumerate(ert.members):
|
||||
if type(member.ft) is _OptStrFt:
|
||||
# Only dereference C string if not null, otherwise use
|
||||
# an empty string.
|
||||
lines.append(f' {member.access} ? {member.access} : ""')
|
||||
elif type(member.ft) is _StrFt:
|
||||
lines.append(f' {member.access}')
|
||||
else:
|
||||
lines.append(f' {member.ft.cast(member.access)}')
|
||||
|
||||
if index + 1 < len(ert.members):
|
||||
lines[-1] += ','
|
||||
|
||||
lines.append(' );')
|
||||
lines.append(' break;')
|
||||
|
||||
lines.append('}')
|
||||
return lines
|
||||
|
||||
|
||||
# Processes the complete API header file `path` for the API prefix
# `api_prefix`.
#
# For each structure of the header named `<api_prefix>_api_data...`, this
# function writes, in the current working directory:
#
# `<api_prefix>_erts.yaml`:
#     Partial barectf data stream type.
#
# `<api_prefix>_begin.cpp.i`, `<api_prefix>_end.cpp.i`:
#     C++ switch statements for the beginning/end event records.
#
# Quits with exit status 1 if the header cannot be parsed.
def _process_file(api_prefix, path):
    # Create `CppHeader` object.
    try:
        cpp_header = CppHeaderParser.CppHeader(path)
    except CppHeaderParser.CppParseError as exc:
        print(exc, file=sys.stderr)
        sys.exit(1)

    # Get return value information dictionary.
    retval_info = _get_retval_info(path)

    # Find callback data structure.
    cb_data_name_re = re.compile(r'^' + api_prefix + r'_api_data\w+$')

    for struct_name, struct in cpp_header.classes.items():
        if not cb_data_name_re.match(struct_name):
            continue

        # Process callback data structure.
        begin_erts, end_erts = _erts_from_cb_data_struct(api_prefix,
                                                         cpp_header,
                                                         retval_info,
                                                         struct)

        # Write barectf YAML file.
        with open(f'{api_prefix}_erts.yaml', 'w') as yaml_file:
            yaml_file.write(_yaml_dst_from_erts(api_prefix,
                                                begin_erts + end_erts))

        # Write C++ code (beginning, then end event record).
        for kind, erts in (('begin', begin_erts), ('end', end_erts)):
            with open(f'{api_prefix}_{kind}.cpp.i', 'w') as cpp_file:
                cpp_file.write('\n'.join(_cpp_switch_statement_from_erts(api_prefix,
                                                                         erts)))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Disable `CppHeaderParser` printing to standard output.
    for flag_name in ('print_warnings', 'print_errors', 'debug', 'debug_trace'):
        setattr(CppHeaderParser.CppHeaderParser, flag_name, 0)

    # Process the complete API header file: the first command-line
    # argument is the API prefix and the second is the header path.
    _process_file(sys.argv[1], sys.argv[2])
|
||||
@@ -1,33 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
import sys
|
||||
import yaml
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Write `env.yaml` (current working directory) with the version
    # given as the first command-line argument.
    with open('env.yaml', 'w') as env_file:
        environment = {'rocprofiler_version': sys.argv[1]}
        env_file.write(yaml.dump({'environment': environment}))
|
||||
@@ -1,869 +0,0 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <limits>
|
||||
#include <fstream>
|
||||
#include <experimental/filesystem>
|
||||
#include <time.h>
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include "hsa_prof_str.h"
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/amd_detail/hip_prof_str.h>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
#include "../utils.h"
|
||||
|
||||
#include "barectf.h"
|
||||
#include "barectf_event_record.h"
|
||||
#include "barectf_tracer.h"
|
||||
#include "plugin.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace rocm_ctf {
|
||||
namespace {
|
||||
|
||||
// Abstract tracer event record using the barectf context type `CtxT`.
|
||||
template <typename CtxT> class TracerEventRecord : public BarectfEventRecord<CtxT> {
|
||||
protected:
|
||||
explicit TracerEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
|
||||
: BarectfEventRecord<CtxT>{clock_val},
|
||||
op_{record.operation_id.id},
|
||||
thread_id_{record.thread_id.value},
|
||||
queue_id_{record.queue_id.handle},
|
||||
agent_id_{record.agent_id.handle},
|
||||
correlation_id_{record.correlation_id.value} {}
|
||||
|
||||
std::uint32_t GetOp() const noexcept { return op_; }
|
||||
std::uint32_t GetThreadId() const noexcept { return thread_id_; }
|
||||
std::uint64_t GetQueueId() const noexcept { return queue_id_; }
|
||||
std::uint64_t GetAgentId() const noexcept { return agent_id_; }
|
||||
std::uint64_t GetCorrelationId() const noexcept { return correlation_id_; }
|
||||
|
||||
private:
|
||||
std::uint32_t op_;
|
||||
std::uint32_t thread_id_;
|
||||
std::uint64_t queue_id_;
|
||||
std::uint64_t agent_id_;
|
||||
std::uint64_t correlation_id_;
|
||||
};
|
||||
|
||||
// Returns the beginning clock value of `record`, which may be any record
// type having a `timestamps.begin.value` member (tracer or profiler
// record).
template <typename RecordT> std::uint64_t GetRecordBeginClockVal(const RecordT& record) {
  const auto& begin_ts = record.timestamps.begin;

  return begin_ts.value;
}
|
||||
|
||||
// Returns the end clock value of `record`, which may be any record type
// having a `timestamps.end.value` member (tracer or profiler record).
template <typename RecordT> std::uint64_t GetRecordEndClockVal(const RecordT& record) {
  const auto& end_ts = record.timestamps.end;

  return end_ts.value;
}
|
||||
|
||||
// Queries allocated string data using the size query function
|
||||
// `query_size_func` and the data query function `query_data_func`,
|
||||
// returning the corresponding string and freeing temporary allocated
|
||||
// memory.
|
||||
//
|
||||
// Returns an empty string if anything goes wrong.
|
||||
template <typename QuerySizeFuncT, typename QueryDataFuncT>
|
||||
std::string QueryAllocStr(QuerySizeFuncT&& query_size_func, QueryDataFuncT&& query_data_func) {
|
||||
// Query size first.
|
||||
std::size_t size = 0;
|
||||
[[maybe_unused]] auto ret = query_size_func(&size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query size");
|
||||
|
||||
if (size == 0) {
|
||||
// No size: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
// Query data (allocated by query_data_func()).
|
||||
char* alloc_str = nullptr;
|
||||
|
||||
ret = query_data_func(&alloc_str);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query data");
|
||||
|
||||
if (!alloc_str) {
|
||||
// No data: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
// Allocate return value.
|
||||
std::string str_ret{alloc_str};
|
||||
|
||||
// Free allocated data.
|
||||
std::free(alloc_str);
|
||||
|
||||
// Return string object.
|
||||
return str_ret;
|
||||
}
|
||||
|
||||
// rocTX event record.
|
||||
class RocTxEventRecord final : public TracerEventRecord<barectf_roctx_ctx> {
|
||||
public:
|
||||
explicit RocTxEventRecord(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id)
|
||||
: TracerEventRecord<barectf_roctx_ctx>{record, GetRecordBeginClockVal(record)},
|
||||
id_{QueryId(record, session_id)},
|
||||
msg_{QueryMsg(record, session_id)} {}
|
||||
|
||||
void Write(barectf_roctx_ctx& barectf_ctx) const override {
|
||||
barectf_roctx_trace_roctx(&barectf_ctx, GetThreadId(), id_, msg_.c_str());
|
||||
}
|
||||
|
||||
private:
|
||||
// Queries and returns the rocTX message of the record `record` and
|
||||
// session ID `session_id`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryMsg(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
// Query size first.
|
||||
std::size_t msg_size = 0;
|
||||
[[maybe_unused]] auto ret = rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id,
|
||||
&msg_size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message size");
|
||||
|
||||
if (msg_size == 0) {
|
||||
// No size: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
// Query data (borrowed from the record: no need to free).
|
||||
char* msg = nullptr;
|
||||
|
||||
ret = rocprofiler_query_roctx_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_ROCTX_MESSAGE, record.api_data_handle, record.operation_id, &msg);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query rocTX message");
|
||||
|
||||
if (!msg) {
|
||||
// No data: return empty string.
|
||||
return {};
|
||||
}
|
||||
|
||||
return rocmtools::cxx_demangle(msg);
|
||||
}
|
||||
|
||||
// Queries and returns the rocTX ID of the record `record` and the
|
||||
// session ID `session_id`.
|
||||
//
|
||||
// Returns 0 if anything goes wrong.
|
||||
static std::uint64_t QueryId(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
try {
|
||||
return std::stoull(QueryAllocStr(
|
||||
[&record, session_id](const auto size) {
|
||||
return rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, size);
|
||||
},
|
||||
[&record, session_id](const auto str) {
|
||||
return rocprofiler_query_roctx_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_ROCTX_ID, record.api_data_handle, record.operation_id, str);
|
||||
}));
|
||||
} catch (...) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
std::uint64_t id_;
|
||||
std::string msg_;
|
||||
};
|
||||
|
||||
// Abstract HSA API event record.
|
||||
class HsaApiEventRecord : public TracerEventRecord<barectf_hsa_api_ctx> {
|
||||
protected:
|
||||
explicit HsaApiEventRecord(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id, const std::uint64_t clock_val)
|
||||
: TracerEventRecord<barectf_hsa_api_ctx>{record, clock_val},
|
||||
api_data_{QueryApiData(record, session_id)} {}
|
||||
|
||||
const hsa_api_data_t& GetApiData() const noexcept { return api_data_; }
|
||||
|
||||
private:
|
||||
// Queries and returns the API data of the record `record` and session
|
||||
// ID `session_id`.
|
||||
static const hsa_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
// Query size first (only for assertions).
|
||||
[[maybe_unused]] std::size_t size = 0;
|
||||
[[maybe_unused]] auto ret = rocprofiler_query_hsa_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data size");
|
||||
assert(size > 0);
|
||||
|
||||
// Query data (borrowed from the record).
|
||||
char* data = nullptr;
|
||||
ret = rocprofiler_query_hsa_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HSA_API_DATA, record.api_data_handle, record.operation_id, &data);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HSA API data");
|
||||
assert(data);
|
||||
|
||||
// Reinterpret as an HSA API data pointer.
|
||||
return *reinterpret_cast<const hsa_api_data_t*>(data);
|
||||
}
|
||||
|
||||
hsa_api_data_t api_data_;
|
||||
};
|
||||
|
||||
// HSA API event record marking the beginning of an API call: its
// clock value is the one GetRecordBeginClockVal() returns for the
// record.
class HsaApiEventRecordBegin final : public HsaApiEventRecord {
 public:
  explicit HsaApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
                                  const rocprofiler_session_id_t session_id)
      : HsaApiEventRecord(record, session_id, GetRecordBeginClockVal(record)) {}

  void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
    // The body is generated: the included file provides the switch
    // statement.
#include "hsa_begin.cpp.i"
  }
};
|
||||
|
||||
// HSA API event record marking the end of an API call: its clock
// value is the one GetRecordEndClockVal() returns for the record.
class HsaApiEventRecordEnd final : public HsaApiEventRecord {
 public:
  explicit HsaApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
                                const rocprofiler_session_id_t session_id)
      : HsaApiEventRecord(record, session_id, GetRecordEndClockVal(record)) {}

  void Write(barectf_hsa_api_ctx& barectf_ctx) const override {
    // The body is generated: the included file provides the switch
    // statement.
#include "hsa_end.cpp.i"
  }
};
|
||||
|
||||
// Abstract HIP API event record.
|
||||
class HipApiEventRecord : public TracerEventRecord<barectf_hip_api_ctx> {
|
||||
protected:
|
||||
explicit HipApiEventRecord(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id, const std::uint64_t clock_val)
|
||||
: TracerEventRecord<barectf_hip_api_ctx>{record, clock_val},
|
||||
api_data_{QueryApiData(record, session_id)},
|
||||
kernel_name_{QueryKernelName(record, session_id)} {}
|
||||
|
||||
const hip_api_data_t& GetApiData() const noexcept { return api_data_; }
|
||||
const std::string& GetKernelName() const noexcept { return kernel_name_; }
|
||||
|
||||
private:
|
||||
// Queries and returns the API data of the record `record` and session
|
||||
// ID `session_id`.
|
||||
static const hip_api_data_t& QueryApiData(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
// Query size first (only for assertions).
|
||||
[[maybe_unused]] std::size_t size = 0;
|
||||
[[maybe_unused]] auto ret = rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data size");
|
||||
assert(size > 0);
|
||||
|
||||
// Query data (borrowed from the record).
|
||||
char* data = nullptr;
|
||||
|
||||
ret = rocprofiler_query_hip_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HIP_API_DATA, record.api_data_handle, record.operation_id, &data);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query HIP API data");
|
||||
assert(data);
|
||||
|
||||
// Reinterpret as an HIP API data pointer.
|
||||
return *reinterpret_cast<const hip_api_data_t*>(data);
|
||||
}
|
||||
|
||||
// Queries and returns the kernel name of the record `record` and
|
||||
// session ID `session_id`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryKernelName(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
const auto kernel_name = QueryAllocStr(
|
||||
[&record, session_id](const auto size) {
|
||||
return rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_KERNEL_NAME, record.api_data_handle, record.operation_id,
|
||||
size);
|
||||
},
|
||||
[&record, session_id](const auto str) {
|
||||
return rocprofiler_query_hip_tracer_api_data_info(session_id, ROCPROFILER_HIP_KERNEL_NAME,
|
||||
record.api_data_handle,
|
||||
record.operation_id, str);
|
||||
});
|
||||
|
||||
if (kernel_name.size() > 1) {
|
||||
// Return demangled version.
|
||||
return rocmtools::cxx_demangle(kernel_name);
|
||||
}
|
||||
|
||||
return kernel_name;
|
||||
}
|
||||
|
||||
hip_api_data_t api_data_;
|
||||
std::string kernel_name_;
|
||||
};
|
||||
|
||||
// HIP API event record marking the beginning of an API call: its
// clock value is the one GetRecordBeginClockVal() returns for the
// record.
class HipApiEventRecordBegin final : public HipApiEventRecord {
 public:
  explicit HipApiEventRecordBegin(const rocprofiler_record_tracer_t& record,
                                  const rocprofiler_session_id_t session_id)
      : HipApiEventRecord(record, session_id, GetRecordBeginClockVal(record)) {}

  void Write(barectf_hip_api_ctx& barectf_ctx) const override {
    // The body is generated: the included file provides the switch
    // statement.
#include "hip_begin.cpp.i"
  }
};
|
||||
|
||||
// HIP API event record marking the end of an API call: its clock
// value is the one GetRecordEndClockVal() returns for the record.
class HipApiEventRecordEnd final : public HipApiEventRecord {
 public:
  explicit HipApiEventRecordEnd(const rocprofiler_record_tracer_t& record,
                                const rocprofiler_session_id_t session_id)
      : HipApiEventRecord(record, session_id, GetRecordEndClockVal(record)) {}

  void Write(barectf_hip_api_ctx& barectf_ctx) const override {
    // The body is generated: the included file provides the switch
    // statement.
#include "hip_end.cpp.i"
  }
};
|
||||
|
||||
// HSA API handle type event record.
|
||||
class HsaHandleTypeEventRecord final : public BarectfEventRecord<barectf_hsa_handles_ctx> {
|
||||
public:
|
||||
enum class Type {
|
||||
CPU = 0,
|
||||
GPU = 1,
|
||||
};
|
||||
|
||||
explicit HsaHandleTypeEventRecord(const std::uint64_t handle, const Type type)
|
||||
: BarectfEventRecord<barectf_hsa_handles_ctx>{0}, handle_{handle}, type_{type} {}
|
||||
|
||||
void Write(barectf_hsa_handles_ctx& barectf_ctx) const override {
|
||||
barectf_hsa_handles_trace_hsa_handle_type(&barectf_ctx, handle_,
|
||||
static_cast<std::uint8_t>(type_));
|
||||
}
|
||||
|
||||
private:
|
||||
std::uint64_t handle_;
|
||||
Type type_;
|
||||
};
|
||||
|
||||
// Abstract API operation event record.
|
||||
class ApiOpEventRecord : public TracerEventRecord<barectf_api_ops_ctx> {
|
||||
protected:
|
||||
explicit ApiOpEventRecord(const rocprofiler_record_tracer_t& record, const std::uint64_t clock_val)
|
||||
: TracerEventRecord<barectf_api_ops_ctx>{record, clock_val} {}
|
||||
};
|
||||
|
||||
// HSA API operation event record (beginning).
|
||||
class HsaOpEventRecordBegin final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HsaOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordBeginClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hsa_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// HSA API operation event record (end).
|
||||
class HsaOpEventRecordEnd final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HsaOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hsa_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// HIP API operation event record (beginning).
|
||||
class HipOpEventRecordBegin final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HipOpEventRecordBegin(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordBeginClockVal(record)},
|
||||
kernel_name_{QueryKernelName(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hip_op_begin(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId(), kernel_name_.c_str());
|
||||
}
|
||||
|
||||
private:
|
||||
// Queries and returns the kernel name of the record `record`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryKernelName(const rocprofiler_record_tracer_t& record) {
|
||||
if (record.operation_id.id == 0) {
|
||||
if (const auto api_handle = record.api_data_handle.handle) {
|
||||
const auto str = reinterpret_cast<const char*>(api_handle);
|
||||
|
||||
if (std::strlen(str) > 1) {
|
||||
// Return demangled version.
|
||||
return rocmtools::cxx_demangle(str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
std::string kernel_name_;
|
||||
};
|
||||
|
||||
// HIP API operation event record (end).
|
||||
class HipOpEventRecordEnd final : public ApiOpEventRecord {
|
||||
public:
|
||||
explicit HipOpEventRecordEnd(const rocprofiler_record_tracer_t& record)
|
||||
: ApiOpEventRecord{record, GetRecordEndClockVal(record)} {}
|
||||
|
||||
void Write(barectf_api_ops_ctx& barectf_ctx) const override {
|
||||
barectf_api_ops_trace_hip_op_end(&barectf_ctx, GetThreadId(), GetQueueId(), GetAgentId(),
|
||||
GetCorrelationId());
|
||||
}
|
||||
};
|
||||
|
||||
// Profiler record base.
|
||||
class ProfilerEventRecord : public BarectfEventRecord<barectf_profiler_ctx> {
|
||||
public:
|
||||
explicit ProfilerEventRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id)
|
||||
: BarectfEventRecord<barectf_profiler_ctx>{GetRecordBeginClockVal(record)},
|
||||
dispatch_{record.header.id.handle},
|
||||
gpu_id_{record.gpu_id.handle},
|
||||
queue_id_{record.queue_id.handle},
|
||||
queue_index_{record.queue_idx.value},
|
||||
process_id_{GetPid()},
|
||||
thread_id_{record.thread_id.value},
|
||||
kernel_id_{record.kernel_id.handle},
|
||||
kernel_name_{QueryKernelName(record)},
|
||||
counter_infos_{QueryCounterInfos(record, session_id)} {}
|
||||
|
||||
void Write(barectf_profiler_ctx& barectf_ctx) const override {
|
||||
barectf_profiler_trace_profiler_record(
|
||||
&barectf_ctx, dispatch_, gpu_id_, queue_id_, queue_index_, process_id_, thread_id_,
|
||||
kernel_id_, kernel_name_.c_str(), counter_infos_.names.size(), counter_infos_.names.data(),
|
||||
counter_infos_.values.size(), counter_infos_.values.data());
|
||||
}
|
||||
|
||||
protected:
|
||||
// Counter infos.
|
||||
//
|
||||
// `names[i]` names the counter value `values[i]`.
|
||||
struct CounterInfos final {
|
||||
// `names_storage` owns the strings while the elements of `names`
|
||||
// point to the internal C strings of `names_storage`.
|
||||
//
|
||||
// This is needed because barectf expects an array of contiguous
|
||||
// C string pointers.
|
||||
std::vector<std::string> names_storage;
|
||||
std::vector<const char*> names;
|
||||
|
||||
// Counter values.
|
||||
std::vector<std::uint64_t> values;
|
||||
};
|
||||
|
||||
std::uint64_t GetDispatch() const noexcept { return dispatch_; }
|
||||
std::uint64_t GetGpuId() const noexcept { return gpu_id_; }
|
||||
std::uint64_t GetQueueId() const noexcept { return queue_id_; }
|
||||
std::uint64_t GetQueueIndex() const noexcept { return queue_index_; }
|
||||
std::uint32_t GetProcessId() const noexcept { return process_id_; }
|
||||
std::uint32_t GetThreadId() const noexcept { return thread_id_; }
|
||||
std::uint64_t GetKernelId() const noexcept { return kernel_id_; }
|
||||
const std::string& GetKernelName() const noexcept { return kernel_name_; }
|
||||
const CounterInfos& GetCounterInfos() const noexcept { return counter_infos_; }
|
||||
|
||||
private:
|
||||
// Queries and returns the kernel name of the record `record`.
|
||||
//
|
||||
// Returns an empty string if not available.
|
||||
static std::string QueryKernelName(const rocprofiler_record_profiler_t& record) {
|
||||
const auto kernel_name = QueryAllocStr(
|
||||
[&record](const auto size) {
|
||||
return rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, record.kernel_id, size);
|
||||
},
|
||||
[&record](const auto str) {
|
||||
return rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, record.kernel_id,
|
||||
const_cast<const char**>(str));
|
||||
});
|
||||
|
||||
if (kernel_name.size() <= 1) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Return truncated and demangled version.
|
||||
return rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name));
|
||||
}
|
||||
|
||||
// Queries and returns the counter infos of the record `record` and
|
||||
// session ID `session_id`.
|
||||
static CounterInfos QueryCounterInfos(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
if (!record.counters) {
|
||||
// No counters.
|
||||
return {};
|
||||
}
|
||||
|
||||
CounterInfos infos;
|
||||
|
||||
for (std::size_t i = 0; i < record.counters_count.value; ++i) {
|
||||
auto& counter = record.counters[i];
|
||||
|
||||
if (counter.counter_handler.handle == 0) {
|
||||
// Not available: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Query counter name size first
|
||||
std::size_t counter_name_size = 0;
|
||||
[[maybe_unused]] auto ret = rocprofiler_query_counter_info_size(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, counter.counter_handler, &counter_name_size);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name size");
|
||||
|
||||
if (counter_name_size == 0) {
|
||||
// No size: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Query counter name (borrowed from `record`: no need to free).
|
||||
const char* counter_name = nullptr;
|
||||
|
||||
ret = rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
|
||||
counter.counter_handler, &counter_name);
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Query counter name");
|
||||
|
||||
if (!counter_name) {
|
||||
// Not available: continue.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Push back infos.
|
||||
infos.names_storage.emplace_back(counter_name);
|
||||
infos.names.push_back(infos.names_storage.back().c_str());
|
||||
infos.values.push_back(counter.value.value);
|
||||
}
|
||||
|
||||
return infos;
|
||||
}
|
||||
|
||||
std::uint64_t dispatch_;
|
||||
std::uint64_t gpu_id_;
|
||||
std::uint64_t queue_id_;
|
||||
std::uint64_t queue_index_;
|
||||
std::uint32_t process_id_;
|
||||
std::uint32_t thread_id_;
|
||||
std::uint64_t kernel_id_;
|
||||
std::string kernel_name_;
|
||||
CounterInfos counter_infos_;
|
||||
};
|
||||
|
||||
// Profiler record base.
|
||||
class ProfilerWithKernelPropsEventRecord final : public ProfilerEventRecord {
|
||||
private:
|
||||
// According to `plugin/file/file.cpp`:
|
||||
//
|
||||
// > Taken from rocprofiler: The size hasn't changed in recent past
|
||||
static constexpr std::uint32_t lds_block_size_ = 128 * 4;
|
||||
|
||||
public:
|
||||
explicit ProfilerWithKernelPropsEventRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id)
|
||||
: ProfilerEventRecord{record, session_id},
|
||||
grid_size_{record.kernel_properties.grid_size},
|
||||
workgroup_size_{record.kernel_properties.workgroup_size},
|
||||
lds_size_{
|
||||
((record.kernel_properties.lds_size + (lds_block_size_ - 1)) & ~(lds_block_size_ - 1))},
|
||||
scratch_size_{record.kernel_properties.scratch_size},
|
||||
arch_vgpr_count_{record.kernel_properties.arch_vgpr_count},
|
||||
accum_vgpr_count_{record.kernel_properties.accum_vgpr_count},
|
||||
sgpr_count_{record.kernel_properties.sgpr_count},
|
||||
wave_size_{record.kernel_properties.wave_size},
|
||||
signal_handle_{record.kernel_properties.signal_handle} {}
|
||||
|
||||
void Write(barectf_profiler_ctx& barectf_ctx) const override {
|
||||
barectf_profiler_trace_profiler_record_with_kernel_properties(
|
||||
&barectf_ctx, GetDispatch(), GetGpuId(), GetQueueId(), GetQueueIndex(), GetProcessId(),
|
||||
GetThreadId(), GetKernelId(), GetKernelName().c_str(), GetCounterInfos().names.size(),
|
||||
GetCounterInfos().names.data(), GetCounterInfos().values.size(),
|
||||
GetCounterInfos().values.data(), grid_size_, workgroup_size_, lds_size_, scratch_size_,
|
||||
arch_vgpr_count_, accum_vgpr_count_, sgpr_count_, wave_size_, signal_handle_);
|
||||
}
|
||||
|
||||
private:
|
||||
std::uint64_t grid_size_;
|
||||
std::uint64_t workgroup_size_;
|
||||
std::uint64_t lds_size_;
|
||||
std::uint64_t scratch_size_;
|
||||
std::uint64_t arch_vgpr_count_;
|
||||
std::uint64_t accum_vgpr_count_;
|
||||
std::uint64_t sgpr_count_;
|
||||
std::uint64_t wave_size_;
|
||||
std::uint64_t signal_handle_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
Plugin::Plugin(const std::size_t packet_size, const fs::path& trace_dir,
|
||||
const fs::path& metadata_stream_path)
|
||||
: roctx_tracer_{packet_size, trace_dir, "roctx_"},
|
||||
hsa_api_tracer_{packet_size, trace_dir, "hsa_api_"},
|
||||
hip_api_tracer_{packet_size, trace_dir, "hip_api_"},
|
||||
api_ops_tracer_{packet_size, trace_dir, "api_ops_"},
|
||||
hsa_handles_tracer_{packet_size, trace_dir, "hsa_handles_"},
|
||||
profiler_tracer_{packet_size, trace_dir, "profiler_"} {
|
||||
// Make sure the trace directory doesn't exist.
|
||||
if (fs::exists(trace_dir)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "CTF trace directory `" << trace_dir.string() << "` already exists";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Make sure the metadata stream file exists.
|
||||
if (!fs::exists(metadata_stream_path)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "CTF metadata stream file `" << metadata_stream_path.string() << "` doesn't exist";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Create trace directory.
|
||||
if (!fs::create_directory(trace_dir)) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "Cannot create the CTF trace directory `" << trace_dir.string() << "`";
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Copy adjusted metadata stream file to trace directory.
|
||||
try {
|
||||
CopyAdjustedMetadataStreamFile(metadata_stream_path, trace_dir);
|
||||
} catch (const std::exception& exc) {
|
||||
std::ostringstream ss;
|
||||
|
||||
ss << "Cannot adjust and copy metadata stream file `" << metadata_stream_path.string()
|
||||
<< "` to the CTF trace directory `" << trace_dir.string() << "`: " << exc.what();
|
||||
throw std::runtime_error{ss.str()};
|
||||
}
|
||||
|
||||
// Write HSA handle type event records.
|
||||
WriteHsaHandleTypes();
|
||||
}
|
||||
|
||||
void Plugin::HandleTracerRecord(const rocprofiler_record_tracer_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
std::lock_guard<std::mutex> lock{lock_};
|
||||
|
||||
// Depending on the domain, create and add an event record to the
|
||||
// corresponding tracer.
|
||||
switch (record.domain) {
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
roctx_tracer_.AddEventRecord(std::make_shared<const RocTxEventRecord>(record, session_id));
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
hsa_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HsaApiEventRecordBegin>(record, session_id));
|
||||
hsa_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HsaApiEventRecordEnd>(record, session_id));
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
hip_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HipApiEventRecordBegin>(record, session_id));
|
||||
hip_api_tracer_.AddEventRecord(
|
||||
std::make_shared<const HipApiEventRecordEnd>(record, session_id));
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordBegin>(record));
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HsaOpEventRecordEnd>(record));
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordBegin>(record));
|
||||
api_ops_tracer_.AddEventRecord(std::make_shared<const HipOpEventRecordEnd>(record));
|
||||
break;
|
||||
default:
|
||||
// Warn
|
||||
std::cerr << "rocm_ctf::Plugin::HandleTracerRecord(): "
|
||||
<< "ignoring record for unknown domain #" << record.domain << std::endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Plugin::HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
|
||||
const rocprofiler_session_id_t session_id) {
|
||||
std::lock_guard<std::mutex> lock{lock_};
|
||||
profiler_tracer_.AddEventRecord(
|
||||
std::make_shared<const ProfilerWithKernelPropsEventRecord>(record, session_id));
|
||||
}
|
||||
|
||||
void Plugin::HandleBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* const end,
|
||||
const rocprofiler_session_id_t session_id,
|
||||
const rocprofiler_buffer_id_t buffer_id) {
|
||||
while (begin && begin < end) {
|
||||
if (begin->kind == ROCPROFILER_TRACER_RECORD) {
|
||||
HandleTracerRecord(*reinterpret_cast<const rocprofiler_record_tracer_t*>(begin), session_id);
|
||||
} else {
|
||||
assert(begin->kind == ROCPROFILER_PROFILER_RECORD);
|
||||
HandleProfilerRecord(*reinterpret_cast<const rocprofiler_record_profiler_t*>(begin),
|
||||
session_id);
|
||||
}
|
||||
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
}
|
||||
|
||||
void Plugin::WriteHsaHandleTypes() {
|
||||
[[maybe_unused]] const auto status = hsa_iterate_agents(
|
||||
[](const auto agent, const auto user_data) {
|
||||
auto& tracer = *static_cast<HsaHandlesTracer*>(user_data);
|
||||
hsa_device_type_t type;
|
||||
|
||||
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS) {
|
||||
return HSA_STATUS_ERROR;
|
||||
}
|
||||
|
||||
using Type = HsaHandleTypeEventRecord::Type;
|
||||
|
||||
auto event_record = std::make_shared<HsaHandleTypeEventRecord>(
|
||||
agent.handle, type == HSA_DEVICE_TYPE_CPU ? Type::CPU : Type::GPU);
|
||||
|
||||
tracer.AddEventRecord(std::move(event_record));
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&hsa_handles_tracer_);
|
||||
|
||||
assert(status == HSA_STATUS_SUCCESS && "Iterate HSA agents");
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::uint64_t ns_per_s = 1'000'000'000ULL;
|
||||
|
||||
// Samples the ROCMTools clock and returns the value.
|
||||
std::uint64_t GetClkVal() {
|
||||
rocprofiler_timestamp_t ts;
|
||||
[[maybe_unused]] const auto ret = rocprofiler_get_timestamp(&ts);
|
||||
|
||||
assert(ret == ROCPROFILER_STATUS_SUCCESS && "Get timestamp");
|
||||
return ts.value;
|
||||
}
|
||||
|
||||
// Updates `offset` and `delta`, if needed, to a more accurate clock
|
||||
// class offset and a smaller ROCMTools clock value delta.
|
||||
//
|
||||
// This function samples the ROCMTools clock twice, also sampling the
|
||||
// real-time clock in between, and uses the average ROCMTools clock
|
||||
// value to approximate the actual clock class offset.
|
||||
//
|
||||
// This strategy is based on the measure_single_clock_offset() function
|
||||
// of the LTTng-tools project <https://lttng.org/>.
|
||||
void UpdateClkClsOffsetAndDelta(std::uint64_t& offset, std::uint64_t& delta) {
|
||||
// Sample ROCMTools clock (first time).
|
||||
const auto rocm_clk_val1 = GetClkVal();
|
||||
|
||||
// Sample real-time clock.
|
||||
timespec realtime_spec = {0, 0};
|
||||
[[maybe_unused]] const auto ret = clock_gettime(CLOCK_REALTIME, &realtime_spec);
|
||||
|
||||
assert(ret == 0);
|
||||
|
||||
// Sample ROCMTools clock (second time).
|
||||
const auto rocm_clk_val2 = GetClkVal();
|
||||
|
||||
// Compute the current ROCMTools clock value delta.
|
||||
const auto this_delta = rocm_clk_val2 - rocm_clk_val1;
|
||||
|
||||
if (this_delta > delta) {
|
||||
// Discard larger delta.
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute the average ROCMTools clock value.
|
||||
const auto rocm_clk_val_avg = (rocm_clk_val1 + rocm_clk_val2) >> 1;
|
||||
|
||||
// Compute the real-time clock value in nanoseconds.
|
||||
const auto realtime_ns =
|
||||
(static_cast<std::uint64_t>(realtime_spec.tv_sec) * ns_per_s) + realtime_spec.tv_nsec;
|
||||
|
||||
// Update clock class offset and delta.
|
||||
assert(rocm_clk_val_avg < realtime_ns);
|
||||
offset = realtime_ns - rocm_clk_val_avg;
|
||||
delta = this_delta;
|
||||
}
|
||||
|
||||
// Computes and returns the most possible accurate clock class offset.
|
||||
std::uint64_t GetMetadataClkClsOffset() {
|
||||
std::uint64_t offset = 0;
|
||||
std::uint64_t delta = std::numeric_limits<std::uint64_t>::max();
|
||||
|
||||
// Best effort to find the most accurate offset.
|
||||
for (auto i = 0U; i < 50U; ++i) {
|
||||
UpdateClkClsOffsetAndDelta(offset, delta);
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Loads the metadata stream file `metadata_stream_path`, replaces its
// `offset = 0;` term with the measured clock class offset, and writes
// the result to the `metadata` file of the `trace_dir` directory.
//
// Throws `std::runtime_error` if the file cannot be opened for
// reading, if it doesn't contain the `offset = 0;` term, or if the
// adjusted metadata stream cannot be completely written.
void Plugin::CopyAdjustedMetadataStreamFile(const fs::path& metadata_stream_path,
                                            const fs::path& trace_dir) {
  // Load installed metadata stream file contents.
  std::string metadata;

  {
    std::ifstream input{metadata_stream_path};

    if (!input) {
      throw std::runtime_error{"Cannot open the metadata stream file for reading"};
    }

    // Read up to the first null character, that is, the whole text.
    std::getline(input, metadata, '\0');
  }

  // Replace the original `offset` property.
  {
    static constexpr auto offset_term = "offset = 0;";
    const auto offset_pos = metadata.find(offset_term);

    if (offset_pos == std::string::npos) {
      // Without this check, replace() below would throw an obscure
      // `std::out_of_range`.
      throw std::runtime_error{"Cannot find the `offset = 0;` term in the metadata stream"};
    }

    std::ostringstream ss;

    ss << "offset = " << GetMetadataClkClsOffset() << ';';
    metadata.replace(offset_pos, std::strlen(offset_term), ss.str());
  }

  // Write adjusted metadata stream to trace directory.
  {
    std::ofstream output{trace_dir / "metadata"};

    output.write(metadata.data(), metadata.size());

    // Close explicitly to detect a failure of the final flush too.
    output.close();

    if (!output) {
      throw std::runtime_error{"Cannot write the adjusted metadata stream file"};
    }
  }
}
|
||||
|
||||
} // namespace rocm_ctf
|
||||
@@ -1,146 +0,0 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef PLUGIN_CTF_PLUGIN_H
|
||||
#define PLUGIN_CTF_PLUGIN_H
|
||||
|
||||
#include <mutex>
|
||||
#include <cstdlib>
|
||||
#include <experimental/filesystem>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
|
||||
#include "barectf.h"
|
||||
#include "barectf_tracer.h"
|
||||
|
||||
namespace rocm_ctf {
|
||||
|
||||
// CTF plugin.
|
||||
//
|
||||
// Build a plugin instance, and then call HandleTracerRecord(),
|
||||
// HandleProfilerRecord(), and HandleBufferRecords() to add event
|
||||
// records.
|
||||
//
|
||||
// A plugin instance performs important tasks at destruction time.
|
||||
class Plugin final {
|
||||
public:
|
||||
// Builds a plugin instance to write a CTF trace in the `trace_dir`
|
||||
// directory with packets of size `packet_size` bytes.
|
||||
//
|
||||
// `trace_dir` must not exist.
|
||||
//
|
||||
// This constructor immediately adjusts and copies the metadata stream
|
||||
// file `metadata_stream_path` to the trace directory (`trace_dir`).
|
||||
explicit Plugin(std::size_t packet_size, const std::experimental::filesystem::path& trace_dir,
|
||||
const std::experimental::filesystem::path& metadata_stream_path);
|
||||
|
||||
// Handles a tracer record.
|
||||
void HandleTracerRecord(const rocprofiler_record_tracer_t& record,
|
||||
rocprofiler_session_id_t session_id);
|
||||
|
||||
// Handles a profiler record.
|
||||
void HandleProfilerRecord(const rocprofiler_record_profiler_t& record,
|
||||
rocprofiler_session_id_t session_id);
|
||||
|
||||
// Handles tracer or profiler records from `begin` to `end`
|
||||
// (excluded).
|
||||
void HandleBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id);
|
||||
|
||||
private:
|
||||
// rocTX barectf platform descriptor.
|
||||
struct RocTxPlatformDescr final {
|
||||
using Ctx = barectf_roctx_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_roctx_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_roctx_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// HSA API barectf platform descriptor.
|
||||
struct HsaApiPlatformDescr final {
|
||||
using Ctx = barectf_hsa_api_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_hsa_api_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_hsa_api_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// HIP API barectf platform descriptor.
|
||||
struct HipApiPlatformDescr final {
|
||||
using Ctx = barectf_hip_api_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_hip_api_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_hip_api_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// HSA handles barectf platform descriptor.
|
||||
struct HsaHandlesPlatformDescr final {
|
||||
using Ctx = barectf_hsa_handles_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_hsa_handles_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_hsa_handles_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// API operations barectf platform descriptor.
|
||||
struct ApiOpsPlatformDescr final {
|
||||
using Ctx = barectf_api_ops_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_api_ops_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_api_ops_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// Profiler barectf platform descriptor.
|
||||
struct ProfilerPlatformDescr final {
|
||||
using Ctx = barectf_profiler_ctx;
|
||||
|
||||
static void OpenPacket(Ctx& ctx) { barectf_profiler_open_packet(&ctx); }
|
||||
static void ClosePacket(Ctx& ctx) { barectf_profiler_close_packet(&ctx); }
|
||||
};
|
||||
|
||||
// barectf tracer for HSA handle mappings.
|
||||
using HsaHandlesTracer = BarectfTracer<HsaHandlesPlatformDescr>;
|
||||
|
||||
// Writes the HSA handle type mappings to a dedicated data stream
|
||||
// file.
|
||||
void WriteHsaHandleTypes();
|
||||
|
||||
// Loads the existing metadata stream file `metadata_stream_path`,
|
||||
// adjusts the `offset` property of its single clock class, and writes
|
||||
// the result to the `metadata` file within the `trace_dir` directory.
|
||||
void CopyAdjustedMetadataStreamFile(
|
||||
const std::experimental::filesystem::path& metadata_stream_path,
|
||||
const std::experimental::filesystem::path& trace_dir);
|
||||
|
||||
// Dedicated tracers.
|
||||
BarectfTracer<RocTxPlatformDescr> roctx_tracer_;
|
||||
BarectfTracer<HsaApiPlatformDescr> hsa_api_tracer_;
|
||||
BarectfTracer<HipApiPlatformDescr> hip_api_tracer_;
|
||||
BarectfTracer<ApiOpsPlatformDescr> api_ops_tracer_;
|
||||
HsaHandlesTracer hsa_handles_tracer_;
|
||||
BarectfTracer<ProfilerPlatformDescr> profiler_tracer_;
|
||||
|
||||
// Locks any operation performed on the data of this.
|
||||
std::mutex lock_;
|
||||
};
|
||||
|
||||
} // namespace rocm_ctf
|
||||
|
||||
#endif // PLUGIN_CTF_PLUGIN_H
|
||||
@@ -1,7 +0,0 @@
|
||||
/* Symbol version script: the four plugin entry points listed under `global`
   stay exported; `local: *` hides every other symbol.
   NOTE(review): presumably this is the `exportmap` file handed to the linker
   through --version-script by the plugin CMake scripts -- confirm. */
{
|
||||
global: rocprofiler_plugin_initialize;
|
||||
rocprofiler_plugin_finalize;
|
||||
rocprofiler_plugin_write_buffer_records;
|
||||
rocprofiler_plugin_write_record;
|
||||
local: *;
|
||||
};
|
||||
@@ -1,44 +0,0 @@
|
||||
# ###############################################################################
|
||||
# # Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
# #
|
||||
# # Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# # of this software and associated documentation files (the "Software"), to
|
||||
# # deal in the Software without restriction, including without limitation the
|
||||
# # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# # sell copies of the Software, and to permit persons to whom the Software is
|
||||
# # furnished to do so, subject to the following conditions:
|
||||
# #
|
||||
# # The above copyright notice and this permission notice shall be included in
|
||||
# # all copies or substantial portions of the Software.
|
||||
# #
|
||||
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# # IN THE SOFTWARE.
|
||||
# ###############################################################################
|
||||
|
||||
# Helper source shared with the plugin build.
file(GLOB ROCPROFILER_UTIL_SRC_FILES
     ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp)

# Every .cpp file in this directory is part of the plugin.
file(GLOB FILE_SOURCES "*.cpp")

add_library(file_plugin SHARED
            ${FILE_SOURCES}
            ${ROCPROFILER_UTIL_SRC_FILES})

# Hidden visibility by default; relink when the export map changes.
set_target_properties(
  file_plugin
  PROPERTIES CXX_VISIBILITY_PRESET hidden
             LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
             LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})

target_compile_definitions(
  file_plugin
  PRIVATE HIP_PROF_HIP_API_STRING=1
          __HIP_PLATFORM_HCC__=1)

target_include_directories(
  file_plugin
  PRIVATE ${PROJECT_SOURCE_DIR}/inc
          ${PROJECT_SOURCE_DIR})

# Export only what the version script lists and reject unresolved symbols.
target_link_options(
  file_plugin
  PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
          -Wl,--no-undefined)

target_link_libraries(
  file_plugin
  PRIVATE ${ROCPROFILER_TARGET}
          hsa-runtime64::hsa-runtime64
          systemd
          stdc++fs
          amd_comgr
          dl)

install(
  TARGETS file_plugin
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
          COMPONENT runtime)
|
||||
@@ -1,472 +0,0 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <experimental/filesystem>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <hsa/hsa.h>
|
||||
#include <mutex>
|
||||
|
||||
#include "rocprofiler.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
#include "../utils.h"
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
static std::string output_file_name;
|
||||
class file_plugin_t {
|
||||
private:
|
||||
// Kind of output being written; get_output_file() uses it to pick the file.
enum class output_type_t { COUNTER, TRACER, PC_SAMPLING };
|
||||
|
||||
class output_file_t {
|
||||
public:
|
||||
output_file_t(std::string name) : name_(std::move(name)) {}
|
||||
|
||||
std::string name() const { return name_; }
|
||||
|
||||
template <typename T> std::ostream& operator<<(T&& value) {
|
||||
if (!is_open()) open();
|
||||
return stream_ << std::forward<T>(value);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& (*func)(std::ostream&)) {
|
||||
if (!is_open()) open();
|
||||
return stream_ << func;
|
||||
}
|
||||
|
||||
void open() {
|
||||
// If the stream is already in the failed state, there's no need to try
|
||||
// to open the file.
|
||||
if (fail()) return;
|
||||
|
||||
const char* output_dir = getenv("OUTPUT_PATH");
|
||||
output_file_name = getenv("OUT_FILE_NAME") ? std::string(getenv("OUT_FILE_NAME")) + "_" : "";
|
||||
|
||||
if (output_dir == nullptr) {
|
||||
stream_.copyfmt(std::cout);
|
||||
stream_.clear(std::cout.rdstate());
|
||||
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
|
||||
return;
|
||||
}
|
||||
|
||||
fs::path output_prefix(output_dir);
|
||||
if (!fs::is_directory(fs::status(output_prefix))) {
|
||||
if (!stream_.fail()) rocmtools::warning("Cannot open output directory '%s'", output_dir);
|
||||
stream_.setstate(std::ios_base::failbit);
|
||||
return;
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
ss << output_file_name << GetPid() << "_" << name_;
|
||||
stream_.open(output_prefix / ss.str());
|
||||
}
|
||||
|
||||
bool is_open() const { return stream_.is_open(); }
|
||||
bool fail() const { return stream_.fail(); }
|
||||
|
||||
private:
|
||||
const std::string name_;
|
||||
std::ofstream stream_;
|
||||
};
|
||||
|
||||
// Returns the member file that records of `output_type` are written to.
// `domain` is only consulted for tracer output; a tracer domain without a
// dedicated file trips the assert and yields nullptr.
output_file_t* get_output_file(output_type_t output_type, uint32_t domain = 0) {
  if (output_type == output_type_t::COUNTER) return &output_file_;
  if (output_type == output_type_t::PC_SAMPLING) return &pc_sample_file_;
  if (output_type != output_type_t::TRACER) return nullptr;

  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      return &roctx_file_;
    case ACTIVITY_DOMAIN_HSA_API:
      return &hsa_api_file_;
    case ACTIVITY_DOMAIN_HIP_API:
      return &hip_api_file_;
    case ACTIVITY_DOMAIN_HIP_OPS:
      return &hip_activity_file_;
    case ACTIVITY_DOMAIN_HSA_OPS:
      return &hsa_async_copy_file_;
    default:
      break;
  }
  assert(!"domain/op not supported!");
  return nullptr;
}
|
||||
|
||||
public:
|
||||
file_plugin_t() {
|
||||
output_file_t hsa_handles("hsa_handles.txt");
|
||||
|
||||
[[maybe_unused]] hsa_status_t status = hsa_iterate_agents(
|
||||
[](hsa_agent_t agent, void* user_data) {
|
||||
auto* file = static_cast<decltype(hsa_handles)*>(user_data);
|
||||
hsa_device_type_t type;
|
||||
|
||||
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS)
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
*file << std::hex << std::showbase << agent.handle << " agent "
|
||||
<< ((type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu") << std::endl;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&hsa_handles);
|
||||
assert(status == HSA_STATUS_SUCCESS && "failed to iterate HSA agents");
|
||||
if (hsa_handles.fail()) {
|
||||
rocmtools::warning("Cannot write to '%s'", hsa_handles.name().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
// App begin timestamp begin_ts_file.txt
|
||||
output_file_t begin_ts("begin_ts_file.txt");
|
||||
|
||||
[[maybe_unused]] rocprofiler_timestamp_t app_begin_timestamp = {};
|
||||
CHECK_ROCMTOOLS(rocprofiler_get_timestamp(&app_begin_timestamp));
|
||||
|
||||
begin_ts << std::dec << app_begin_timestamp.value << std::endl;
|
||||
if (begin_ts.fail()) {
|
||||
rocmtools::warning("Cannot write to '%s'", begin_ts.name().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
valid_ = true;
|
||||
}
|
||||
|
||||
// Serializes record output; held by FlushTracerRecord() and
// FlushProfilerRecord() for the whole time they format and write a record.
std::mutex writing_lock;
|
||||
|
||||
// Returns the printable label for `domain`, or "" for a domain not listed here.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  const char* label = "";
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:
      label = "ROCTX_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HIP_API:
      label = "HIP_API_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HIP_OPS:
      label = "HIP_OPS_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      label = "HSA_API_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HSA_OPS:
      label = "HSA_OPS_DOMAIN";
      break;
    case ACTIVITY_DOMAIN_HSA_EVT:
      label = "HSA_EVT_DOMAIN";
      break;
    default:
      break;
  }
  return label;
}
|
||||
|
||||
// Writes one tracer record as a single text line to the trace file of the
// record's activity domain.
//
// Depending on the domain, the function/kernel name or the rocTX message and
// id are queried from rocprofiler and appended to the line. Records whose
// domain has no trace file are dropped. `buffer_id` is accepted for interface
// compatibility and is not used. Holds writing_lock for the whole call.
//
// NOTE(review): the query calls receive the address of a malloc'ed pointer;
// whether they fill that buffer or replace the pointer is not visible here, so
// the original allocate/release pattern is kept unchanged -- confirm ownership.
void FlushTracerRecord(rocprofiler_record_tracer_t tracer_record, rocprofiler_session_id_t session_id,
                       rocprofiler_buffer_id_t buffer_id = rocprofiler_buffer_id_t{0}) {
  std::lock_guard<std::mutex> lock(writing_lock);
  std::string kernel_name;
  std::string function_name;
  std::string roctx_message;
  // Empty unless a rocTX id was actually queried. The previous plain uint64_t
  // was never initialised and its `>= 0` test was always true, so an
  // indeterminate id was printed for every record.
  std::optional<uint64_t> roctx_id;

  // HIP ops with operation id 0 carry the (mangled) kernel name in the handle.
  if (tracer_record.operation_id.id == 0 && tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS) {
    const char* raw_name = reinterpret_cast<const char*>(tracer_record.api_data_handle.handle);
    if (raw_name && strlen(raw_name) > 1) kernel_name = rocmtools::cxx_demangle(raw_name);
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) {
    size_t function_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
        session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = static_cast<char*>(malloc(function_name_size));
      CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
          session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) {
    size_t function_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &function_name_size));
    if (function_name_size > 1) {
      char* function_name_c = static_cast<char*>(malloc(function_name_size));
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_c));
      if (function_name_c) function_name = std::string(function_name_c);
    }

    size_t kernel_name_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
        session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
        tracer_record.operation_id, &kernel_name_size));
    if (kernel_name_size > 1) {
      char* kernel_name_str = static_cast<char*>(malloc(kernel_name_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &kernel_name_str));
      if (kernel_name_str) kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
    }
  }

  if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
    size_t roctx_message_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
        tracer_record.operation_id, &roctx_message_size));
    if (roctx_message_size > 1) {
      [[maybe_unused]] char* roctx_message_str =
          static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_message_str));
      // std::string copies the text itself; the former strdup() was never freed.
      if (roctx_message_str)
        roctx_message = rocmtools::cxx_demangle(std::string(roctx_message_str));
    }

    size_t roctx_id_size = 0;
    CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
        session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, tracer_record.operation_id,
        &roctx_id_size));
    if (roctx_id_size > 1) {
      [[maybe_unused]] char* roctx_id_str =
          static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
          session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_id_str));
      if (roctx_id_str) {
        // Parse straight from the buffer (no leaked strdup() copy).
        roctx_id = std::stoll(std::string(roctx_id_str));
        free(roctx_id_str);
      }
    }
  }

  output_file_t* output_file = get_output_file(output_type_t::TRACER, tracer_record.domain);
  // get_output_file() yields nullptr for domains without a trace file; its
  // assert is compiled out under NDEBUG, so guard the dereference here.
  if (output_file == nullptr) return;

  *output_file << "Record [" << tracer_record.header.id.handle << "], Domain("
               << GetDomainName(tracer_record.domain) << "), Begin("
               << tracer_record.timestamps.begin.value << "), End("
               << tracer_record.timestamps.end.value << "), Correlation ID( "
               << tracer_record.correlation_id.value << ")";
  if (roctx_id) *output_file << ", ROCTX ID(" << *roctx_id << ")";
  if (roctx_message.size() > 1) *output_file << ", ROCTX Message(" << roctx_message << ")";
  if (function_name.size() > 1) *output_file << ", Function(" << function_name << ")";
  if (kernel_name.size() > 1) *output_file << ", Kernel Name(" << kernel_name.c_str() << ")";
  *output_file << std::endl;
}
|
||||
|
||||
void FlushProfilerRecord(const rocprofiler_record_profiler_t* profiler_record,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
std::lock_guard<std::mutex> lock(writing_lock);
|
||||
size_t name_length = 0;
|
||||
output_file_t* output_file{nullptr};
|
||||
output_file = get_output_file(output_type_t::COUNTER);
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
|
||||
profiler_record->kernel_id, &name_length));
|
||||
// Taken from rocprofiler: The size hasn't changed in recent past
|
||||
static const uint32_t lds_block_size = 128 * 4;
|
||||
const char* kernel_name_c;
|
||||
if (name_length > 1) {
|
||||
kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record->kernel_id,
|
||||
&kernel_name_c));
|
||||
}
|
||||
*output_file << std::string("dispatch[") << std::to_string(profiler_record->header.id.handle)
|
||||
<< "], " << std::string("gpu_id(")
|
||||
<< std::to_string(profiler_record->gpu_id.handle) << "), "
|
||||
<< std::string("queue_id(") << std::to_string(profiler_record->queue_id.handle)
|
||||
<< "), " << std::string("queue_index(")
|
||||
<< std::to_string(profiler_record->queue_idx.value) << "), " << std::string("pid(")
|
||||
<< std::to_string(GetPid()) << "), " << std::string("tid(")
|
||||
<< std::to_string(profiler_record->thread_id.value) << ")";
|
||||
*output_file << ", " << std::string("grd(")
|
||||
<< std::to_string(profiler_record->kernel_properties.grid_size) << "), "
|
||||
<< std::string("wgr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.workgroup_size) << "), "
|
||||
<< std::string("lds(")
|
||||
<< std::to_string(
|
||||
((profiler_record->kernel_properties.lds_size + (lds_block_size - 1)) &
|
||||
~(lds_block_size - 1)))
|
||||
<< "), " << std::string("scr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.scratch_size) << "), "
|
||||
<< std::string("arch_vgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.arch_vgpr_count) << "), "
|
||||
<< std::string("accum_vgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.accum_vgpr_count) << "), "
|
||||
<< std::string("sgpr(")
|
||||
<< std::to_string(profiler_record->kernel_properties.sgpr_count) << "), "
|
||||
<< std::string("wave_size(")
|
||||
<< std::to_string(profiler_record->kernel_properties.wave_size) << "), "
|
||||
<< std::string("sig(")
|
||||
<< std::to_string(profiler_record->kernel_properties.signal_handle);
|
||||
std::string kernel_name = "";
|
||||
if (name_length > 1) {
|
||||
kernel_name = rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name_c));
|
||||
}
|
||||
*output_file << "), " << std::string("obj(")
|
||||
<< std::to_string(profiler_record->kernel_id.handle) << "), "
|
||||
<< std::string("kernel-name(\"") << kernel_name << "\")"
|
||||
<< std::string(", start_time(")
|
||||
<< std::to_string(profiler_record->timestamps.begin.value) << ")"
|
||||
<< std::string(", end_time(")
|
||||
<< std::to_string(profiler_record->timestamps.end.value) << ")";
|
||||
|
||||
// For Counters
|
||||
*output_file << std::endl;
|
||||
if (profiler_record->counters) {
|
||||
for (uint64_t i = 0; i < profiler_record->counters_count.value; i++) {
|
||||
if (profiler_record->counters[i].counter_handler.handle > 0) {
|
||||
size_t counter_name_length = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
|
||||
&counter_name_length));
|
||||
if (counter_name_length > 1) {
|
||||
const char* name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_counter_info(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
|
||||
&name_c));
|
||||
*output_file << ", " << name_c << " ("
|
||||
<< std::to_string(profiler_record->counters[i].value.value) << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FlushPCSamplingRecord(
|
||||
const rocprofiler_record_pc_sample_t *pc_sampling_record) {
|
||||
output_file_t* output_file{nullptr};
|
||||
output_file = get_output_file(output_type_t::PC_SAMPLING);
|
||||
const auto &sample = pc_sampling_record->pc_sample;
|
||||
*output_file << "dispatch[" << sample.dispatch_id.value << "], "
|
||||
<< "timestamp(" << sample.timestamp.value << "), "
|
||||
<< "gpu_id(" << sample.gpu_id.handle << "), "
|
||||
<< "pc-sample(" << std::hex << std::showbase << sample.pc << "), "
|
||||
<< "se(" << sample.se << ')'
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id) {
|
||||
while (begin < end) {
|
||||
if (!begin) return 0;
|
||||
switch (begin->kind) {
|
||||
case ROCPROFILER_PROFILER_RECORD: {
|
||||
const rocprofiler_record_profiler_t* profiler_record =
|
||||
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin);
|
||||
FlushProfilerRecord(profiler_record, session_id, buffer_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_TRACER_RECORD: {
|
||||
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
|
||||
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
|
||||
FlushTracerRecord(*tracer_record, session_id, buffer_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PC_SAMPLING_RECORD: {
|
||||
const rocprofiler_record_pc_sample_t *pc_sampling_record =
|
||||
reinterpret_cast<const rocprofiler_record_pc_sample_t *>(begin);
|
||||
FlushPCSamplingRecord(pc_sampling_record);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// True once the constructor has written both start-up files without a
// stream failure.
bool is_valid() const { return valid_; }
|
||||
|
||||
private:
|
||||
// Set as the last step of the constructor; stays false on any early return.
bool valid_{false};
|
||||
|
||||
// One lazily-opened sink per output kind / tracer domain, selected by
// get_output_file().
output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"},
|
||||
hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"},
|
||||
hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"},
|
||||
output_file_{"results.txt"};
|
||||
};
|
||||
|
||||
file_plugin_t* file_plugin = nullptr;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Plugin entry point: creates the single file_plugin instance.
// Returns 0 on success and -1 when the rocprofiler version does not match
// (different major, or a minor older than the one compiled against), when the
// plugin is already initialized, or when the new instance is not valid.
ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
                                                     uint32_t rocprofiler_minor_version) {
  const bool version_ok = rocprofiler_major_version == ROCPROFILER_VERSION_MAJOR &&
                          rocprofiler_minor_version >= ROCPROFILER_VERSION_MINOR;
  if (!version_ok) return -1;

  if (file_plugin != nullptr) return -1;

  file_plugin = new file_plugin_t();
  if (!file_plugin->is_valid()) {
    // The plugin failed to initialize: destroy it and report an error.
    delete file_plugin;
    file_plugin = nullptr;
    return -1;
  }
  return 0;
}
|
||||
|
||||
// Plugin entry point: destroys the file_plugin instance if one exists and
// resets the global pointer so a later initialize can succeed.
ROCPROFILER_EXPORT void rocprofiler_plugin_finalize() {
  if (file_plugin != nullptr) {
    delete file_plugin;
    file_plugin = nullptr;
  }
}
|
||||
|
||||
// Plugin entry point: writes every record in [begin, end) through the plugin
// instance. Returns -1 when the plugin is missing or not valid, otherwise the
// result of file_plugin_t::WriteBufferRecords().
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(const rocprofiler_record_header_t* begin,
                                                           const rocprofiler_record_header_t* end,
                                                           rocprofiler_session_id_t session_id,
                                                           rocprofiler_buffer_id_t buffer_id) {
  if (file_plugin == nullptr) return -1;
  if (!file_plugin->is_valid()) return -1;
  return file_plugin->WriteBufferRecords(begin, end, session_id, buffer_id);
}
|
||||
|
||||
// Plugin entry point: writes a single tracer record. Returns -1 when the
// plugin is missing or not valid; a record whose header id handle is 0 is
// ignored and reported as success.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
                                                   rocprofiler_session_id_t session_id) {
  const bool usable = file_plugin != nullptr && file_plugin->is_valid();
  if (!usable) return -1;

  if (record.header.id.handle != 0) {
    file_plugin->FlushTracerRecord(record, session_id);
  }
  return 0;
}
|
||||
@@ -1,27 +0,0 @@
|
||||
# Helper source shared with the plugin build.
file(GLOB ROCPROFILER_UTIL_SRC_FILES
     ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp)

# LIBRARY_TYPE is not set in this script; it must come from the including scope.
add_library(
  perfetto_plugin ${LIBRARY_TYPE}
  ${ROCPROFILER_UTIL_SRC_FILES}
  perfetto.cpp
  perfetto_sdk/sdk/perfetto.cc)

# Hidden visibility by default; relink when the export map changes.
set_target_properties(
  perfetto_plugin
  PROPERTIES CXX_VISIBILITY_PRESET hidden
             LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
             LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})

target_compile_definitions(
  perfetto_plugin
  PRIVATE HIP_PROF_HIP_API_STRING=1
          __HIP_PLATFORM_HCC__=1)

target_include_directories(
  perfetto_plugin
  PRIVATE ${PROJECT_SOURCE_DIR}/inc
          ${PROJECT_SOURCE_DIR}
          ${PROJECT_SOURCE_DIR}/plugin/perfetto/perfetto_sdk/sdk)

# Export only what the version script lists and reject unresolved symbols.
target_link_options(
  perfetto_plugin
  PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
          -Wl,--no-undefined)

target_link_libraries(
  perfetto_plugin
  PRIVATE ${ROCPROFILER_TARGET}
          Threads::Threads
          systemd
          stdc++fs
          amd_comgr)

install(
  TARGETS perfetto_plugin
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
          COMPONENT runtime)
|
||||
@@ -1,804 +0,0 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "rocprofiler.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <condition_variable>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <experimental/filesystem>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <systemd/sd-id128.h>
|
||||
|
||||
#include "perfetto_sdk/sdk/perfetto.h"
|
||||
#include "rocprofiler_plugin.h"
|
||||
#include "../utils.h"
|
||||
|
||||
#define STREAM_CONSTANT 98736677
|
||||
#define QUEUE_CONSTANT 18746479
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
PERFETTO_DEFINE_CATEGORIES(
|
||||
perfetto::Category("GENERIC").SetDescription("GENERAL_CATEGORY"),
|
||||
perfetto::Category("ROCTX_API").SetDescription("ACTIVITY_DOMAIN_ROCTX_API"),
|
||||
perfetto::Category("HSA_API").SetDescription("ACTIVITY_DOMAIN_HSA_API"),
|
||||
perfetto::Category("HIP_API").SetDescription("ACTIVITY_DOMAIN_HIP_API"),
|
||||
perfetto::Category("External_API").SetDescription("ACTIVITY_DOMAIN_EXT_API"),
|
||||
perfetto::Category("HIP_OPS").SetDescription("ACTIVITY_DOMAIN_HIP_OPS"),
|
||||
perfetto::Category("HSA_OPS").SetDescription("ACTIVITY_DOMAIN_HSA_OPS"),
|
||||
perfetto::Category("KERNELS").SetDescription("KERNEL_DISPATCHES"),
|
||||
perfetto::Category("COUNTERS").SetDescription("PERFORMANCE_COUNTERS"));
|
||||
|
||||
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
|
||||
|
||||
namespace {
|
||||
|
||||
std::string process_name;
|
||||
static std::string output_file_name;
|
||||
|
||||
// Returns the demangled name of the kernel referenced by `profiler_record`,
// or an empty string when rocprofiler reports no name (length <= 1) or the
// returned text is shorter than two characters.
//
// NOTE(review): the query receives the address of a malloc'ed pointer; whether
// it fills that buffer or replaces the pointer is not visible here, so no
// free() is added -- confirm ownership.
std::string get_kernel_name(rocprofiler_record_profiler_t& profiler_record) {
  std::string kernel_name = "";
  size_t name_length = 1;
  CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
                                                     &name_length));
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#pragma GCC diagnostic ignored "-Wstringop-overread"
  if (name_length > 1) {
    const char* kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
    CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
                                                  &kernel_name_c));
    // Pass the text directly: the strdup() copy made here before was never
    // released and leaked once per call.
    if (kernel_name_c && strlen(kernel_name_c) > 1)
      kernel_name = rocmtools::cxx_demangle(kernel_name_c);
  }
#pragma GCC diagnostic pop
  return kernel_name;
}
|
||||
|
||||
|
||||
class perfetto_plugin_t {
|
||||
public:
|
||||
perfetto_plugin_t() {
|
||||
const char* output_dir = getenv("OUTPUT_PATH");
|
||||
const char* temp_file_name = getenv("OUT_FILE_NAME");
|
||||
output_file_name = temp_file_name ? std::string(temp_file_name) + "_" : "";
|
||||
|
||||
if (output_dir == nullptr) {
|
||||
stream_.copyfmt(std::cout);
|
||||
stream_.clear(std::cout.rdstate());
|
||||
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
|
||||
return;
|
||||
}
|
||||
|
||||
output_prefix_ = output_dir;
|
||||
if (!fs::is_directory(fs::status(output_prefix_))) {
|
||||
if (!stream_.fail()) rocmtools::warning("Cannot open output directory '%s'", output_dir);
|
||||
stream_.setstate(std::ios_base::failbit);
|
||||
return;
|
||||
}
|
||||
|
||||
perfetto::TracingInitArgs args;
|
||||
args.backends |= perfetto::kInProcessBackend;
|
||||
|
||||
perfetto::Tracing::Initialize(args);
|
||||
perfetto::TrackEvent::Register();
|
||||
|
||||
perfetto::protos::gen::TrackEventConfig track_event_cfg;
|
||||
track_event_cfg.add_disabled_categories("*");
|
||||
track_event_cfg.add_enabled_categories("GENERIC");
|
||||
track_event_cfg.add_enabled_categories("ROCTX_API");
|
||||
track_event_cfg.add_enabled_categories("HSA_API");
|
||||
track_event_cfg.add_enabled_categories("HIP_API");
|
||||
track_event_cfg.add_enabled_categories("External_API");
|
||||
track_event_cfg.add_enabled_categories("HIP_OPS");
|
||||
track_event_cfg.add_enabled_categories("HSA_OPS");
|
||||
track_event_cfg.add_enabled_categories("KERNELS");
|
||||
track_event_cfg.add_enabled_categories("COUNTERS");
|
||||
|
||||
perfetto::TraceConfig trace_cfg;
|
||||
|
||||
auto buffer_cfg = trace_cfg.add_buffers();
|
||||
uint32_t max_buffer_size = 10 * 1024 * 1024; // Default max buffer size is 10 GB
|
||||
const char* max_buffer_size_str = getenv("rocprofiler_PERFETTO_MAX_BUFFER_SIZE_KIB");
|
||||
if (max_buffer_size_str && std::atol(max_buffer_size_str) > 0)
|
||||
max_buffer_size = std::atol(max_buffer_size_str);
|
||||
// Record up to max buffer size determined by user or the 10 GB (default value)
|
||||
buffer_cfg->set_size_kb(max_buffer_size);
|
||||
|
||||
auto* data_source_cfg = trace_cfg.add_data_sources()->mutable_config();
|
||||
data_source_cfg->set_name("track_event");
|
||||
data_source_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
|
||||
output_prefix_.append(output_file_name + std::to_string(GetPid()) + "_output.pftrace");
|
||||
file_descriptor_ = open(output_prefix_.string().c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
if (file_descriptor_ == -1) rocmtools::warning("Can't open output file\n");
|
||||
|
||||
tracing_session_ = perfetto::Tracing::NewTrace();
|
||||
tracing_session_->Setup(trace_cfg, file_descriptor_);
|
||||
tracing_session_->StartBlocking();
|
||||
|
||||
|
||||
hostname_[1023] = '\0';
|
||||
gethostname(hostname_, 1023);
|
||||
sd_id128_t ret;
|
||||
char machine_id[SD_ID128_STRING_MAX];
|
||||
[[maybe_unused]] int status = sd_id128_get_machine(&ret);
|
||||
assert(status == 0 && "Error: Couldn't get machine id!");
|
||||
if (sd_id128_to_string(ret, machine_id)) machine_id_ = std::hash<std::string>{}(machine_id);
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(thread_tracks_lock_);
|
||||
process_name =
|
||||
perfetto::ProcessTrack::Current().Serialize().mutable_process()->process_name();
|
||||
auto process_track_desc = perfetto::ProcessTrack::Current().Serialize();
|
||||
uint64_t track_id =
|
||||
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
|
||||
for (uint64_t tid : track_ids_used_) {
|
||||
while (track_id == tid) {
|
||||
track_id =
|
||||
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
std::string thread_track_str =
|
||||
rocmtools::string_printf("Node: %s Process ID: %lu Thread ID:", hostname_, GetPid());
|
||||
process_track_desc.mutable_process()->set_process_name(thread_track_str);
|
||||
perfetto::TrackEvent::SetTrackDescriptor(perfetto::ProcessTrack::Current(),
|
||||
process_track_desc);
|
||||
perfetto::ProcessTrack::Current().Serialize().set_uuid(track_id);
|
||||
thread_tracks_.emplace(GetPid(), perfetto::ProcessTrack::Current());
|
||||
}
|
||||
|
||||
is_valid_ = true;
|
||||
}
|
||||
|
||||
// Tears the plugin down. Nothing is released unless is_valid_ was set; in that case the
// tracing session is stopped (blocking) before the trace file descriptor is closed.
~perfetto_plugin_t() {
  if (!is_valid_) return;

  tracing_session_->StopBlocking();
  close(file_descriptor_);
}
|
||||
|
||||
// Maps an activity domain to its display name.
// Domains without an entry below yield an empty string.
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
  switch (domain) {
    case ACTIVITY_DOMAIN_ROCTX:   return "ROCTX_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_API: return "HIP_API_DOMAIN";
    case ACTIVITY_DOMAIN_HIP_OPS: return "HIP_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_API: return "HSA_API_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_OPS: return "HSA_OPS_DOMAIN";
    case ACTIVITY_DOMAIN_HSA_EVT: return "HSA_EVT_DOMAIN";
    default:                      return "";
  }
}
|
||||
|
||||
// Held for the whole body of FlushProfilerRecord and FlushTracerRecord, so records are
// written to the trace one at a time.
std::mutex writing_lock;
|
||||
|
||||
// Writes one kernel-dispatch (profiler) record to the trace.
//
// The kernel is emitted as a "KERNELS" slice on a queue track nested under the track of the
// GPU it ran on; missing device/queue tracks are created on first use. Every counter attached
// to the record is additionally emitted as two "COUNTERS" samples (value at the begin
// timestamp, 0.001 at the end timestamp).
//
// @param profiler_record  record to write (taken by value).
// @param session_id       session used to resolve counter names.
// @return always 0.
int FlushProfilerRecord(rocprofiler_record_profiler_t profiler_record,
                        rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> lock(writing_lock);
  if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");

  // Records an id in track_ids_used_ unless it is already present. The list is only ever
  // compared for equality, so duplicates carry no information; without this check the queue
  // entry below was appended again for every single record and the vector grew without bound.
  auto remember_track_id = [this](uint64_t id) {
    for (uint64_t used : track_ids_used_) {
      if (used == id) return;
    }
    track_ids_used_.emplace_back(id);
  };

  int device_id = profiler_record.gpu_id.handle;

  // --- Device track (one per GPU) -------------------------------------------------------------
  std::unordered_map<int, perfetto::Track>::iterator device_track_it;
  {
    std::lock_guard<std::mutex> device_guard(device_tracks_lock_);
    device_track_it = device_tracks_.find(device_id);
    if (device_track_it == device_tracks_.end()) {
      /* Create a new perfetto::Track (Sub-Track) */
      device_track_it =
          device_tracks_
              .emplace(device_id, perfetto::ProcessTrack::Global(((device_id + 1) * machine_id_)))
              .first;
      auto gpu_desc = device_track_it->second.Serialize();
      gpu_desc.mutable_process()->set_pid(device_id);
      std::string gpu_str = rocmtools::string_printf("Node: %s Device:", hostname_);
      gpu_desc.mutable_process()->set_process_name(gpu_str);
      perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
      remember_track_id(device_id + 1 + machine_id_);
    }
  }
  auto& gpu_track = device_track_it->second;

  // --- Queue track (child of the device track) ------------------------------------------------
  // NOTE(review): queue_tracks_ is keyed by the device id, not the queue id, so every queue of
  // a device lands on the track created for the first queue seen — confirm this is intended.
  const uint64_t queue_handle = profiler_record.queue_id.handle;
  std::unordered_map<int, perfetto::Track>::iterator queue_track_it;
  {
    // The lookup happens only under queue_tracks_lock_; the former extra find() issued before
    // taking the lock was redundant.
    std::lock_guard<std::mutex> queue_guard(queue_tracks_lock_);
    queue_track_it = queue_tracks_.find(device_id);
    if (queue_track_it == queue_tracks_.end()) {
      /* Create a new perfetto::Track */
      queue_track_it = queue_tracks_
                           .emplace(device_id,
                                    perfetto::Track((profiler_record.queue_id.handle + 1 +
                                                     profiler_record.gpu_id.handle) *
                                                        QUEUE_CONSTANT * machine_id_ * GetPid(),
                                                    gpu_track))
                           .first;

      auto queue_desc = queue_track_it->second.Serialize();
      std::string queue_str =
          rocmtools::string_printf("Process ID: %lu Queue %ld", GetPid(), queue_handle);
      queue_desc.set_name(queue_str);
      perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
    }
    remember_track_id(profiler_record.queue_id.handle + machine_id_ + 1 +
                      profiler_record.gpu_id.handle);
  }
  auto& queue_track = queue_track_it->second;

  // Taken from rocprofiler: The size hasn't changed in recent past
  static const uint32_t lds_block_size = 128 * 4;

  // --- Kernel slice ---------------------------------------------------------------------------
  // NOTE(review): the slice name points into a local std::string wrapped as StaticString;
  // confirm against the perfetto SDK in use that this is acceptable for non-literal names.
  std::string full_kernel_name = get_kernel_name(profiler_record);
  TRACE_EVENT_BEGIN("KERNELS", perfetto::StaticString(full_kernel_name.c_str()), queue_track,
                    profiler_record.timestamps.begin.value, "Full Kernel Name",
                    full_kernel_name.c_str(), "Agent ID", device_id, "Queue ID",
                    profiler_record.queue_id.handle, "GRD",
                    profiler_record.kernel_properties.grid_size, "WGR",
                    profiler_record.kernel_properties.workgroup_size, "LDS",
                    // LDS size rounded up to a multiple of lds_block_size.
                    (((profiler_record.kernel_properties.lds_size + (lds_block_size - 1)) &
                      ~(lds_block_size - 1))),
                    "SCR", profiler_record.kernel_properties.scratch_size, "Arch. VGPR",
                    profiler_record.kernel_properties.arch_vgpr_count, "Accumilative Vgpr",
                    profiler_record.kernel_properties.accum_vgpr_count, "SGPR",
                    profiler_record.kernel_properties.sgpr_count, "Wave Size",
                    profiler_record.kernel_properties.wave_size, "Signal",
                    profiler_record.kernel_properties.signal_handle);

  TRACE_EVENT_END("KERNELS", queue_track, profiler_record.timestamps.end.value);

  // --- Counter tracks -------------------------------------------------------------------------
  // Returns (a copy of) the counter track for `counter_name`, creating and naming it on first
  // use. Tracks are cached by counter name and parented to the current record's GPU track.
  auto get_counter_track_fn = [&](const std::string& counter_name) {
    // NOTE(review): this string is local to the lambda while its c_str() is handed to the
    // CounterTrack stored in counter_tracks_; confirm CounterTrack does not retain the pointer.
    std::string counter_track_id =
        std::to_string(machine_id_) + std::to_string(GetPid()) + counter_name;
    std::unordered_map<std::string, perfetto::CounterTrack>::iterator counters_track_it;
    {
      std::lock_guard<std::mutex> counter_guard(counter_tracks_lock_);
      counters_track_it = counter_tracks_.find(counter_name);
      if (counters_track_it == counter_tracks_.end()) {
        /* Create a new perfetto::Track */
        counters_track_it =
            counter_tracks_
                .emplace(counter_name, perfetto::CounterTrack(counter_track_id.c_str(), gpu_track))
                .first;

        auto counter_track_desc = counters_track_it->second.Serialize();
        std::string counter_track_str =
            "Process ID " + std::to_string(GetPid()) + " - Counter " + counter_name;
        counter_track_desc.set_name(counter_track_str);
        perfetto::TrackEvent::SetTrackDescriptor(counters_track_it->second, counter_track_desc);
      }
    }
    return counters_track_it->second;
  };

  // For Counters
  if (profiler_record.counters) {
    for (uint64_t i = 0; i < profiler_record.counters_count.value; i++) {
      // Entries with a zero handle are skipped.
      if (profiler_record.counters[i].counter_handler.handle > 0) {
        size_t name_length = 0;
        CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
            session_id, ROCPROFILER_COUNTER_NAME, profiler_record.counters[i].counter_handler,
            &name_length));
        if (name_length > 1) {
          // NOTE(review): ownership of this buffer after the query is not visible here (the
          // query receives the pointer's address); it is never freed in this function.
          const char* name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
          CHECK_ROCMTOOLS(
              rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
                                             profiler_record.counters[i].counter_handler, &name_c));

          perfetto::CounterTrack counters_track = get_counter_track_fn(std::string(name_c));
          TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.begin.value,
                        profiler_record.counters[i].value.value);
          // Added an extra zero event for maintaining start-end of the counter
          TRACE_COUNTER("COUNTERS", counters_track, profiler_record.timestamps.end.value, 0.001);
        }
      }
    }
  }

  return 0;
}
|
||||
|
||||
// Writes one tracer record to the trace.
//
// Depending on tracer_record.domain the record becomes:
//   - ROCTX   : begin (operation id 1) or end of a "ROCTX_API" slice on the thread's ROCTX track
//   - HSA_API : an "HSA_API" slice on the thread's HSA track
//   - HIP_API : a "HIP_API" slice on the thread's HIP track (with the kernel name when present)
//   - HIP_OPS : a "HIP_OPS" slice on a stream track under the device track
//   - HSA_OPS : an "HSA_OPS" slice on a queue track under the device track
// Missing tracks are created and named on first use. Other domains are reported and ignored.
//
// @param tracer_record  record to write (taken by value).
// @param session_id     session used to query names/messages belonging to the record.
// @return always 0.
int FlushTracerRecord(rocprofiler_record_tracer_t tracer_record,
                      rocprofiler_session_id_t session_id) {
  std::lock_guard<std::mutex> lock(writing_lock);
  if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");

  // Fallback slice name. A string literal has static storage, so the pointer stays valid;
  // the previous `std::string("N/A").c_str()` dangled as soon as the temporary was destroyed.
  static constexpr const char* kNotAvailable = "N/A";

  std::string kernel_name;
  // Initialised to null so that a query that is skipped (size <= 1) can be detected instead
  // of handing an indeterminate pointer to perfetto.
  char* function_name = nullptr;
  char* activity_name = nullptr;
  std::string roctx_message;
  uint64_t roctx_id = 0;
  const uint64_t thread_id = tracer_record.thread_id.value;

  // Draws the next id from track_counter_, drawing again while the id equals an entry of
  // track_ids_used_ (same single forward pass as before, just written once).
  auto next_track_id = [this]() {
    uint64_t track_id =
        track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
    for (uint64_t used : track_ids_used_) {
      while (track_id == used) {
        track_id =
            track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
      }
    }
    return track_id;
  };

  // Records an id in track_ids_used_ unless it is already present. The list is only compared
  // for equality, so duplicates are useless; the HSA_OPS path used to append on every record.
  auto remember_track_id = [this](uint64_t id) {
    for (uint64_t used : track_ids_used_) {
      if (used == id) return;
    }
    track_ids_used_.emplace_back(id);
  };

  // Exactly one of these is set below: gpu_track for HIP_OPS/HSA_OPS, thread_track otherwise.
  // Pointers replace the former default-constructed iterators, which were dereferenced even
  // in the branch that never assigned them. Element addresses of std::unordered_map stay
  // valid across later insertions.
  perfetto::Track* thread_track = nullptr;
  perfetto::Track* gpu_track = nullptr;

  if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS ||
      tracer_record.domain == ACTIVITY_DOMAIN_HSA_OPS) {
    int device_id = tracer_record.agent_id.handle;
    if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS && device_id > 0) device_id--;

    std::lock_guard<std::mutex> device_guard(device_tracks_lock_);
    auto device_track_it = device_tracks_.find(device_id);
    if (device_track_it == device_tracks_.end()) {
      /* Create a new perfetto::Track (Sub-Track) */
      device_track_it =
          device_tracks_
              .emplace(device_id, perfetto::ProcessTrack::Global(((device_id + 1) * machine_id_)))
              .first;
      auto gpu_desc = device_track_it->second.Serialize();
      gpu_desc.mutable_process()->set_pid(device_id);
      std::string gpu_str = rocmtools::string_printf("Node: %s Device:", hostname_);
      gpu_desc.mutable_process()->set_process_name(gpu_str);
      perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
      remember_track_id(1 + machine_id_ + device_id);
    }
    gpu_track = &device_track_it->second;
  } else {
    std::lock_guard<std::mutex> thread_guard(thread_tracks_lock_);
    auto thread_track_it = thread_tracks_.find(thread_id);
    if (thread_track_it == thread_tracks_.end()) {
      const uint64_t track_id = next_track_id();
      thread_track_it =
          thread_tracks_.emplace(thread_id, perfetto::ProcessTrack::Global(track_id)).first;
      auto thread_track_desc = thread_track_it->second.Serialize();
      std::string thread_track_str =
          rocmtools::string_printf("Node: %s Process ID: %lu Thread ID:", hostname_, GetPid());
      thread_track_desc.mutable_process()->set_pid(thread_id);
      thread_track_desc.mutable_process()->set_process_name(thread_track_str);
      perfetto::TrackEvent::SetTrackDescriptor(thread_track_it->second, thread_track_desc);
    }
    thread_track = &thread_track_it->second;
  }

  switch (tracer_record.domain) {
    case ACTIVITY_DOMAIN_ROCTX: {
      std::unordered_map<uint64_t, perfetto::Track>::iterator roctx_track_it;
      {
        std::lock_guard<std::mutex> roctx_guard(roctx_tracks_lock_);
        roctx_track_it = roctx_tracks_.find(thread_id);
        if (roctx_track_it == roctx_tracks_.end()) {
          /* Create a new perfetto::Track */
          const uint64_t track_id = next_track_id();
          roctx_track_it =
              roctx_tracks_.emplace(thread_id, perfetto::Track(track_id, *thread_track)).first;

          auto roctx_track_desc = roctx_track_it->second.Serialize();
          std::string roctx_track_str = rocmtools::string_printf("ROCTX Markers");
          roctx_track_desc.set_name(roctx_track_str);
          perfetto::TrackEvent::SetTrackDescriptor(roctx_track_it->second, roctx_track_desc);
        }
      }
      auto& roctx_track = roctx_track_it->second;

      size_t roctx_message_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
          session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_message_size));
      if (roctx_message_size > 1) {
        // NOTE(review): who owns this buffer after the query is not visible here; it is not
        // freed in this function.
        char* roctx_message_str = static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
            session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
            tracer_record.operation_id, &roctx_message_str));
        // std::string copies the characters; the extra strdup() that used to wrap the
        // pointer was leaked on every call.
        if (roctx_message_str)
          roctx_message = rocmtools::cxx_demangle(std::string(roctx_message_str));
      }

      size_t roctx_id_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
          session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
          tracer_record.operation_id, &roctx_id_size));
      if (roctx_id_size > 1) {
        char* roctx_id_str = static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
            session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
            tracer_record.operation_id, &roctx_id_str));
        if (roctx_id_str) {
          roctx_id = std::stoll(std::string(roctx_id_str));  // no leaked strdup() copy
          free(roctx_id_str);
        }
      }

      if (tracer_record.operation_id.id == 1) {
        perfetto::StaticString roctx_message_pft(
            (!roctx_message.empty() ? roctx_message.c_str() : ""));
        TRACE_EVENT_BEGIN("ROCTX_API", roctx_message_pft, roctx_track,
                          tracer_record.timestamps.begin.value, "Timestamp(ns)",
                          tracer_record.timestamps.begin.value, "RocTx ID", roctx_id);
        roctx_track_entries_++;
      } else {
        // The end of the range is stamped with this record's begin timestamp.
        TRACE_EVENT_END("ROCTX_API", roctx_track, tracer_record.timestamps.begin.value);
        roctx_track_entries_--;
      }
      break;
    }

    case ACTIVITY_DOMAIN_HSA_API: {
      std::unordered_map<uint64_t, perfetto::Track>::iterator hsa_track_it;
      {
        std::lock_guard<std::mutex> hsa_guard(hsa_tracks_lock_);
        hsa_track_it = hsa_tracks_.find(thread_id);
        if (hsa_track_it == hsa_tracks_.end()) {
          /* Create a new perfetto::Track */
          const uint64_t track_id = next_track_id();
          hsa_track_it =
              hsa_tracks_.emplace(thread_id, perfetto::Track(track_id, *thread_track)).first;
          auto hsa_track_desc = hsa_track_it->second.Serialize();
          std::string hsa_track_str = rocmtools::string_printf("HSA API");
          hsa_track_desc.set_name(hsa_track_str);
          perfetto::TrackEvent::SetTrackDescriptor(hsa_track_it->second, hsa_track_desc);
        }
      }
      auto& hsa_track = hsa_track_it->second;

      size_t function_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
          session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_size));
      if (function_name_size > 1) {
        function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
            session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &function_name));
      }
      const char* hsa_api_name = function_name ? function_name : kNotAvailable;
      TRACE_EVENT_BEGIN("HSA_API", perfetto::StaticString(hsa_api_name), hsa_track,
                        tracer_record.timestamps.begin.value,
                        perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      TRACE_EVENT_END("HSA_API", hsa_track, tracer_record.timestamps.end.value);
      break;
    }

    case ACTIVITY_DOMAIN_HIP_API: {
      std::unordered_map<uint64_t, perfetto::Track>::iterator hip_track_it;
      {
        std::lock_guard<std::mutex> hip_guard(hip_tracks_lock_);
        hip_track_it = hip_tracks_.find(thread_id);
        if (hip_track_it == hip_tracks_.end()) {
          /* Create a new perfetto::Track */
          const uint64_t track_id = next_track_id();
          hip_track_it =
              hip_tracks_.emplace(thread_id, perfetto::Track(track_id, *thread_track)).first;

          auto hip_track_desc = hip_track_it->second.Serialize();
          std::string hip_track_str = rocmtools::string_printf("HIP API");
          hip_track_desc.set_name(hip_track_str);
          perfetto::TrackEvent::SetTrackDescriptor(hip_track_it->second, hip_track_desc);
        }
      }
      auto& hip_track = hip_track_it->second;

      size_t function_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &function_name_size));
      if (function_name_size > 1) {
        function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
            session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &function_name));
      }

      size_t kernel_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &kernel_name_size));
      if (kernel_name_size > 1) {
        char* kernel_name_str = static_cast<char*>(malloc(kernel_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
            session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &kernel_name_str));
        if (kernel_name_str) {
          kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
          free(kernel_name_str);
        }
      }

      const char* hip_api_name = function_name ? function_name : kNotAvailable;
      if (kernel_name.size() > 0) {
        TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(hip_api_name), hip_track,
                          tracer_record.timestamps.begin.value, "Kernel Name", kernel_name,
                          perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      } else {
        TRACE_EVENT_BEGIN("HIP_API", perfetto::StaticString(hip_api_name), hip_track,
                          tracer_record.timestamps.begin.value,
                          perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      }
      TRACE_EVENT_END("HIP_API", hip_track, tracer_record.timestamps.end.value);
      break;
    }

    case ACTIVITY_DOMAIN_EXT_API: {
      printf("Warning: External API is not supported!\n");
      break;
    }

    case ACTIVITY_DOMAIN_HIP_OPS: {
      // The stream id is queried with an empty data handle and the correlation id (narrowed
      // to 32 bits) in place of the operation id.
      uint64_t stream_id = 0;
      size_t stream_id_str_size = 0;
      char* stream_id_str = nullptr;
      CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
          session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
          rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
          &stream_id_str_size));
      if (stream_id_str_size > 1) {
        stream_id_str = static_cast<char*>(malloc(stream_id_str_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
            session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
            rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
            &stream_id_str));
        if (stream_id_str != nullptr) stream_id = std::stoll(stream_id_str);
      }

      std::unordered_map<int, perfetto::Track>::iterator stream_track_it;
      {
        std::lock_guard<std::mutex> stream_guard(stream_tracks_lock_);
        stream_track_it = stream_tracks_.find(stream_id);
        if (stream_track_it == stream_tracks_.end()) {
          /* Create a new perfetto::Track */
          uint64_t track_id = ((1 + stream_id + tracer_record.agent_id.handle) * machine_id_ *
                               STREAM_CONSTANT * GetPid());
          stream_track_it =
              stream_tracks_.emplace(stream_id, perfetto::Track(track_id, *gpu_track)).first;

          auto stream_desc = stream_track_it->second.Serialize();
          // stream_id is a uint64_t: "%lu" matches it, the former "%d" did not.
          std::string stream_str =
              rocmtools::string_printf("Process ID: %lu Stream %lu", GetPid(), stream_id);
          stream_desc.set_name(stream_str);
          perfetto::TrackEvent::SetTrackDescriptor(stream_track_it->second, stream_desc);
          remember_track_id(1 + machine_id_ + tracer_record.agent_id.handle);
        }
      }
      auto& stream_track = stream_track_it->second;

      if (tracer_record.api_data_handle.handle && tracer_record.api_data_handle.size > 1) {
        // The data handle carries the (mangled) kernel name; std::string copies it, so no
        // intermediate strdup() is needed.
        kernel_name = rocmtools::cxx_demangle(
            std::string(reinterpret_cast<const char*>(tracer_record.api_data_handle.handle)));
        // NOTE(review): this strdup() is kept on purpose — presumably it gives the slice name
        // a pointer that outlives the temporary returned by truncate_name(); it is never freed.
        TRACE_EVENT_BEGIN(
            "HIP_OPS",
            perfetto::StaticString(strdup(rocmtools::truncate_name(kernel_name).c_str())),
            stream_track, tracer_record.timestamps.begin.value, "Agent ID",
            tracer_record.agent_id.handle, "Process ID", GetPid(), "Kernel Name", kernel_name,
            perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      } else {
        size_t activity_name_size = 0;
        CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
            session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &activity_name_size));
        if (activity_name_size > 1) {
          activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
          CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
              session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
              tracer_record.operation_id, &activity_name));
        }
        const char* hip_activity = activity_name ? activity_name : kNotAvailable;
        TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(hip_activity), stream_track,
                          tracer_record.timestamps.begin.value, "Agent ID",
                          tracer_record.agent_id.handle, "Process ID", GetPid(),
                          perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      }
      TRACE_EVENT_END("HIP_OPS", stream_track, tracer_record.timestamps.end.value);
      break;
    }

    case ACTIVITY_DOMAIN_HSA_OPS: {
      // NOTE(review): queue_tracks_ is keyed by the agent id (pair.first), not the queue id,
      // so all queues of an agent share the first queue's track — confirm this is intended.
      std::pair<int, uint64_t> gpu_queue_id =
          std::make_pair(tracer_record.agent_id.handle, tracer_record.queue_id.handle);
      std::unordered_map<int, perfetto::Track>::iterator queue_track_it;
      {
        std::lock_guard<std::mutex> queue_guard(queue_tracks_lock_);
        queue_track_it = queue_tracks_.find(gpu_queue_id.first);
        if (queue_track_it == queue_tracks_.end()) {
          /* Create a new perfetto::Track */
          uint64_t track_id =
              ((1 + tracer_record.queue_id.handle + tracer_record.agent_id.handle) * machine_id_ *
               QUEUE_CONSTANT * GetPid());
          queue_track_it =
              queue_tracks_.emplace(gpu_queue_id.first, perfetto::Track(track_id, *gpu_track))
                  .first;

          auto queue_desc = queue_track_it->second.Serialize();
          std::string queue_str = rocmtools::string_printf("Process ID: %lu Queue %ld", GetPid(),
                                                           gpu_queue_id.second);
          queue_desc.set_name(queue_str);
          perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
        }
        remember_track_id(tracer_record.queue_id.handle + machine_id_ + 1 +
                          tracer_record.agent_id.handle);
      }
      auto& queue_track = queue_track_it->second;

      size_t activity_name_size = 0;
      CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
          session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
          tracer_record.operation_id, &activity_name_size));
      if (activity_name_size > 1) {
        activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
        CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
            session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
            tracer_record.operation_id, &activity_name));
      }
      const char* hsa_activity = activity_name ? activity_name : kNotAvailable;
      TRACE_EVENT_BEGIN("HSA_OPS", perfetto::StaticString(hsa_activity), queue_track,
                        tracer_record.timestamps.begin.value, "Agent ID",
                        tracer_record.agent_id.handle, "Queue ID", tracer_record.queue_id.handle,
                        "Process ID", GetPid(),
                        perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
      TRACE_EVENT_END("HSA_OPS", queue_track, tracer_record.timestamps.end.value);
      break;
    }

    default: {
      rocmtools::warning("ignored record for domain %d", tracer_record.domain);
      break;
    }
  }
  return 0;
}
|
||||
|
||||
int WriteBufferRecords(const rocprofiler_record_header_t* begin,
|
||||
const rocprofiler_record_header_t* end, rocprofiler_session_id_t session_id,
|
||||
rocprofiler_buffer_id_t buffer_id) {
|
||||
if (!tracing_session_) rocmtools::warning("Tracing session is deleted!\n");
|
||||
while (begin < end) {
|
||||
if (!begin) return 0;
|
||||
switch (begin->kind) {
|
||||
case ROCPROFILER_PROFILER_RECORD: {
|
||||
rocprofiler_record_profiler_t* profiler_record = const_cast<rocprofiler_record_profiler_t*>(
|
||||
reinterpret_cast<const rocprofiler_record_profiler_t*>(begin));
|
||||
FlushProfilerRecord(*profiler_record, session_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_TRACER_RECORD: {
|
||||
rocprofiler_record_tracer_t* tracer_record = const_cast<rocprofiler_record_tracer_t*>(
|
||||
reinterpret_cast<const rocprofiler_record_tracer_t*>(begin));
|
||||
FlushTracerRecord(*tracer_record, session_id);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
rocprofiler_next_record(begin, &begin, session_id, buffer_id);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Reports the is_valid_ flag; the plugin entry points refuse to write while this is false.
bool IsValid() const { return is_valid_; }
|
||||
|
||||
private:
|
||||
fs::path output_prefix_;
|
||||
std::unique_ptr<perfetto::TracingSession> tracing_session_;
|
||||
int file_descriptor_;
|
||||
bool is_valid_{false};
|
||||
size_t roctx_track_entries_{0};
|
||||
|
||||
// Correlate stream id(s) with correlation id(s) to identify the stream id of every HIP activity
|
||||
std::unordered_map<uint64_t, uint64_t> stream_ids_;
|
||||
|
||||
// Callback Tracks
|
||||
std::unordered_map<uint64_t, perfetto::Track> thread_tracks_;
|
||||
std::unordered_map<uint64_t, perfetto::Track> roctx_tracks_, hsa_tracks_, hip_tracks_,
|
||||
hip_ext_tracks_;
|
||||
|
||||
// Activity Tracks
|
||||
std::unordered_map<int, perfetto::Track> device_tracks_;
|
||||
std::unordered_map<int, perfetto::Track> queue_tracks_, stream_tracks_;
|
||||
|
||||
std::unordered_map<std::string, perfetto::CounterTrack> counter_tracks_;
|
||||
|
||||
std::atomic<uint64_t> track_counter_{GetPid()};
|
||||
std::vector<uint64_t> track_ids_used_;
|
||||
|
||||
std::mutex stream_ids_lock_, thread_tracks_lock_, roctx_tracks_lock_, hsa_tracks_lock_,
|
||||
hip_tracks_lock_, hip_ext_tracks_lock_, device_tracks_lock_, queue_tracks_lock_,
|
||||
stream_tracks_lock_, counter_tracks_lock_;
|
||||
|
||||
char hostname_[1024];
|
||||
uint64_t machine_id_;
|
||||
|
||||
std::ofstream stream_;
|
||||
};
|
||||
|
||||
// The single plugin instance: allocated by rocprofiler_plugin_initialize, deleted and reset
// to nullptr by rocprofiler_plugin_finalize.
perfetto_plugin_t* perfetto_plugin = nullptr;
|
||||
|
||||
} // namespace
|
||||
|
||||
int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_version,
|
||||
uint32_t rocprofiler_minor_version) {
|
||||
if (rocprofiler_major_version != ROCPROFILER_VERSION_MAJOR ||
|
||||
rocprofiler_minor_version > ROCPROFILER_VERSION_MINOR)
|
||||
return -1;
|
||||
|
||||
if (perfetto_plugin != nullptr) return -1;
|
||||
|
||||
perfetto_plugin = new perfetto_plugin_t();
|
||||
if (perfetto_plugin->IsValid()) return 0;
|
||||
|
||||
delete perfetto_plugin;
|
||||
perfetto_plugin = nullptr;
|
||||
return -1;
|
||||
}
|
||||
|
||||
void rocprofiler_plugin_finalize() {
|
||||
if (!perfetto_plugin) return;
|
||||
delete perfetto_plugin;
|
||||
perfetto_plugin = nullptr;
|
||||
}
|
||||
|
||||
// Plugin entry point for a batch of buffered records.
// Returns -1 when no valid plugin instance exists; otherwise the result of
// perfetto_plugin_t::WriteBufferRecords for the range [begin, end).
ROCPROFILER_EXPORT int rocprofiler_plugin_write_buffer_records(
    const rocprofiler_record_header_t* begin, const rocprofiler_record_header_t* end,
    rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
  const bool plugin_ready = perfetto_plugin && perfetto_plugin->IsValid();
  return plugin_ready ? perfetto_plugin->WriteBufferRecords(begin, end, session_id, buffer_id)
                      : -1;
}
|
||||
|
||||
// Plugin entry point for a single tracer record.
// Returns -1 when no valid plugin instance exists. A record whose header id handle is 0 is
// dropped without being written; every other record is handed to FlushTracerRecord. Both of
// those outcomes return 0.
ROCPROFILER_EXPORT int rocprofiler_plugin_write_record(rocprofiler_record_tracer_t record,
                                                       rocprofiler_session_id_t session_id) {
  const bool plugin_ready = perfetto_plugin && perfetto_plugin->IsValid();
  if (!plugin_ready) return -1;

  if (record.header.id.handle != 0) {
    perfetto_plugin->FlushTracerRecord(record, session_id);
  }
  return 0;
}
|
||||
@@ -1,189 +0,0 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright (c) 2017, The Android Open Source Project
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
# Global OWNERS that can approve Perfetto changes.
|
||||
# Please look first at OWNERS in the various subdirectories before falling back
|
||||
# on this, as the former tend to be more brain-cache-hot.
|
||||
|
||||
# Perfetto tracing internals and API/ABI boundaries.
|
||||
primiano@google.com
|
||||
skyostil@google.com
|
||||
|
||||
# UI, Ftrace interop, traced_probes, protozero, Android internals.
|
||||
hjd@google.com
|
||||
|
||||
# Trace Processor, metrics, infra.
|
||||
lalitm@google.com
|
||||
|
||||
# Callstack / memory profilers, traced_probes & Linux internals.
|
||||
ddiproietto@google.com
|
||||
rsavitski@google.com
|
||||
|
||||
# Chromium-related things and tracing SDK.
|
||||
eseckler@google.com
|
||||
nuskos@google.com
|
||||
oysteine@google.com
|
||||
|
||||
# Most Android-related metrics.
|
||||
ilkos@google.com
|
||||
|
||||
# fmayer@ left the team. Please try first rsavitski@, ddiproietto@ or primiano@
|
||||
# and leave fmayer@ as an emergency-only escalation on profilers.
|
||||
fmayer@google.com
|
||||
|
||||
# chromium.org aliases for adding DEPS entries from chromium subprojects to
|
||||
# third_party/perfetto.
|
||||
eseckler@chromium.org
|
||||
nuskos@chromium.org
|
||||
skyostil@chromium.org
|
||||
@@ -1,394 +0,0 @@
|
||||
# Tracing SDK
|
||||
|
||||
The Perfetto Tracing SDK is a C++11 library that allows userspace applications
|
||||
to emit trace events and add more app-specific context to a Perfetto trace.
|
||||
|
||||
When using the Tracing SDK there are two main aspects to consider:
|
||||
|
||||
1. Whether you are interested only in tracing events coming from your own app
|
||||
or want to collect full-stack traces that overlay app trace events with
|
||||
system trace events like scheduler traces, syscalls or any other Perfetto
|
||||
data source.
|
||||
|
||||
2. For app-specific tracing, whether you need to trace simple types of timeline
|
||||
events (e.g., slices, counters) or need to define complex data sources with a
|
||||
custom strongly-typed schema (e.g., for dumping the state of a subsystem of
|
||||
your app into the trace).
|
||||
|
||||
For Android-only instrumentation, the advice is to keep using the existing
|
||||
[android.os.Trace (SDK)][atrace-sdk] / [ATrace_* (NDK)][atrace-ndk] if they
|
||||
are sufficient for your use cases. Atrace-based instrumentation is fully
|
||||
supported in Perfetto.
|
||||
See the [Data Sources -> Android System -> Atrace Instrumentation][atrace-ds]
|
||||
for details.
|
||||
|
||||
## Getting started
|
||||
|
||||
TIP: The code from these examples is also available [in the
|
||||
repository](/examples/sdk/README.md).
|
||||
|
||||
To start using the Client API, first check out the latest SDK release:
|
||||
|
||||
```bash
|
||||
git clone https://android.googlesource.com/platform/external/perfetto -b v23.0
|
||||
```
|
||||
|
||||
The SDK consists of two files, `sdk/perfetto.h` and `sdk/perfetto.cc`. These are
|
||||
an amalgamation of the Client API designed to be easy to integrate into existing
|
||||
build systems. The sources are self-contained and require only a C++11 compliant
|
||||
standard library.
|
||||
|
||||
For example, to add the SDK to a CMake project, edit your CMakeLists.txt:
|
||||
|
||||
```cmake
|
||||
cmake_minimum_required(VERSION 3.13)
|
||||
project(PerfettoExample)
|
||||
find_package(Threads)
|
||||
|
||||
# Define a static library for Perfetto.
|
||||
include_directories(perfetto/sdk)
|
||||
add_library(perfetto STATIC perfetto/sdk/perfetto.cc)
|
||||
|
||||
# Link the library to your main executable.
|
||||
add_executable(example example.cc)
|
||||
target_link_libraries(example perfetto ${CMAKE_THREAD_LIBS_INIT})
|
||||
```
|
||||
|
||||
Next, initialize Perfetto in your program:
|
||||
|
||||
```C++
|
||||
#include <perfetto.h>
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
perfetto::TracingInitArgs args;
|
||||
|
||||
// The backends determine where trace events are recorded. You may select one
|
||||
// or more of:
|
||||
|
||||
// 1) The in-process backend only records within the app itself.
|
||||
args.backends |= perfetto::kInProcessBackend;
|
||||
|
||||
// 2) The system backend writes events into a system Perfetto daemon,
|
||||
// allowing merging app and system events (e.g., ftrace) on the same
|
||||
// timeline. Requires the Perfetto `traced` daemon to be running (e.g.,
|
||||
// on Android Pie and newer).
|
||||
args.backends |= perfetto::kSystemBackend;
|
||||
|
||||
perfetto::Tracing::Initialize(args);
|
||||
}
|
||||
```
|
||||
|
||||
You are now ready to instrument your app with trace events.
|
||||
|
||||
## Custom data sources vs Track events
|
||||
|
||||
The SDK offers two abstraction layers to inject tracing data, built on top of
|
||||
each other, which trade off code complexity vs expressive power:
|
||||
[track events](#track-events) and [custom data sources](#custom-data-sources).
|
||||
|
||||
### Track events
|
||||
|
||||
Track events are the suggested option when dealing with app-specific tracing as
|
||||
they take care of a number of subtleties (e.g., thread safety, flushing, string
|
||||
interning).
|
||||
Track events are time-bounded events (e.g., slices, counters) based on simple
|
||||
`TRACE_EVENT` annotation tags in the codebase, like this:
|
||||
|
||||
```c++
|
||||
#include <perfetto.h>
|
||||
|
||||
PERFETTO_DEFINE_CATEGORIES(
|
||||
perfetto::Category("rendering")
|
||||
.SetDescription("Events from the graphics subsystem"),
|
||||
perfetto::Category("network")
|
||||
.SetDescription("Network upload and download statistics"));
|
||||
|
||||
...
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
...
|
||||
perfetto::Tracing::Initialize(args);
|
||||
perfetto::TrackEvent::Register();
|
||||
}
|
||||
|
||||
...
|
||||
|
||||
void LayerTreeHost::DoUpdateLayers() {
|
||||
TRACE_EVENT("rendering", "LayerTreeHost::DoUpdateLayers");
|
||||
...
|
||||
for (PictureLayer& pl : layers) {
|
||||
TRACE_EVENT("rendering", "PictureLayer::Update");
|
||||
pl.Update();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Which are rendered in the UI as follows:
|
||||
|
||||

|
||||
|
||||
Track events are the best default option and serve most tracing use cases with
|
||||
very little complexity.
|
||||
|
||||
To include your new track events in the trace, ensure that the `track_event`
|
||||
data source is included in the trace config. If you do not specify any
|
||||
categories then all non-debug categories will be included by default. However,
|
||||
you can also add just the categories you are interested in like so:
|
||||
|
||||
```protobuf
|
||||
data_sources {
|
||||
config {
|
||||
name: "track_event"
|
||||
track_event_config {
|
||||
enabled_categories: "rendering"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
See the [Track events page](track-events.md) for full instructions.
|
||||
|
||||
### Custom data sources
|
||||
|
||||
For most uses, track events are the most straightforward way of instrumenting
|
||||
apps for tracing. However, in some rare circumstances they are not
|
||||
flexible enough, e.g., when the data doesn't fit the notion of a track or is
|
||||
high volume enough that it needs a strongly typed schema to minimize the size of
|
||||
each event. In this case, you can implement a *custom data source* for
|
||||
Perfetto.
|
||||
|
||||
Unlike track events, when working with custom data sources, you will also need
|
||||
corresponding changes in [trace processor](/docs/analysis/trace-processor.md)
|
||||
to enable importing your data format.
|
||||
|
||||
A custom data source is a subclass of `perfetto::DataSource`. Perfetto will
|
||||
automatically create one instance of the class for each tracing session it is
|
||||
active in (usually just one).
|
||||
|
||||
```C++
|
||||
class CustomDataSource : public perfetto::DataSource<CustomDataSource> {
|
||||
public:
|
||||
void OnSetup(const SetupArgs&) override {
|
||||
// Use this callback to apply any custom configuration to your data source
|
||||
// based on the TraceConfig in SetupArgs.
|
||||
}
|
||||
|
||||
void OnStart(const StartArgs&) override {
|
||||
// This notification can be used to initialize the GPU driver, enable
|
||||
// counters, etc. StartArgs will contain the DataSourceDescriptor,
|
||||
// which can be extended.
|
||||
}
|
||||
|
||||
void OnStop(const StopArgs&) override {
|
||||
// Undo any initialization done in OnStart.
|
||||
}
|
||||
|
||||
// Data sources can also have per-instance state.
|
||||
int my_custom_state = 0;
|
||||
};
|
||||
|
||||
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
```
|
||||
|
||||
The data source's static data should be defined in one source file like this:
|
||||
|
||||
```C++
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
```
|
||||
|
||||
Custom data sources need to be registered with Perfetto:
|
||||
|
||||
```C++
|
||||
int main(int argc, char** argv) {
|
||||
...
|
||||
perfetto::Tracing::Initialize(args);
|
||||
// Add the following:
|
||||
perfetto::DataSourceDescriptor dsd;
|
||||
dsd.set_name("com.example.custom_data_source");
|
||||
CustomDataSource::Register(dsd);
|
||||
}
|
||||
```
|
||||
|
||||
As with all data sources, the custom data source needs to be specified in the
|
||||
trace config to enable tracing:
|
||||
|
||||
```C++
|
||||
perfetto::TraceConfig cfg;
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("com.example.custom_data_source");
|
||||
```
|
||||
|
||||
Finally, call the `Trace()` method to record an event with your custom data
|
||||
source. The lambda function passed to that method will only be called if tracing
|
||||
is enabled. It is always called synchronously and possibly multiple times if
|
||||
multiple concurrent tracing sessions are active.
|
||||
|
||||
```C++
|
||||
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
|
||||
auto packet = ctx.NewTracePacket();
|
||||
packet->set_timestamp(perfetto::TrackEvent::GetTraceTimeNs());
|
||||
packet->set_for_testing()->set_str("Hello world!");
|
||||
});
|
||||
```
|
||||
|
||||
If necessary the `Trace()` method can access the custom data source state
|
||||
(`my_custom_state` in the example above). Doing so will take a mutex to
|
||||
ensure the data source isn't destroyed (e.g., because of stopping tracing) while
|
||||
the `Trace()` method is called on another thread. For example:
|
||||
|
||||
```C++
|
||||
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
|
||||
auto safe_handle = ctx.GetDataSourceLocked(); // Holds a RAII lock.
|
||||
DoSomethingWith(safe_handle->my_custom_state);
|
||||
});
|
||||
```
|
||||
|
||||
## In-process vs System mode
|
||||
|
||||
The two modes are not mutually exclusive. An app can be configured to work
|
||||
in both modes and respond both to in-process tracing requests and system
|
||||
tracing requests. Both modes generate the same trace file format.
|
||||
|
||||
### In-process mode
|
||||
|
||||
In this mode both the perfetto service and the app-defined data sources are
|
||||
hosted fully in-process, in the same process of the profiled app. No connection
|
||||
to the system `traced` daemon will be attempted.
|
||||
|
||||
In-process mode can be enabled by setting
|
||||
`TracingInitArgs.backends = perfetto::kInProcessBackend` when initializing the
|
||||
SDK, see examples below.
|
||||
|
||||
This mode is used to generate traces that contain only events emitted by
|
||||
the app, but not other types of events (e.g. scheduler traces).
|
||||
|
||||
The main advantage is that by running fully in-process, it doesn't require any
|
||||
special OS privileges and the profiled process can control the lifecycle of
|
||||
tracing sessions.
|
||||
|
||||
This mode is supported on Android, Linux, MacOS and Windows.
|
||||
|
||||
### System mode
|
||||
|
||||
In this mode the app-defined data sources will connect to the external `traced`
|
||||
service using the [IPC over UNIX socket][ipc].
|
||||
|
||||
System mode can be enabled by setting
|
||||
`TracingInitArgs.backends = perfetto::kSystemBackend` when initializing the SDK,
|
||||
see examples below.
|
||||
|
||||
The main advantage of this mode is that it is possible to create fused traces where
|
||||
app events are overlaid on the same timeline of OS events. This enables
|
||||
full-stack performance investigations, looking all the way through syscalls and
|
||||
kernel scheduling events.
|
||||
|
||||
The main limitation of this mode is that it requires the external `traced` daemon
|
||||
to be up and running and reachable through the UNIX socket connection.
|
||||
|
||||
This is suggested for local debugging or lab testing scenarios where the user
|
||||
(or the test harness) can control the OS deployment (e.g., sideload binaries on
|
||||
Android).
|
||||
|
||||
When using system mode, the tracing session must be controlled from the outside,
|
||||
using the `perfetto` command-line client
|
||||
(See [reference](/docs/reference/perfetto-cli)). This is because when collecting
|
||||
system traces, tracing data producers are not allowed to read back the trace
|
||||
data as it might disclose information about other processes and allow
|
||||
side-channel attacks.
|
||||
|
||||
* On Android 9 (Pie) and beyond, traced is shipped as part of the platform.
|
||||
* On older versions of Android, traced can be built from sources using the
|
||||
[standalone NDK-based workflow](/docs/contributing/build-instructions.md)
|
||||
and sideloaded via adb shell.
|
||||
* On Linux and MacOS `traced` must be built and run separately. See the
|
||||
[Linux quickstart](/docs/quickstart/linux-tracing.md) for instructions.
|
||||
|
||||
_System mode is not yet supported on Windows, due to the lack of an IPC
|
||||
implementation_.
|
||||
|
||||
## {#recording} Recording traces through the API
|
||||
|
||||
_Tracing through the API is currently only supported with the in-process mode.
|
||||
When using system mode, use the `perfetto` cmdline client (see quickstart
|
||||
guides)._
|
||||
|
||||
First initialize a [TraceConfig](/docs/reference/trace-config-proto.autogen)
|
||||
message which specifies what type of data to record.
|
||||
|
||||
If your app includes [track events](track-events.md) (i.e., `TRACE_EVENT`), you
|
||||
typically want to choose the categories which are enabled for tracing.
|
||||
|
||||
By default, all non-debug categories are enabled, but you can enable a specific
|
||||
one like this:
|
||||
|
||||
```C++
|
||||
perfetto::protos::gen::TrackEventConfig track_event_cfg;
|
||||
track_event_cfg.add_disabled_categories("*");
|
||||
track_event_cfg.add_enabled_categories("rendering");
|
||||
```
|
||||
|
||||
Next, build the main trace config together with the track event part:
|
||||
|
||||
```C++
|
||||
perfetto::TraceConfig cfg;
|
||||
cfg.add_buffers()->set_size_kb(1024); // Record up to 1 MiB.
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("track_event");
|
||||
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
```
|
||||
|
||||
If your app includes a custom data source, you can also enable it here:
|
||||
|
||||
```C++
|
||||
ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("my_data_source");
|
||||
```
|
||||
|
||||
After building the trace config, you can begin tracing:
|
||||
|
||||
```C++
|
||||
std::unique_ptr<perfetto::TracingSession> tracing_session(
|
||||
perfetto::Tracing::NewTrace());
|
||||
tracing_session->Setup(cfg);
|
||||
tracing_session->StartBlocking();
|
||||
```
|
||||
|
||||
TIP: API methods with `Blocking` in their name will suspend the calling thread
|
||||
until the respective operation is complete. There are also asynchronous
|
||||
variants that don't have this limitation.
|
||||
|
||||
Now that tracing is active, instruct your app to perform the operation you
|
||||
want to record. After that, stop tracing and collect the
|
||||
protobuf-formatted trace data:
|
||||
|
||||
```C++
|
||||
tracing_session->StopBlocking();
|
||||
std::vector<char> trace_data(tracing_session->ReadTraceBlocking());
|
||||
|
||||
// Write the trace into a file.
|
||||
std::ofstream output;
|
||||
output.open("example.perfetto-trace", std::ios::out | std::ios::binary);
|
||||
output.write(&trace_data[0], trace_data.size());
|
||||
output.close();
|
||||
```
|
||||
|
||||
To save memory with longer traces, you can also tell Perfetto to write
|
||||
directly into a file by passing a file descriptor into Setup(), remembering
|
||||
to close the file after tracing is done:
|
||||
|
||||
```C++
|
||||
int fd = open("example.perfetto-trace", O_RDWR | O_CREAT | O_TRUNC, 0600);
|
||||
tracing_session->Setup(cfg, fd);
|
||||
tracing_session->StartBlocking();
|
||||
// ...
|
||||
tracing_session->StopBlocking();
|
||||
close(fd);
|
||||
```
|
||||
|
||||
The resulting trace file can be directly opened in the [Perfetto
|
||||
UI](https://ui.perfetto.dev) or the [Trace Processor](/docs/analysis/trace-processor.md).
|
||||
|
||||
[ipc]: /docs/design-docs/api-and-abi.md#socket-protocol
|
||||
[atrace-ds]: /docs/data-sources/atrace.md
|
||||
[atrace-ndk]: https://developer.android.com/ndk/reference/group/tracing
|
||||
[atrace-sdk]: https://developer.android.com/reference/android/os/Trace
|
||||
File diff soppresso perché troppo grande
Carica Diff
File diff soppresso perché troppo grande
Carica Diff
@@ -1,63 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <systemd/sd-id128.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
|
||||
#include "src/utils/helper.h"
|
||||
|
||||
// Evaluates a ROCMTools API call once and aborts through rocmtools::fatal()
// when the returned status is anything other than ROCPROFILER_STATUS_SUCCESS.
// Wrapped in do { } while (false) so it behaves as a single statement.
#define CHECK_ROCMTOOLS(call)                                   \
  do {                                                          \
    if ((call) != ROCPROFILER_STATUS_SUCCESS) {                 \
      rocmtools::fatal("Error: ROCMTools API Call Error!");     \
    }                                                           \
  } while (false)
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the id of the calling process. The value is obtained once through
// the raw getpid system call and kept in a function-local static, so every
// later call returns that first result.
[[maybe_unused]] uint32_t GetPid() {
  static uint32_t cached_pid = static_cast<uint32_t>(syscall(__NR_getpid));
  return cached_pid;
}
|
||||
|
||||
[[maybe_unused]] uint64_t GetMachineID() {
|
||||
char hostname[1023] = "\0";
|
||||
gethostname(hostname, 1023);
|
||||
sd_id128_t ret;
|
||||
char machine_id[SD_ID128_STRING_MAX];
|
||||
[[maybe_unused]] int status = sd_id128_get_machine(&ret);
|
||||
assert(status == 0 && "Error: Couldn't get machine id!");
|
||||
if (sd_id128_to_string(ret, machine_id)) return std::hash<std::string>{}(machine_id);
|
||||
return std::rand();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@@ -1,247 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Resolve the directory that holds this script (symlinks followed by realpath)
# and, by default, treat its parent as the ROCm root. All expansions are quoted
# so that install or checkout paths containing whitespace resolve correctly.
ROCPROFV2_DIR=$(dirname -- "$(realpath "${BASH_SOURCE[0]}")")
ROCM_DIR=$(dirname -- "${ROCPROFV2_DIR}")

# RUN_FROM_BUILD=1 enables the build/test/install helper options and makes the
# script pick libraries and counter definitions from the build tree.
RUN_FROM_BUILD=0
if [[ "$ROCPROFV2_DIR" == *"/build"* ]]; then
  RUN_FROM_BUILD=1
elif [[ "$ROCPROFV2_DIR" == *"/rocmtools"* ]]; then
  # Script sits directly in the source checkout: the checkout itself is the root.
  RUN_FROM_BUILD=1
  ROCM_DIR=$ROCPROFV2_DIR
fi
|
||||
|
||||
# Print the command-line help and terminate the script with status 1.
# The build/test/install helpers are listed only when RUN_FROM_BUILD is 1,
# i.e. when the script runs from a source or build tree.
usage() {
  echo -e "ROCMTools Run Script Usage:"
  echo -e "-h | --help For showing this message"
  echo -e "--list-counters For showing all available counters for the current GPUs"
  if [ "$RUN_FROM_BUILD" == 1 ]; then
    echo -e "-b | --build For compiling"
    echo -e "-cb | --clean-build For full clean build"
    echo -e "-t | --test For Running the tests"
    echo -e "-ct | --clean-build-test For Running the tests after a clean build"
    echo -e "-mt | --mem-test For Running the Memory Leak tests. This run requires building using -acb | --asan-clean-build option"
    echo -e "-acb | --asan-clean-build For compiling with ASAN library attached"
    echo -e "--install For installing rocmtools without clean build in the default installation folder (review build.sh to know more about the default paths)"
    echo -e "--clean-install For installing rocmtools with new clean build in the default installation folder (review build.sh to know more about the default paths)"
  fi
  echo -e "--hip-api For Collecting HIP API Traces"
  # The next two descriptions used to be swapped between HIP and HSA.
  echo -e "--hip-activity For Collecting HIP API Activities Traces"
  echo -e "--hsa-api For Collecting HSA API Traces"
  echo -e "--hsa-activity For Collecting HSA API Activities Traces"
  echo -e "--roctx-trace For Collecting ROCTx Traces"
  echo -e "--kernel-trace For Collecting Kernel dispatch Traces"
  echo -e "--sys-trace For Collecting HIP and HSA APIs and their Activities Traces along ROCTX and Kernel Dispatch traces"
  echo -e "--plugin PLUGIN_NAME For enabling a plugin (file/perfetto)"
  echo -e "-i | --input For adding counters file path (every line in the text file represents a counter)"
  # Long form spelled the way the option parser accepts it.
  echo -e "-o | --output-file-name For the output file name"
  echo -e "-d | --output-directory For adding output path where the output files will be saved"
  echo -e "-fi | --flush-interval For adding a flush interval in milliseconds, every \"flush interval\" the buffers will be flushed"
  exit 1
}
|
||||
|
||||
# Nothing to profile and nothing to do without at least one argument.
if [ -z "$1" ] ; then
  usage
  exit 1
fi

# Guard for the build/test/install helpers, which are only available when the
# script runs from a source or build tree (RUN_FROM_BUILD=1). Elsewhere the
# option is rejected like any unknown option. Without this guard such an option
# was neither consumed nor rejected, and the parsing loop never terminated.
require_build_tree() {
  if [ "$RUN_FROM_BUILD" != 1 ]; then
    echo -e "Wrong option \"$1\", Please use the following options:\n"
    usage
    exit 1
  fi
}

# Consume leading options. The first word that does not start with "-" ends
# the loop; it and everything after it stay in the positional parameters as
# the application command line.
# Options taking a value use two single shifts rather than "shift 2": a single
# shift still consumes the option when its value is missing, whereas "shift 2"
# would consume nothing and stall the loop.
while true ; do
  case "$1" in
    -h|--help)
      usage
      exit 1
      ;;
    -b|--build)
      require_build_tree "$1"
      TO_CLEAN=no ./build.sh
      exit 1
      ;;
    -acb|--asan-clean-build)
      require_build_tree "$1"
      ASAN=yes TO_CLEAN=yes ./build.sh
      exit 1
      ;;
    -cb|--clean-build)
      require_build_tree "$1"
      TO_CLEAN=yes ./build.sh
      exit 1
      ;;
    -t|--test)
      require_build_tree "$1"
      export ROCPROFILER_METRICS_PATH="$ROCM_DIR/build/counters/derived_counters.xml"
      TO_CLEAN=no "$ROCM_DIR/build.sh"
      pushd build
      ./run_tests.sh
      exit 1
      ;;
    -mt|--mem-test)
      require_build_tree "$1"
      ASAN=yes TO_CLEAN=yes ./build.sh
      ./tests/memorytests/run_asan_tests.sh "$ROCM_DIR/build/tests/featuretests/profiler/gtests/apps/hip_vectoradd" "$ROCM_DIR/build/memleaks.log"
      exit 1
      ;;
    -ct|--clean-build-test)
      require_build_tree "$1"
      TO_CLEAN=yes "$ROCM_DIR/build.sh"
      pushd build
      ./run_tests.sh
      exit 1
      ;;
    --install)
      require_build_tree "$1"
      TO_CLEAN=no "$ROCM_DIR/build.sh"
      pushd build
      make install
      exit 1
      ;;
    --clean-install)
      require_build_tree "$1"
      TO_CLEAN=yes "$ROCM_DIR/build.sh"
      pushd build
      make install
      exit 1
      ;;
    --list-counters)
      if [ "$RUN_FROM_BUILD" == 1 ]; then
        export ROCPROFILER_METRICS_PATH="$ROCM_DIR/build/counters/derived_counters.xml"
        "$ROCM_DIR/build/src/tools/ctrl"
      else
        export ROCPROFILER_METRICS_PATH="$ROCPROFV2_DIR/../libexec/rocmtools/counters/derived_counters.xml"
        export LD_LIBRARY_PATH="$ROCPROFV2_DIR/../lib:$LD_LIBRARY_PATH"
        export LD_PRELOAD="$ROCPROFV2_DIR/../lib/librocprofiler_tool.so"
        "$ROCPROFV2_DIR/../libexec/rocmtools/ctrl"
      fi
      exit 1
      ;;
    -i|--input)
      # The counters file must be named and readable.
      if [ -n "$2" ] && [ -r "$2" ] ; then
        if [ "$RUN_FROM_BUILD" == 1 ]; then
          export ROCPROFILER_METRICS_PATH="$ROCM_DIR/build/counters/derived_counters.xml"
        else
          export ROCPROFILER_METRICS_PATH="$ROCPROFV2_DIR/../libexec/rocmtools/counters/derived_counters.xml"
        fi
        export COUNTERS_PATH="$2"
      else
        echo -e "Error: \"$2\" doesn't exist!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -o|--output-file|--output-file-name)
      # Both long spellings are accepted: the help text and the parser
      # historically disagreed on the name.
      if [ -n "$2" ] ; then
        export OUT_FILE_NAME="$2"
      else
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -d|--output-directory)
      if [ -n "$2" ] ; then
        mkdir -p "$2"
        export OUTPUT_PATH="$2"
        # Unexported copy of the base directory; OUTPUT_PATH itself is
        # re-exported per counter line later in the script.
        OUTPUT_PATH_INTERNAL="$2"
      else
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -fi|--flush-interval)
      # Non-numeric input makes the -gt test fail; its diagnostic is silenced
      # because the message below already explains the problem.
      if [ -n "$2" ] && [ "$2" -gt 0 ] 2>/dev/null ; then
        export ROCPROFILER_FLUSH_INTERVAL="$2"
      else
        echo -e "Wrong input \"$2\" for flush interval, it needs to be integer greater than zero!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    --hip-api)
      export ROCPROFILER_HIP_API_TRACE=1
      shift
      ;;
    --hip-activity)
      export ROCPROFILER_HIP_API_TRACE=1
      export ROCPROFILER_HIP_ACTIVITY_TRACE=1
      shift
      ;;
    --hsa-api)
      export ROCPROFILER_HSA_API_TRACE=1
      shift
      ;;
    --hsa-activity)
      export ROCPROFILER_HSA_API_TRACE=1
      export ROCPROFILER_HSA_ACTIVITY_TRACE=1
      shift
      ;;
    --roctx-trace)
      export ROCPROFILER_ROCTX_TRACE=1
      shift
      ;;
    --kernel-trace)
      export ROCPROFILER_KERNEL_TRACE=1
      shift
      ;;
    --sys-trace)
      export ROCPROFILER_HIP_API_TRACE=1
      export ROCPROFILER_HIP_ACTIVITY_TRACE=1
      export ROCPROFILER_HSA_API_TRACE=1
      export ROCPROFILER_HSA_ACTIVITY_TRACE=1
      export ROCPROFILER_ROCTX_TRACE=1
      export ROCPROFILER_KERNEL_TRACE=1
      shift
      ;;
    --amd-sys)
      export ROCPROFILER_ENABLE_AMDSYS="$2"
      shift
      shift
      ;;
    --plugin)
      # Quoted test: an unquoted [ -n $2 ] is true even when no name is given.
      if [ -n "$2" ] ; then
        PLUGIN=$2
        if [ "$RUN_FROM_BUILD" == 1 ]; then
          export ROCPROFILER_PLUGIN_LIB="lib${PLUGIN}_plugin.so"
        else
          export ROCPROFILER_PLUGIN_LIB="rocmtools/lib${PLUGIN}_plugin.so"
        fi
      else
        echo -e "Wrong input \"$2\" for plugin!"
        usage
        exit 1
      fi
      shift
      shift
      ;;
    -*)
      echo -e "Wrong option \"$1\", Please use the following options:\n"
      usage
      exit 1
      ;;
    *)
      break
      ;;
  esac
done
|
||||
|
||||
# Load the counters file chosen with -i/--input, one array entry per line.
# The "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
PMC_LINES=()
if [ -n "$COUNTERS_PATH" ]; then
  while IFS= read -r line || [[ -n "$line" ]]; do
    PMC_LINES+=( "$line" )
  done < "$COUNTERS_PATH"
fi

# Tool library to preload: taken from the build tree when running from it,
# otherwise from the lib directory under the ROCm root.
if [ "$RUN_FROM_BUILD" == 1 ]; then
  TOOL_LIB="$ROCM_DIR/build/librocprofiler_tool.so"
else
  TOOL_LIB="$ROCM_DIR/lib/librocprofiler_tool.so"
fi

# The application command line is passed as "$@" so that every argument
# reaches the application exactly as the user typed it (no re-splitting on
# whitespace and no glob expansion).
# NOTE(review): as before, only the first counters line is tested here, so a
# counters file whose first line is empty runs the application without counters.
if [ -n "${PMC_LINES[0]}" ]; then
  # One application run per counters line.
  COUNTER=1
  for pmc_line in "${PMC_LINES[@]}"; do
    export ROCPROFILER_COUNTERS="$pmc_line"
    if [ -n "$OUTPUT_PATH" ]; then
      # Each run gets its own pmc_<n> directory under the requested output
      # directory, together with a record of the counters it collected.
      FINAL_PATH="$OUTPUT_PATH_INTERNAL/pmc_$COUNTER"
      echo -e "\nThe output path for the following counters: $FINAL_PATH"
      mkdir -p "$FINAL_PATH"
      echo "$ROCPROFILER_COUNTERS" > "$FINAL_PATH/pmc.txt"
      export OUTPUT_PATH="$FINAL_PATH"
      COUNTER=$((COUNTER + 1))
    fi
    LD_PRELOAD="$LD_PRELOAD:$TOOL_LIB" "$@"
  done
else
  LD_PRELOAD="$LD_PRELOAD:$TOOL_LIB" "$@"
fi
|
||||
|
||||
|
||||
@@ -1,92 +0,0 @@
|
||||
# rocmtools
|
||||
|
||||
|
||||
|
||||
## Getting started
|
||||
|
||||
To make it easy for you to get started with GitLab, here's a list of recommended next steps.
|
||||
|
||||
Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!
|
||||
|
||||
## Add your files
|
||||
|
||||
- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files
|
||||
- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command:
|
||||
|
||||
```
|
||||
cd existing_repo
|
||||
git remote add origin http://gitlab1.amd.com/vinodtipparaju/rocmtools.git
|
||||
git branch -M main
|
||||
git push -uf origin main
|
||||
```
|
||||
|
||||
## Integrate with your tools
|
||||
|
||||
- [ ] [Set up project integrations](http://gitlab1.amd.com/vinodtipparaju/rocmtools/-/settings/integrations)
|
||||
|
||||
## Collaborate with your team
|
||||
|
||||
- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/)
|
||||
- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html)
|
||||
- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically)
|
||||
- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/)
|
||||
- [ ] [Automatically merge when pipeline succeeds](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html)
|
||||
|
||||
## Test and Deploy
|
||||
|
||||
Use the built-in continuous integration in GitLab.
|
||||
|
||||
- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html)
|
||||
- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing (SAST)](https://docs.gitlab.com/ee/user/application_security/sast/)
|
||||
- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html)
|
||||
- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/)
|
||||
- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html)
|
||||
|
||||
***
|
||||
|
||||
# Editing this README
|
||||
|
||||
When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thank you to [makeareadme.com](https://www.makeareadme.com/) for this template.
|
||||
|
||||
## Suggestions for a good README
|
||||
Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information.
|
||||
|
||||
## Name
|
||||
Choose a self-explaining name for your project.
|
||||
|
||||
## Description
|
||||
Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors.
|
||||
|
||||
## Badges
|
||||
On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge.
|
||||
|
||||
## Visuals
|
||||
Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method.
|
||||
|
||||
## Installation
|
||||
Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people using your project as quickly as possible. If it only runs in a specific context, like a particular programming language version or operating system, or has dependencies that have to be installed manually, also add a Requirements subsection.
|
||||
|
||||
## Usage
|
||||
Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README.
|
||||
|
||||
## Support
|
||||
Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
|
||||
|
||||
## Roadmap
|
||||
If you have ideas for releases in the future, it is a good idea to list them in the README.
|
||||
|
||||
## Contributing
|
||||
State if you are open to contributions and what your requirements are for accepting them.
|
||||
|
||||
For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self.
|
||||
|
||||
You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser.
|
||||
|
||||
## Authors and acknowledgment
|
||||
Show your appreciation to those who have contributed to the project.
|
||||
|
||||
## License
|
||||
For open source projects, say how it is licensed.
|
||||
|
||||
## Project status
|
||||
If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
|
||||
@@ -37,11 +37,11 @@ THE SOFTWARE.
|
||||
// a profiling queue supporting a submit callback interface.
|
||||
//
|
||||
//
|
||||
/** \mainpage ROCMTools API Specification
|
||||
/** \mainpage ROCProfiler API Specification
|
||||
*
|
||||
* \section introduction Introduction
|
||||
*
|
||||
* ROCMTools library, GPU Applications Profiling/Tracing APIs.
|
||||
* ROCProfiler library, GPU Applications Profiling/Tracing APIs.
|
||||
* The API provides functionality for profiling GPU applications in kernel and
|
||||
* application and user mode and also with no replay mode at all and it
|
||||
* provides the records pool support with an easy sequence of calls, so the
|
||||
@@ -201,7 +201,7 @@ ROCPROFILER_API uint32_t rocprofiler_version_minor();
|
||||
*/
|
||||
|
||||
/**
|
||||
* ROCMTools API status codes.
|
||||
* ROCProfiler API status codes.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
@@ -213,11 +213,11 @@ typedef enum {
|
||||
*/
|
||||
ROCPROFILER_STATUS_ERROR = -1,
|
||||
/**
|
||||
* ROCMTool is already initialized.
|
||||
* ROCProfiler is already initialized.
|
||||
*/
|
||||
ROCPROFILER_STATUS_ERROR_ALREADY_INITIALIZED = -2,
|
||||
/**
|
||||
* ROCMTool is not initialized.
|
||||
* ROCProfiler is not initialized.
|
||||
*/
|
||||
ROCPROFILER_STATUS_ERROR_NOT_INITIALIZED = -3,
|
||||
/**
|
||||
@@ -347,7 +347,7 @@ typedef enum {
|
||||
/**
|
||||
* Query the textual description of the given error for the current thread.
|
||||
*
|
||||
* Returns a NULL terminated string describing the error of the given ROCMTools
|
||||
* Returns a NULL terminated string describing the error of the given ROCProfiler
|
||||
* API call by the calling thread that did not return success.
|
||||
*
|
||||
* @retval Return the error string.
|
||||
@@ -356,7 +356,7 @@ ROCPROFILER_API const char* rocprofiler_error_str(rocprofiler_status_t status) R
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup rocprofiler_general_group General ROCMTools Requirements
|
||||
/** \defgroup rocprofiler_general_group General ROCProfiler Requirements
|
||||
* @{
|
||||
*/
|
||||
|
||||
@@ -385,7 +385,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_finalize() ROCPROFILER_VERSION_
|
||||
/**
|
||||
* \addtogroup sessions_handling_group
|
||||
* @{
|
||||
* ROCMTools Session Modes.
|
||||
* ROCProfiler Session Modes.
|
||||
*/
|
||||
|
||||
/**
|
||||
@@ -483,7 +483,7 @@ typedef enum {
|
||||
} rocprofiler_record_kind_t;
|
||||
|
||||
/**
|
||||
* Generic ROCMtool record header.
|
||||
* Generic ROCProfiler record header.
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
@@ -954,7 +954,7 @@ typedef struct {
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* ROCMtool General Record base header to identify the id and kind of every
|
||||
* ROCProfiler General Record base header to identify the id and kind of every
|
||||
* record
|
||||
*/
|
||||
rocprofiler_record_header_t header;
|
||||
@@ -1030,7 +1030,7 @@ typedef struct {
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* ROCMtool General Record base header to identify the id and kind of every
|
||||
* ROCProfiler General Record base header to identify the id and kind of every
|
||||
* record
|
||||
*/
|
||||
rocprofiler_record_header_t header;
|
||||
@@ -1059,27 +1059,27 @@ typedef struct {
|
||||
* This will represent all the information reported by the
|
||||
* ATT tracer such as the kernel and its thread trace data.
|
||||
* This record can be flushed to the user using
|
||||
* ::rocmtools_buffer_callback_t
|
||||
* ::rocprofiler_buffer_callback_t
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* ROCMtool General Record base header to identify the id and kind of every
|
||||
* ROCProfiler General Record base header to identify the id and kind of every
|
||||
* record
|
||||
*/
|
||||
rocprofiler_record_header_t header;
|
||||
/**
|
||||
* Kernel Identifier to be used by the user to get the kernel info using
|
||||
* ::rocmtools_query_kernel_info
|
||||
* ::rocprofiler_query_kernel_info
|
||||
*/
|
||||
rocprofiler_kernel_id_t kernel_id;
|
||||
/**
|
||||
* Agent Identifier to be used by the user to get the Agent Information using
|
||||
* ::rocmtools_query_agent_info
|
||||
* ::rocprofiler_query_agent_info
|
||||
*/
|
||||
rocprofiler_agent_id_t gpu_id;
|
||||
/**
|
||||
* Queue Identifier to be used by the user to get the Queue Information using
|
||||
* ::rocmtools_query_agent_info
|
||||
* ::rocprofiler_query_agent_info
|
||||
*/
|
||||
rocprofiler_queue_id_t queue_id;
|
||||
/**
|
||||
@@ -1481,7 +1481,7 @@ typedef struct {
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* ROCMtool General Record base header to identify the id and kind of every
|
||||
* ROCProfiler General Record base header to identify the id and kind of every
|
||||
* record
|
||||
*/
|
||||
rocprofiler_record_header_t header;
|
||||
@@ -1574,7 +1574,7 @@ typedef struct {
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* ROCMtool General Record base header to identify the id and kind of every
|
||||
* ROCProfiler General Record base header to identify the id and kind of every
|
||||
* record
|
||||
*/
|
||||
rocprofiler_record_header_t header;
|
||||
@@ -1682,7 +1682,7 @@ ROCPROFILER_API rocprofiler_status_t rocprofiler_next_record(const rocprofiler_r
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup sessions_handling_group ROCMTools Sessions
|
||||
/** \defgroup sessions_handling_group ROCProfiler Sessions
|
||||
* @{
|
||||
*/
|
||||
|
||||
@@ -1712,7 +1712,7 @@ typedef enum {
|
||||
|
||||
/**
|
||||
* Create Session
|
||||
* A ROCMtools Session is having enough information about what needs to be
|
||||
* A ROCProfiler Session is having enough information about what needs to be
|
||||
* collected or traced and it allows the user to start/stop profiling/tracing
|
||||
* whenever required.
|
||||
* Session will hold multiple mode, that can be added using
|
||||
|
||||
@@ -18,17 +18,17 @@
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
/** \section rocprofiler_plugin_api ROCMTools Plugin API
|
||||
/** \section rocprofiler_plugin_api ROCProfiler Plugin API
|
||||
*
|
||||
* The ROCMTools Plugin API is used by the ROCMTools Tool to output all
|
||||
* profiling information. Different implementations of the ROCMTools Plugin
|
||||
* The ROCProfiler Plugin API is used by the ROCProfiler Tool to output all
|
||||
* profiling information. Different implementations of the ROCProfiler Plugin
|
||||
* API can be developed that output the data in different formats. The
|
||||
* ROCMTools Tool can be configured to load a specific library that supports
|
||||
* ROCProfiler Tool can be configured to load a specific library that supports
|
||||
* the user desired format.
|
||||
*
|
||||
* The API is not thread safe. It is the responsibility of the ROCMTools Tool
|
||||
* The API is not thread safe. It is the responsibility of the ROCProfiler Tool
|
||||
* to ensure the operations are synchronized and not called concurrently. There
|
||||
* is no requirement for the ROCMTools Tool to report trace data in any
|
||||
* is no requirement for the ROCProfiler Tool to report trace data in any
|
||||
* specific order. If the format supported by plugin requires specific
|
||||
* ordering, it is the responsibility of the plugin implementation to perform
|
||||
* any necessary sorting.
|
||||
@@ -36,7 +36,7 @@
|
||||
|
||||
/**
|
||||
* \file
|
||||
* ROCMTools Tool Plugin API interface.
|
||||
* ROCProfiler Tool Plugin API interface.
|
||||
*/
|
||||
|
||||
#ifndef ROCPROFILER_PLUGIN_H_
|
||||
@@ -50,14 +50,14 @@
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/** \defgroup rocprofiler_plugins ROCMTools Plugin API Specification
|
||||
/** \defgroup rocprofiler_plugins ROCProfiler Plugin API Specification
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \defgroup initialization_group Initialization and Finalization
|
||||
* \ingroup rocprofiler_plugins
|
||||
*
|
||||
* The ROCMTools Plugin API must be initialized before using any of the
|
||||
* The ROCProfiler Plugin API must be initialized before using any of the
|
||||
* operations to report trace data, and finalized after the last trace data has
|
||||
* been reported.
|
||||
*
|
||||
@@ -68,14 +68,14 @@ extern "C" {
|
||||
* Initialize plugin.
|
||||
* Must be called before any other operation.
|
||||
*
|
||||
* @param[in] rocprofiler_major_version The major version of the ROCMTools API
|
||||
* being used by the ROCMTools Tool. An error is reported if this does not
|
||||
* match the major version of the ROCMTools API used to build the plugin
|
||||
* @param[in] rocprofiler_major_version The major version of the ROCProfiler API
|
||||
* being used by the ROCProfiler Tool. An error is reported if this does not
|
||||
* match the major version of the ROCProfiler API used to build the plugin
|
||||
* library. This ensures compatibility of the trace data format.
|
||||
* @param[in] rocprofiler_minor_version The minor version of the ROCMTools API
|
||||
* being used by the ROCMTools Tool. An error is reported if the
|
||||
* \p ROCMTools_major_version matches and this is greater than the minor
|
||||
* version of the ROCMTools API used to build the plugin library. This ensures
|
||||
* @param[in] rocprofiler_minor_version The minor version of the ROCProfiler API
|
||||
* being used by the ROCProfiler Tool. An error is reported if the
|
||||
* \p rocprofiler_major_version matches and this is greater than the minor
|
||||
* version of the ROCProfiler API used to build the plugin library. This ensures
|
||||
* compatibility of the trace data format.
|
||||
* @return Returns 0 on success and -1 on error.
|
||||
*/
|
||||
|
||||
@@ -1,56 +1,66 @@
|
||||
# ###############################################################################
|
||||
# # Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
# #
|
||||
# # Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# # of this software and associated documentation files (the "Software"), to
|
||||
# # deal in the Software without restriction, including without limitation the
|
||||
# # rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# # sell copies of the Software, and to permit persons to whom the Software is
|
||||
# # furnished to do so, subject to the following conditions:
|
||||
# #
|
||||
# # The above copyright notice and this permission notice shall be included in
|
||||
# # all copies or substantial portions of the Software.
|
||||
# #
|
||||
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# # IN THE SOFTWARE.
|
||||
# ###############################################################################
|
||||
# ##############################################################################
|
||||
# # Copyright (c) 2022 Advanced Micro Devices, Inc. # # Permission is hereby
|
||||
# granted, free of charge, to any person obtaining a copy # of this software and
|
||||
# associated documentation files (the "Software"), to # deal in the Software
|
||||
# without restriction, including without limitation the # rights to use, copy,
|
||||
# modify, merge, publish, distribute, sublicense, and/or # sell copies of the
|
||||
# Software, and to permit persons to whom the Software is # furnished to do so,
|
||||
# subject to the following conditions: # # The above copyright notice and this
|
||||
# permission notice shall be included in # all copies or substantial portions of
|
||||
# the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
|
||||
# KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
# MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
|
||||
# EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
# DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
# OTHERWISE, ARISING # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
# USE OR OTHER DEALINGS # IN THE SOFTWARE.
|
||||
# ##############################################################################
|
||||
|
||||
# Locate the rocprofv2_att library. The install prefix is searched first, then
# ROCM_PATH, each also under the hsa-amd-aqlprofile sub-directory.
find_library(
  ROCPROFV2_ATT rocprofv2_att
  HINTS ${CMAKE_INSTALL_PREFIX}
  PATHS ${ROCM_PATH}
  PATH_SUFFIXES hsa-amd-aqlprofile)

# Export the located path through the environment. The variable has to be
# dereferenced with ${...}; a bare "$ROCPROFV2_ATT" is passed on as literal text.
# NOTE(review): set(ENV{...}) only changes the environment of the running CMake
# process, so this value is not visible to att.py at profiling time - confirm
# how ROCPROFV2_ATT_LIB_PATH is meant to reach the script.
set(ENV{ROCPROFV2_ATT_LIB_PATH} "${ROCPROFV2_ATT}")
|
||||
|
||||
# Building att plugin library
|
||||
file(GLOB ROCMTOOLS_UTIL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp)
|
||||
file(GLOB ROCPROFILER_UTIL_SRC_FILES ${PROJECT_SOURCE_DIR}/src/utils/helper.cpp)
|
||||
file(GLOB FILE_SOURCES att.cpp)
|
||||
add_library(att_plugin SHARED ${FILE_SOURCES} ${ROCMTOOLS_UTIL_SRC_FILES})
|
||||
add_library(att_plugin SHARED ${FILE_SOURCES} ${ROCPROFILER_UTIL_SRC_FILES})
|
||||
|
||||
set_target_properties(att_plugin PROPERTIES
|
||||
CXX_VISIBILITY_PRESET hidden
|
||||
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
|
||||
LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
set_target_properties(
|
||||
att_plugin
|
||||
PROPERTIES CXX_VISIBILITY_PRESET hidden
|
||||
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
|
||||
LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
|
||||
target_compile_definitions(att_plugin
|
||||
PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_HCC__=1)
|
||||
target_compile_definitions(att_plugin PRIVATE HIP_PROF_HIP_API_STRING=1
|
||||
__HIP_PLATFORM_HCC__=1)
|
||||
|
||||
target_include_directories(att_plugin PRIVATE ${PROJECT_SOURCE_DIR}/inc ${PROJECT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_link_options(att_plugin PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap -Wl,--no-undefined)
|
||||
target_link_libraries(att_plugin PRIVATE ${ROCPROFILER_TARGET} systemd hsa-runtime64::hsa-runtime64 stdc++fs)
|
||||
target_include_directories(
|
||||
att_plugin PRIVATE ${PROJECT_SOURCE_DIR}/inc ${PROJECT_SOURCE_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_link_options(
|
||||
att_plugin PRIVATE
|
||||
-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
|
||||
-Wl,--no-undefined)
|
||||
target_link_libraries(att_plugin PRIVATE ${ROCPROFILER_TARGET} systemd
|
||||
hsa-runtime64::hsa-runtime64 stdc++fs)
|
||||
|
||||
install(TARGETS att_plugin LIBRARY
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
|
||||
COMPONENT runtime)
|
||||
install(TARGETS att_plugin
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
|
||||
COMPONENT runtime)
|
||||
|
||||
configure_file(att.py att/att.py COPYONLY)
|
||||
configure_file(trace_view.py att/trace_view.py COPYONLY)
|
||||
#configure_file(t.db att/t.db COPYONLY)
|
||||
# configure_file(t.db att/t.db COPYONLY)
|
||||
configure_file(ui/index.html att/ui/index.html COPYONLY)
|
||||
configure_file(ui/logo.svg att/ui/logo.svg COPYONLY)
|
||||
configure_file(ui/styles.css att/ui/styles.css COPYONLY)
|
||||
#configure_file(ui/trace.json att/ui/trace.json COPYONLY)
|
||||
install(DIRECTORY
|
||||
${CMAKE_CURRENT_BINARY_DIR}/att
|
||||
# configure_file(ui/trace.json att/ui/trace.json COPYONLY)
|
||||
install(
|
||||
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/att
|
||||
DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}/rocprofiler
|
||||
USE_SOURCE_PERMISSIONS
|
||||
COMPONENT runtime)
|
||||
|
||||
|
||||
@@ -71,10 +71,10 @@ class att_plugin_t {
|
||||
}
|
||||
|
||||
size_t name_length;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
|
||||
CHECK_ROCPROFILER(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
|
||||
att_tracer_record->kernel_id, &name_length));
|
||||
const char* kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME,
|
||||
CHECK_ROCPROFILER(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME,
|
||||
att_tracer_record->kernel_id, &kernel_name_c));
|
||||
|
||||
std::string name_demangled = rocmtools::truncate_name(rocmtools::cxx_demangle(kernel_name_c));
|
||||
|
||||
@@ -102,11 +102,12 @@ class ReturnInfo(ctypes.Structure):
|
||||
('num_events', ctypes.c_uint64),
|
||||
('perfevents', POINTER(PerfEvent))]
|
||||
|
||||
rocprofv2_att_lib = os.getenv('ROCPROFV2_ATT_LIB_PATH')
|
||||
try: # For build dir
|
||||
path_to_parser = os.path.abspath('/usr/lib/hsa-amd-aqlprofile/librocprofv2_att.so')
|
||||
path_to_parser = os.path.abspath(rocprofv2_att_lib)
|
||||
SO = CDLL(path_to_parser)
|
||||
except: # For installed dir
|
||||
path_to_parser = os.path.abspath('/usr/local/lib/hsa-amd-aqlprofile/librocprofv2_att.so')
|
||||
path_to_parser = os.path.abspath('/opt/rocm/lib/hsa-amd-aqlprofile/librocprofv2_att.so')
|
||||
SO = CDLL(path_to_parser)
|
||||
|
||||
SO.AnalyseBinary.restype = ReturnInfo
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
= CTF plugin for ROCMTools
|
||||
= CTF plugin for ROCProfiler
|
||||
13 December 2022
|
||||
Philippe Proulx
|
||||
|
||||
This plugin writes the received ROCMTools tracer and profiler records to
|
||||
This plugin writes the received ROCProfiler tracer and profiler records to
|
||||
a https://diamon.org/ctf/[CTF] trace.
|
||||
|
||||
== Build requirements
|
||||
@@ -77,7 +77,7 @@ All CTF event records have the following common fields:
|
||||
Correlation ID.
|
||||
--
|
||||
+
|
||||
For each ROCMTools HSA API tracer record for the HSA function named
|
||||
For each ROCProfiler HSA API tracer record for the HSA function named
|
||||
`__name__`, this plugin writes two event records:
|
||||
+
|
||||
`__name___begin`:::
|
||||
@@ -112,7 +112,7 @@ All CTF event records have the following common fields:
|
||||
Kernel name (empty string if not available).
|
||||
--
|
||||
+
|
||||
For each ROCMTools HIP API tracer record for the HIP function named
|
||||
For each ROCProfiler HIP API tracer record for the HIP function named
|
||||
`__name__`, this plugin writes two event records:
|
||||
+
|
||||
`__name__Begin`:::
|
||||
|
||||
@@ -776,7 +776,7 @@ namespace {
|
||||
|
||||
constexpr std::uint64_t ns_per_s = 1'000'000'000ULL;
|
||||
|
||||
// Samples the ROCMTools clock and returns the value.
|
||||
// Samples the ROCProfiler clock and returns the value.
|
||||
std::uint64_t GetClkVal() {
|
||||
rocprofiler_timestamp_t ts;
|
||||
[[maybe_unused]] const auto ret = rocprofiler_get_timestamp(&ts);
|
||||
@@ -786,16 +786,16 @@ std::uint64_t GetClkVal() {
|
||||
}
|
||||
|
||||
// Updates `offset` and `delta`, if needed, to a more accurate clock
|
||||
// class offset and a smaller ROCMTools clock value delta.
|
||||
// class offset and a smaller ROCProfiler clock value delta.
|
||||
//
|
||||
// This function samples the ROCMTools clock twice, also sampling the
|
||||
// real-time clock in between, and uses the average ROCMTools clock
|
||||
// This function samples the ROCProfiler clock twice, also sampling the
|
||||
// real-time clock in between, and uses the average ROCProfiler clock
|
||||
// value to approximate the actual clock class offset.
|
||||
//
|
||||
// This strategy is based on the measure_single_clock_offset() function
|
||||
// of the LTTng-tools project <https://lttng.org/>.
|
||||
void UpdateClkClsOffsetAndDelta(std::uint64_t& offset, std::uint64_t& delta) {
|
||||
// Sample ROCMTools clock (first time).
|
||||
// Sample ROCProfiler clock (first time).
|
||||
const auto rocm_clk_val1 = GetClkVal();
|
||||
|
||||
// Sample real-time clock.
|
||||
@@ -804,10 +804,10 @@ void UpdateClkClsOffsetAndDelta(std::uint64_t& offset, std::uint64_t& delta) {
|
||||
|
||||
assert(ret == 0);
|
||||
|
||||
// Sample ROCMTools clock (second time).
|
||||
// Sample ROCProfiler clock (second time).
|
||||
const auto rocm_clk_val2 = GetClkVal();
|
||||
|
||||
// Compute the current ROCMTools clock value delta.
|
||||
// Compute the current ROCProfiler clock value delta.
|
||||
const auto this_delta = rocm_clk_val2 - rocm_clk_val1;
|
||||
|
||||
if (this_delta > delta) {
|
||||
@@ -815,7 +815,7 @@ void UpdateClkClsOffsetAndDelta(std::uint64_t& offset, std::uint64_t& delta) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute the average ROCMTools clock value.
|
||||
// Compute the average ROCProfiler clock value.
|
||||
const auto rocm_clk_val_avg = (rocm_clk_val1 + rocm_clk_val2) >> 1;
|
||||
|
||||
// Compute the real-time clock value in nanoseconds.
|
||||
|
||||
@@ -162,7 +162,7 @@ class file_plugin_t {
|
||||
output_file_t begin_ts("begin_ts_file.txt");
|
||||
|
||||
[[maybe_unused]] rocprofiler_timestamp_t app_begin_timestamp = {};
|
||||
CHECK_ROCMTOOLS(rocprofiler_get_timestamp(&app_begin_timestamp));
|
||||
CHECK_ROCPROFILER(rocprofiler_get_timestamp(&app_begin_timestamp));
|
||||
|
||||
begin_ts << std::dec << app_begin_timestamp.value << std::endl;
|
||||
if (begin_ts.fail()) {
|
||||
@@ -215,12 +215,12 @@ class file_plugin_t {
|
||||
}
|
||||
if (tracer_record.domain == ACTIVITY_DOMAIN_HSA_API) {
|
||||
size_t function_name_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name_size));
|
||||
if (function_name_size > 1) {
|
||||
char* function_name_c = (char*)malloc(function_name_size);
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name_c));
|
||||
if (function_name_c) function_name = std::string(function_name_c);
|
||||
@@ -228,23 +228,23 @@ class file_plugin_t {
|
||||
}
|
||||
if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_API) {
|
||||
size_t function_name_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name_size));
|
||||
if (function_name_size > 1) {
|
||||
char* function_name_c = (char*)malloc(function_name_size);
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name_c));
|
||||
if (function_name_c) function_name = std::string(function_name_c);
|
||||
}
|
||||
size_t kernel_name_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &kernel_name_size));
|
||||
if (kernel_name_size > 1) {
|
||||
char* kernel_name_str = (char*)malloc(kernel_name_size * sizeof(char));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &kernel_name_str));
|
||||
if (kernel_name_str) kernel_name = rocmtools::cxx_demangle(std::string(kernel_name_str));
|
||||
@@ -252,26 +252,26 @@ class file_plugin_t {
|
||||
}
|
||||
if (tracer_record.domain == ACTIVITY_DOMAIN_ROCTX) {
|
||||
size_t roctx_message_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &roctx_message_size));
|
||||
if (roctx_message_size > 1) {
|
||||
[[maybe_unused]] char* roctx_message_str =
|
||||
static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &roctx_message_str));
|
||||
if (roctx_message_str)
|
||||
roctx_message = rocmtools::cxx_demangle(std::string(strdup(roctx_message_str)));
|
||||
}
|
||||
size_t roctx_id_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle, tracer_record.operation_id,
|
||||
&roctx_id_size));
|
||||
if (roctx_id_size > 1) {
|
||||
[[maybe_unused]] char* roctx_id_str =
|
||||
static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &roctx_id_str));
|
||||
if (roctx_id_str) {
|
||||
@@ -299,14 +299,14 @@ class file_plugin_t {
|
||||
size_t name_length = 0;
|
||||
output_file_t* output_file{nullptr};
|
||||
output_file = get_output_file(output_type_t::COUNTER);
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
|
||||
CHECK_ROCPROFILER(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME,
|
||||
profiler_record->kernel_id, &name_length));
|
||||
// Taken from rocprofiler: The size hasn't changed in recent past
|
||||
static const uint32_t lds_block_size = 128 * 4;
|
||||
const char* kernel_name_c;
|
||||
if (name_length > 1) {
|
||||
kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record->kernel_id,
|
||||
CHECK_ROCPROFILER(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record->kernel_id,
|
||||
&kernel_name_c));
|
||||
}
|
||||
*output_file << std::string("dispatch[") << std::to_string(profiler_record->header.id.handle)
|
||||
@@ -355,12 +355,12 @@ class file_plugin_t {
|
||||
for (uint64_t i = 0; i < profiler_record->counters_count.value; i++) {
|
||||
if (profiler_record->counters[i].counter_handler.handle > 0) {
|
||||
size_t counter_name_length = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_counter_info_size(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
|
||||
&counter_name_length));
|
||||
if (counter_name_length > 1) {
|
||||
const char* name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_counter_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_counter_info(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, profiler_record->counters[i].counter_handler,
|
||||
&name_c));
|
||||
*output_file << ", " << name_c << " ("
|
||||
|
||||
@@ -74,14 +74,14 @@ static std::string output_file_name;
|
||||
std::string get_kernel_name(rocprofiler_record_profiler_t& profiler_record) {
|
||||
std::string kernel_name = "";
|
||||
size_t name_length = 1;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
|
||||
CHECK_ROCPROFILER(rocprofiler_query_kernel_info_size(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
|
||||
&name_length));
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
||||
#pragma GCC diagnostic ignored "-Wstringop-overread"
|
||||
if (name_length > 1) {
|
||||
const char* kernel_name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
|
||||
CHECK_ROCPROFILER(rocprofiler_query_kernel_info(ROCPROFILER_KERNEL_NAME, profiler_record.kernel_id,
|
||||
&kernel_name_c));
|
||||
if (kernel_name_c && strlen(kernel_name_c) > 1)
|
||||
kernel_name = rocmtools::cxx_demangle(strdup(kernel_name_c));
|
||||
@@ -327,12 +327,12 @@ class perfetto_plugin_t {
|
||||
for (uint64_t i = 0; i < profiler_record.counters_count.value; i++) {
|
||||
if (profiler_record.counters[i].counter_handler.handle > 0) {
|
||||
size_t name_length = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_counter_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_counter_info_size(
|
||||
session_id, ROCPROFILER_COUNTER_NAME, profiler_record.counters[i].counter_handler,
|
||||
&name_length));
|
||||
if (name_length > 1) {
|
||||
const char* name_c = static_cast<const char*>(malloc(name_length * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(
|
||||
CHECK_ROCPROFILER(
|
||||
rocprofiler_query_counter_info(session_id, ROCPROFILER_COUNTER_NAME,
|
||||
profiler_record.counters[i].counter_handler, &name_c));
|
||||
|
||||
@@ -435,24 +435,24 @@ class perfetto_plugin_t {
|
||||
auto& roctx_track = roctx_track_it->second;
|
||||
|
||||
size_t roctx_message_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &roctx_message_size));
|
||||
if (roctx_message_size > 1) {
|
||||
char* roctx_message_str = static_cast<char*>(malloc(roctx_message_size * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_ROCTX_MESSAGE, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &roctx_message_str));
|
||||
if (roctx_message_str)
|
||||
roctx_message = rocmtools::cxx_demangle(std::string(strdup(roctx_message_str)));
|
||||
}
|
||||
size_t roctx_id_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &roctx_id_size));
|
||||
if (roctx_id_size > 1) {
|
||||
char* roctx_id_str = static_cast<char*>(malloc(roctx_id_size * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_roctx_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_roctx_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_ROCTX_ID, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &roctx_id_str));
|
||||
if (roctx_id_str) {
|
||||
@@ -499,12 +499,12 @@ class perfetto_plugin_t {
|
||||
}
|
||||
auto& hsa_track = hsa_track_it->second;
|
||||
size_t function_name_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name_size));
|
||||
if (function_name_size > 1) {
|
||||
function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HSA_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name));
|
||||
}
|
||||
@@ -540,23 +540,23 @@ class perfetto_plugin_t {
|
||||
}
|
||||
auto& hip_track = hip_track_it->second;
|
||||
size_t function_name_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name_size));
|
||||
if (function_name_size > 1) {
|
||||
function_name = static_cast<char*>(malloc(function_name_size * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HIP_FUNCTION_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &function_name));
|
||||
}
|
||||
size_t kernel_name_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &kernel_name_size));
|
||||
char* kernel_name_str;
|
||||
if (kernel_name_size > 1) {
|
||||
kernel_name_str = static_cast<char*>(malloc(kernel_name_size * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HIP_KERNEL_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &kernel_name_str));
|
||||
if (kernel_name_str) {
|
||||
@@ -584,13 +584,13 @@ class perfetto_plugin_t {
|
||||
uint64_t stream_id = 0;
|
||||
size_t stream_id_str_size = 0;
|
||||
char* stream_id_str;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
|
||||
rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
|
||||
&stream_id_str_size));
|
||||
if (stream_id_str_size > 1) {
|
||||
stream_id_str = static_cast<char*>(malloc(stream_id_str_size * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HIP_STREAM_ID, rocprofiler_tracer_api_data_handle_t{nullptr, 0},
|
||||
rocprofiler_tracer_operation_id_t{(uint32_t)tracer_record.correlation_id.value},
|
||||
&stream_id_str));
|
||||
@@ -627,12 +627,12 @@ class perfetto_plugin_t {
|
||||
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
|
||||
} else {
|
||||
size_t activity_name_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &activity_name_size));
|
||||
if (activity_name_size > 1) {
|
||||
activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hip_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hip_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HIP_ACTIVITY_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &activity_name));
|
||||
} else {
|
||||
@@ -673,12 +673,12 @@ class perfetto_plugin_t {
|
||||
}
|
||||
auto& queue_track = queue_track_it->second;
|
||||
size_t activity_name_size = 0;
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info_size(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &activity_name_size));
|
||||
if (activity_name_size > 1) {
|
||||
activity_name = static_cast<char*>(malloc(activity_name_size * sizeof(char)));
|
||||
CHECK_ROCMTOOLS(rocprofiler_query_hsa_tracer_api_data_info(
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info(
|
||||
session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &activity_name));
|
||||
}
|
||||
|
||||
@@ -36,10 +36,10 @@
|
||||
|
||||
#include "src/utils/helper.h"
|
||||
|
||||
// Macro to check ROCMTools calls status
|
||||
#define CHECK_ROCMTOOLS(call) \
|
||||
// Macro to check ROCProfiler calls status
|
||||
#define CHECK_ROCPROFILER(call) \
|
||||
do { \
|
||||
if ((call) != ROCPROFILER_STATUS_SUCCESS) rocmtools::fatal("Error: ROCMTools API Call Error!"); \
|
||||
if ((call) != ROCPROFILER_STATUS_SUCCESS) rocmtools::fatal("Error: ROCProfiler API Call Error!"); \
|
||||
} while (false)
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -48,10 +48,10 @@ This code sample demonstrates how to use the APIs to collect counters and metric
|
||||
|
||||
|
||||
## How to compile
|
||||
In order to get the samples to compile, make sure to copy rocmtools binaries into /opt/rocm/lib
|
||||
Running 'make install' inside the rocmtools/build folder will copy the binaries to /opt/rocm/lib
|
||||
In order to get the samples to compile, make sure to copy the rocprofiler binaries into /opt/rocm/lib.
|
||||
Running 'make install' inside the rocprofiler/build folder will copy the binaries to /opt/rocm/lib.
|
||||
|
||||
Alternately, change the 'ROCMTOOLS_LIBS_PATH' variable in the Makefile to point to the rocmtools/build folder.
|
||||
Alternatively, change the 'ROCPROFILER_LIBS_PATH' variable in the Makefile to point to the rocprofiler/build folder.
|
||||
After modifications to Makefile are done, run:
|
||||
|
||||
```bash
|
||||
|
||||
@@ -44,7 +44,7 @@
|
||||
// Macro to check ROCPROFILER calls status
|
||||
#define CHECK_ROCPROFILER(call) \
|
||||
do { \
|
||||
if ((call) != ROCPROFILER_STATUS_SUCCESS) rocmtools::fatal("Error: ROCMTools API Call Error!"); \
|
||||
if ((call) != ROCPROFILER_STATUS_SUCCESS) rocmtools::fatal("Error: ROCProfiler API Call Error!"); \
|
||||
} while (false)
|
||||
|
||||
// Device (Kernel) functions, it must be void
|
||||
|
||||
@@ -84,7 +84,7 @@ std::string string_printf(const char* format, ...) {
|
||||
message += info.sstream.str();
|
||||
#endif /* defined (ENABLE_BACKTRACE) */
|
||||
|
||||
std::string errmsg("ROCMTools: fatal error: " + message);
|
||||
std::string errmsg("ROCProfiler: fatal error: " + message);
|
||||
fputs(errmsg.c_str(), stderr);
|
||||
|
||||
throw(errmsg);
|
||||
|
||||
@@ -95,7 +95,7 @@ by one or more payloads:
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* ROCMtool General Record base header to identify the id and kind of every
|
||||
* ROCProfiler General Record base header to identify the id and kind of every
|
||||
* record
|
||||
*/
|
||||
rocprofiler_record_header_t header;
|
||||
|
||||
@@ -228,7 +228,7 @@ run_kernel(program_options const &opts)
|
||||
rocprofiler_create_session(ROCPROFILER_NONE_REPLAY_MODE, &sid),
|
||||
rocprofiler_ok);
|
||||
if (ROCPROFILER_STATUS_SUCCESS != rocprofiler_ok) {
|
||||
fputs("error: failed to create rocmtools session\n", stderr);
|
||||
fputs("error: failed to create rocprofiler session\n", stderr);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
#define ROCPROFILER_ERROR(code) \
|
||||
do { \
|
||||
fprintf(stderr, \
|
||||
PROGNAME ": Assertion failed at %s:%d, ROCmtools error: %s\n", \
|
||||
PROGNAME ": Assertion failed at %s:%d, ROCProfiler error: %s\n", \
|
||||
__FILE__, __LINE__, rocprofiler_error_str(code)); \
|
||||
fflush(stderr); \
|
||||
} while (false);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# ############################################################################################################################################
|
||||
# ROCMTOOL General Requirements
|
||||
# ROCProfiler General Requirements
|
||||
# ############################################################################################################################################
|
||||
find_package(Python3 COMPONENTS Interpreter REQUIRED)
|
||||
|
||||
@@ -71,7 +71,7 @@ execute_process(
|
||||
# COMMENT "Generating derived_counter.cpp...")
|
||||
|
||||
# ############################################################################################################################################
|
||||
# ROCMTOOL Tracer HIP/HSA Parsing
|
||||
# ROCProfiler Tracer HIP/HSA Parsing
|
||||
# ############################################################################################################################################
|
||||
get_property(HIP_INCLUDE_DIRECTORIES TARGET hip::amdhip64 PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
find_file(HIP_RUNTIME_API_H hip_runtime_api.h
|
||||
@@ -127,7 +127,7 @@ set(GENERATED_SOURCES
|
||||
hsa_prof_str.inline.h)
|
||||
|
||||
# ############################################################################################################################################
|
||||
# ROCMTOOL API
|
||||
# ROCProfiler API
|
||||
# ############################################################################################################################################
|
||||
# PC sampling uses libpciaccess as a fallback if the debugfs ioctl is
|
||||
# unavailable
|
||||
@@ -150,8 +150,8 @@ file(GLOB ROCPROFILER_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp)
|
||||
file(GLOB ROCPROFILER_PROFILER_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/profiler/profiler.cpp)
|
||||
file(GLOB ROCPROFILER_TRACER_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/tracer/*.cpp)
|
||||
file(GLOB ROCPROFILER_ROCTRACER_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/tracer/src/*.cpp)
|
||||
file(GLOB ROCMTOOLS_ATT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/att/att.cpp)
|
||||
file(GLOB ROCMTOOL_CLASS_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/rocmtool.cpp)
|
||||
file(GLOB ROCPROFILER_ATT_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/att/att.cpp)
|
||||
file(GLOB ROCPROFILER_CLASS_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/rocmtool.cpp)
|
||||
file(GLOB ROCPROFILER_SPM_SRC_FILES ${PROJECT_SOURCE_DIR}/src/core/session/spm/spm.cpp)
|
||||
|
||||
|
||||
@@ -188,9 +188,9 @@ file(GLOB CORE_PC_SAMPLING_FILES ${CORE_PC_SAMPLING_DIR}/core/*.cpp ${CORE_PC_SA
|
||||
# Compiling/Installing ROCProfiler API
|
||||
add_library(${ROCPROFILER_TARGET} SHARED
|
||||
${ROCPROFILER_SRC_FILES}
|
||||
${ROCMTOOL_CLASS_SRC_FILES}
|
||||
${ROCPROFILER_CLASS_SRC_FILES}
|
||||
${ROCPROFILER_PROFILER_SRC_FILES}
|
||||
${ROCMTOOLS_ATT_SRC_FILES}
|
||||
${ROCPROFILER_ATT_SRC_FILES}
|
||||
${CORE_HARDWARE_SRC_FILES}
|
||||
${CORE_HSA_SRC_FILES}
|
||||
${ROCPROFILER_SPM_SRC_FILES}
|
||||
|
||||
@@ -69,7 +69,7 @@ Filter::Filter(rocprofiler_filter_id_t id, rocprofiler_filter_kind_t filter_kind
|
||||
}
|
||||
default: {
|
||||
warning(
|
||||
"Error: ROCMtools filter specified is not supported for "
|
||||
"Error: ROCProfiler filter specified is not supported for "
|
||||
"profiler mode!\n");
|
||||
}
|
||||
}
|
||||
@@ -92,7 +92,7 @@ std::vector<std::string> Filter::GetCounterData() {
|
||||
return profiler_counter_names_;
|
||||
}
|
||||
fatal(
|
||||
"Error: ROCMtools filter specified is not supported for "
|
||||
"Error: ROCProfiler filter specified is not supported for "
|
||||
"Counter Collection Filter!\n");
|
||||
}
|
||||
|
||||
@@ -101,7 +101,7 @@ std::vector<rocprofiler_tracer_activity_domain_t> Filter::GetTraceData() {
|
||||
return tracer_apis_;
|
||||
}
|
||||
fatal(
|
||||
"Error: ROCMtools filter specified is not supported for "
|
||||
"Error: ROCProfiler filter specified is not supported for "
|
||||
"profiler mode!\n");
|
||||
}
|
||||
|
||||
@@ -110,7 +110,7 @@ std::vector<rocprofiler_att_parameter_t> Filter::GetAttParametersData() {
|
||||
return att_parameters_;
|
||||
}
|
||||
fatal(
|
||||
"Error: ROCMtools filter specified is not supported for "
|
||||
"Error: ROCProfiler filter specified is not supported for "
|
||||
"ATT tracing mode!\n");
|
||||
}
|
||||
|
||||
@@ -119,7 +119,7 @@ rocprofiler_spm_parameter_t* Filter::GetSpmParameterData() {
|
||||
return spm_parameter_;
|
||||
}
|
||||
fatal(
|
||||
"Error: ROCMtools filter specified is not supported for "
|
||||
"Error: ROCProfiler filter specified is not supported for "
|
||||
"SPM collection mode!\n");
|
||||
}
|
||||
void Filter::SetProperty(rocprofiler_filter_property_t property) {
|
||||
@@ -181,7 +181,7 @@ void Filter::SetProperty(rocprofiler_filter_property_t property) {
|
||||
break;
|
||||
// TODO(aelwazir): Check for empty property
|
||||
// warning(
|
||||
// "Error: ROCMtools filter specified is not supported for "
|
||||
// "Error: ROCProfiler filter specified is not supported for "
|
||||
// "profiler mode!\n");
|
||||
}
|
||||
}
|
||||
@@ -211,7 +211,7 @@ std::variant<std::vector<std::string>, uint32_t*> Filter::GetProperty(
|
||||
}
|
||||
default:
|
||||
fatal(
|
||||
"Error: ROCMtools filter specified is not supported for the given "
|
||||
"Error: ROCProfiler filter specified is not supported for the given "
|
||||
"kind!");
|
||||
break;
|
||||
}
|
||||
@@ -241,7 +241,7 @@ size_t Filter::GetPropertiesCount(rocprofiler_filter_property_kind_t kind) {
|
||||
}
|
||||
}
|
||||
fatal(
|
||||
"Error: ROCMtools filter specified is not supported for the given "
|
||||
"Error: ROCProfiler filter specified is not supported for the given "
|
||||
"kind!");
|
||||
}
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ namespace fs = std::experimental::filesystem;
|
||||
|
||||
// Usage message
|
||||
void printUsage() {
|
||||
std::cout << "ROCMTools Run Binary Usage:" << std::endl;
|
||||
std::cout << "ROCProfiler Run Binary Usage:" << std::endl;
|
||||
std::cout << "\nTo run ./run.sh PARAMs, PARAMs can be the following:" << std::endl;
|
||||
std::cout << "-h | --help For showing this message" << std::endl;
|
||||
std::cout << "-t | --test For Running the tests" << std::endl;
|
||||
@@ -122,7 +122,7 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
current_path[sizeof(current_path) - 1] = '\0';
|
||||
|
||||
// Getting the rocprofv2 binary path to locate rocmtools library path
|
||||
// Getting the rocprofv2 binary path to locate rocprofiler library path
|
||||
fs::path bin_path;
|
||||
if (Dl_info dl_info; dladdr((void*)runApp, &dl_info) != 0) {
|
||||
bin_path = fs::path(dl_info.dli_fname);
|
||||
@@ -140,7 +140,7 @@ int main(int argc, char** argv) {
|
||||
if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "-h") == 0) {
|
||||
printUsage();
|
||||
return 1;
|
||||
// Normal ROCMTools Tests
|
||||
// Normal ROCProfiler Tests
|
||||
} else if (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--test") == 0) {
|
||||
fs::path test_path = bin_path;
|
||||
test_path = test_path.replace_filename("run_tests.sh");
|
||||
@@ -225,7 +225,7 @@ int main(int argc, char** argv) {
|
||||
}
|
||||
app_args[j] = NULL;
|
||||
|
||||
// Providing LD_PRELOAD of ROCMTools Library to runApp function
|
||||
// Providing LD_PRELOAD of ROCProfiler Library to runApp function
|
||||
std::string pathenv_str = "LD_PRELOAD=librocprofiler_tool.so";
|
||||
std::string current_ld_preload;
|
||||
if (getenv("LD_PRELOAD")) current_ld_preload = getenv("LD_PRELOAD");
|
||||
|
||||
@@ -59,10 +59,10 @@
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
// Macro to check ROCMTools calls status
|
||||
#define CHECK_ROCMTOOLS(call) \
|
||||
// Macro to check ROCProfiler calls status
|
||||
#define CHECK_ROCPROFILER(call) \
|
||||
do { \
|
||||
if ((call) != ROCPROFILER_STATUS_SUCCESS) rocmtools::fatal("Error: ROCMTools API Call Error!"); \
|
||||
if ((call) != ROCPROFILER_STATUS_SUCCESS) rocmtools::fatal("Error: ROCProfiler API Call Error!"); \
|
||||
} while (false)
|
||||
|
||||
namespace {
|
||||
@@ -193,14 +193,6 @@ att_parsed_input_t GetATTParams() {
|
||||
ATT_PARAM_NAMES["PERFCOUNTERS_COL_PERIOD"] = ROCPROFILER_ATT_MAXVALUE;
|
||||
ATT_PARAM_NAMES["KERNEL"] = ROCPROFILER_ATT_MAXVALUE;
|
||||
ATT_PARAM_NAMES["REDUCED_MEMORY"] = ROCPROFILER_ATT_MAXVALUE;
|
||||
/*
|
||||
ATT_PARAM_NAMES["ATT_MASK"] = ROCMTOOLS_ATT_MASK;
|
||||
ATT_PARAM_NAMES["TOKEN_MASK"] = ROCMTOOLS_ATT_TOKEN_MASK;
|
||||
ATT_PARAM_NAMES["TOKEN_MASK2"] = ROCMTOOLS_ATT_TOKEN_MASK2;
|
||||
ATT_PARAM_NAMES["SE_MASK"] = ROCMTOOLS_ATT_SE_MASK;
|
||||
ATT_PARAM_NAMES["PERF_MASK"] = ROCMTOOLS_ATT_PERF_MASK;
|
||||
ATT_PARAM_NAMES["PERF_CTRL"] = ROCMTOOLS_ATT_PERF_CTRL;
|
||||
*/
|
||||
|
||||
// Default values used for token generation.
|
||||
std::unordered_map<std::string, uint32_t> default_params = {
|
||||
@@ -308,14 +300,14 @@ void finish() {
|
||||
}
|
||||
if (session_created.load(std::memory_order_relaxed)) {
|
||||
session_created.exchange(false, std::memory_order_release);
|
||||
CHECK_ROCMTOOLS(rocprofiler_terminate_session(session_id));
|
||||
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
|
||||
for ([[maybe_unused]] rocprofiler_buffer_id_t buffer_id : buffer_ids) {
|
||||
CHECK_ROCMTOOLS(rocprofiler_flush_data(session_id, buffer_id));
|
||||
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
|
||||
}
|
||||
}
|
||||
|
||||
// CHECK_ROCMTOOLS(rocprofiler_destroy_session(session_id));
|
||||
// CHECK_ROCMTOOLS(rocprofiler_finalize());
|
||||
// CHECK_ROCPROFILER(rocprofiler_destroy_session(session_id));
|
||||
// CHECK_ROCPROFILER(rocprofiler_finalize());
|
||||
}
|
||||
|
||||
// load plugins
|
||||
@@ -327,7 +319,7 @@ void plugins_load() {
|
||||
if (fs::path(dl_info.dli_fname).string().find("build") != std::string::npos) {
|
||||
plugin_name = "libfile_plugin.so";
|
||||
} else {
|
||||
plugin_name = "rocmtools/libfile_plugin.so";
|
||||
plugin_name = "rocprofiler/libfile_plugin.so";
|
||||
}
|
||||
}
|
||||
if (!plugin.emplace(fs::path(dl_info.dli_fname).replace_filename(plugin_name)).is_valid()) {
|
||||
@@ -353,7 +345,7 @@ void wait_for_amdsys() {
|
||||
// Start
|
||||
case 4: {
|
||||
printf("AMDSYS:: Starting Tools Session...\n");
|
||||
CHECK_ROCMTOOLS(rocprofiler_start_session(session_id));
|
||||
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
|
||||
session_created.exchange(true, std::memory_order_release);
|
||||
break;
|
||||
}
|
||||
@@ -362,9 +354,9 @@ void wait_for_amdsys() {
|
||||
if (session_created.load(std::memory_order_relaxed)) {
|
||||
printf("AMDSYS:: Stopping Tools Session...\n");
|
||||
session_created.exchange(false, std::memory_order_release);
|
||||
CHECK_ROCMTOOLS(rocprofiler_terminate_session(session_id));
|
||||
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
|
||||
for ([[maybe_unused]] rocprofiler_buffer_id_t buffer_id : buffer_ids) {
|
||||
CHECK_ROCMTOOLS(rocprofiler_flush_data(session_id, buffer_id));
|
||||
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -375,9 +367,9 @@ void wait_for_amdsys() {
|
||||
if (session_created.load(std::memory_order_relaxed)) {
|
||||
printf("AMDSYS:: Stopping Tools Session...\n");
|
||||
session_created.exchange(false, std::memory_order_release);
|
||||
CHECK_ROCMTOOLS(rocprofiler_terminate_session(session_id));
|
||||
CHECK_ROCPROFILER(rocprofiler_terminate_session(session_id));
|
||||
for ([[maybe_unused]] rocprofiler_buffer_id_t buffer_id : buffer_ids) {
|
||||
CHECK_ROCMTOOLS(rocprofiler_flush_data(session_id, buffer_id));
|
||||
CHECK_ROCPROFILER(rocprofiler_flush_data(session_id, buffer_id));
|
||||
}
|
||||
}
|
||||
amd_sys_handler.exchange(false, std::memory_order_release);
|
||||
@@ -421,7 +413,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
uint64_t failed_tool_count, const char* const* failed_tool_names) {
|
||||
if (rocprofiler_version_major() != ROCPROFILER_VERSION_MAJOR ||
|
||||
rocprofiler_version_minor() < ROCPROFILER_VERSION_MINOR) {
|
||||
warning("the ROCMTools API version is not compatible with this tool");
|
||||
warning("the ROCProfiler API version is not compatible with this tool");
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -434,7 +426,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
amd_sys_handler.exchange(true, std::memory_order_release);
|
||||
}
|
||||
|
||||
CHECK_ROCMTOOLS(rocprofiler_initialize());
|
||||
CHECK_ROCPROFILER(rocprofiler_initialize());
|
||||
|
||||
// Printing out info
|
||||
char* info_symb = getenv("ROCPROFILER_COUNTER_LIST");
|
||||
@@ -443,7 +435,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
printf("Basic HW counters:\n");
|
||||
else
|
||||
printf("Derived metrics:\n");
|
||||
CHECK_ROCMTOOLS(rocprofiler_iterate_counters(info_callback));
|
||||
CHECK_ROCPROFILER(rocprofiler_iterate_counters(info_callback));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
@@ -485,7 +477,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
parameters.emplace_back(param);
|
||||
}
|
||||
|
||||
CHECK_ROCMTOOLS(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id));
|
||||
CHECK_ROCPROFILER(rocprofiler_create_session(ROCPROFILER_KERNEL_REPLAY_MODE, &session_id));
|
||||
|
||||
bool want_pc_sampling = getenv("ROCPROFILER_PC_SAMPLING");
|
||||
|
||||
@@ -502,7 +494,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
if (parameters.size() > 0) filters_requested.emplace_back(ROCPROFILER_ATT_TRACE_COLLECTION);
|
||||
|
||||
rocprofiler_buffer_id_t buffer_id;
|
||||
CHECK_ROCMTOOLS(rocprofiler_create_buffer(
|
||||
CHECK_ROCPROFILER(rocprofiler_create_buffer(
|
||||
session_id,
|
||||
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id) {
|
||||
@@ -512,7 +504,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
buffer_ids.emplace_back(buffer_id);
|
||||
|
||||
rocprofiler_buffer_id_t buffer_id_1;
|
||||
CHECK_ROCMTOOLS(rocprofiler_create_buffer(
|
||||
CHECK_ROCPROFILER(rocprofiler_create_buffer(
|
||||
session_id,
|
||||
[](const rocprofiler_record_header_t* record, const rocprofiler_record_header_t* end_record,
|
||||
rocprofiler_session_id_t session_id, rocprofiler_buffer_id_t buffer_id_1) {
|
||||
@@ -527,19 +519,19 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
printf("Enabling Counter Collection\n");
|
||||
rocprofiler_filter_id_t filter_id;
|
||||
[[maybe_unused]] rocprofiler_filter_property_t property = {};
|
||||
CHECK_ROCMTOOLS(rocprofiler_create_filter(
|
||||
CHECK_ROCPROFILER(rocprofiler_create_filter(
|
||||
session_id, filter_kind, rocprofiler_filter_data_t{.counters_names = &counters_[0]},
|
||||
counters_.size(), &filter_id, property));
|
||||
CHECK_ROCMTOOLS(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id_1));
|
||||
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id_1));
|
||||
filter_ids.emplace_back(filter_id);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_DISPATCH_TIMESTAMPS_COLLECTION: {
|
||||
rocprofiler_filter_id_t filter_id;
|
||||
[[maybe_unused]] rocprofiler_filter_property_t property = {};
|
||||
CHECK_ROCMTOOLS(rocprofiler_create_filter(session_id, filter_kind, rocprofiler_filter_data_t{},
|
||||
CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, filter_kind, rocprofiler_filter_data_t{},
|
||||
0, &filter_id, property));
|
||||
CHECK_ROCMTOOLS(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id_1));
|
||||
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id_1));
|
||||
filter_ids.emplace_back(filter_id);
|
||||
break;
|
||||
}
|
||||
@@ -547,11 +539,11 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
printf("Enabling API Tracing\n");
|
||||
rocprofiler_filter_id_t filter_id;
|
||||
[[maybe_unused]] rocprofiler_filter_property_t property = {};
|
||||
CHECK_ROCMTOOLS(rocprofiler_create_filter(session_id, filter_kind,
|
||||
CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, filter_kind,
|
||||
rocprofiler_filter_data_t{&apis_requested[0]},
|
||||
apis_requested.size(), &filter_id, property));
|
||||
CHECK_ROCMTOOLS(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
|
||||
CHECK_ROCMTOOLS(
|
||||
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
|
||||
CHECK_ROCPROFILER(
|
||||
rocprofiler_set_api_trace_sync_callback(session_id, filter_id, plugin_write_record));
|
||||
filter_ids.emplace_back(filter_id);
|
||||
break;
|
||||
@@ -568,11 +560,11 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
property.data_count = kernel_names_c.size();
|
||||
property.name_regex = kernel_names_c.data();
|
||||
|
||||
CHECK_ROCMTOOLS(
|
||||
CHECK_ROCPROFILER(
|
||||
rocprofiler_create_filter(session_id, ROCPROFILER_ATT_TRACE_COLLECTION,
|
||||
rocprofiler_filter_data_t{.att_parameters = &parameters[0]},
|
||||
parameters.size(), &filter_id, property));
|
||||
CHECK_ROCMTOOLS(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id_1));
|
||||
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id_1));
|
||||
filter_ids.emplace_back(filter_id);
|
||||
break;
|
||||
}
|
||||
@@ -580,10 +572,10 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
puts("Enabling PC sampling");
|
||||
rocprofiler_filter_id_t filter_id;
|
||||
[[maybe_unused]] rocprofiler_filter_property_t property = {};
|
||||
CHECK_ROCMTOOLS(rocprofiler_create_filter(session_id, filter_kind,
|
||||
CHECK_ROCPROFILER(rocprofiler_create_filter(session_id, filter_kind,
|
||||
rocprofiler_filter_data_t{},
|
||||
0, &filter_id, property));
|
||||
CHECK_ROCMTOOLS(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
|
||||
CHECK_ROCPROFILER(rocprofiler_set_filter_buffer(session_id, filter_id, buffer_id));
|
||||
filter_ids.emplace_back(filter_id);
|
||||
break;
|
||||
}
|
||||
@@ -593,7 +585,7 @@ ROCPROFILER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
}
|
||||
|
||||
if (getenv("ROCPROFILER_ENABLE_AMDSYS") == nullptr) {
|
||||
CHECK_ROCMTOOLS(rocprofiler_start_session(session_id));
|
||||
CHECK_ROCPROFILER(rocprofiler_start_session(session_id));
|
||||
session_created.exchange(true, std::memory_order_release);
|
||||
}
|
||||
return true;
|
||||
|
||||
@@ -415,12 +415,12 @@ uint32_t HsaRsrcFactory::GetCountOfCpuAgents() { return uint32_t(cpu_list_.size(
|
||||
bool HsaRsrcFactory::GetGpuAgentInfo(uint32_t idx, const AgentInfo** agent_info) {
|
||||
// Determine if request is valid
|
||||
uint32_t size = uint32_t(gpu_list_.size());
|
||||
if (idx-cpu_list_.size() >= size) {
|
||||
if (idx >= size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Copy AgentInfo from specified index
|
||||
*agent_info = gpu_list_[idx-cpu_list_.size()];
|
||||
*agent_info = gpu_list_[idx];
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
# ROCMTools testing environment.
|
||||
# ROCProfiler testing environment.
|
||||
|
||||
This document explains how ROCMTools testing environment works.
|
||||
This document explains how the ROCProfiler testing environment works.
|
||||
We make use of the GoogleTest (Gtest) framework to automatically find and add test cases to the CMake testing environment.
|
||||
|
||||
# Test Categories
|
||||
|
||||
ROCMTools testing is categorised as following:
|
||||
ROCProfiler testing is categorised as follows:
|
||||
|
||||
- unittests (Gtest Based)
|
||||
- featuretests (standalone and Gtest Based)
|
||||
@@ -14,8 +14,8 @@ ROCMTools testing is categorised as following:
|
||||
|
||||
### Quickstart
|
||||
|
||||
ROCMTools tests are integrated into the top-level cmake project. The tests depend upon the installed version of ROCMTools.
|
||||
Typical usage (paths relative to top of the ROCMTools repo):
|
||||
ROCProfiler tests are integrated into the top-level cmake project. The tests depend upon the installed version of ROCProfiler.
|
||||
Typical usage (paths relative to top of the ROCProfiler repo):
|
||||
```
|
||||
$ ./build.sh
|
||||
$ ./rocprofv2 -ct
|
||||
|
||||
@@ -52,7 +52,7 @@ THE SOFTWARE.
|
||||
*
|
||||
* \section introduction Introduction
|
||||
*
|
||||
* The goal of this test is to test ROCmTools APIs to collect ATT traces.
|
||||
* The goal of this test is to test ROCProfiler APIs to collect ATT traces.
|
||||
*
|
||||
* A simple vectoradd_float kernel is launched and the trace results are printed
|
||||
* as console output
|
||||
@@ -62,7 +62,7 @@ THE SOFTWARE.
|
||||
// function to check att tracing API status
|
||||
auto CheckApi = [](rocprofiler_status_t status) {
|
||||
if (status != ROCPROFILER_STATUS_SUCCESS) {
|
||||
std::cout << "ROCmTools API Error" << std::endl;
|
||||
std::cout << "ROCProfiler API Error" << std::endl;
|
||||
}
|
||||
assert(status == ROCPROFILER_STATUS_SUCCESS);
|
||||
};
|
||||
@@ -210,7 +210,7 @@ int LaunchVectorAddKernel() {
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
// inititalize rocmtools
|
||||
// initialize ROCProfiler
|
||||
CheckApi(rocprofiler_initialize());
|
||||
|
||||
// Att trace collection parameters
|
||||
@@ -253,7 +253,7 @@ int main(int argc, char** argv) {
|
||||
// destroy session
|
||||
CheckApi(rocprofiler_destroy_session(session_id));
|
||||
|
||||
// finalize att tracing by destroying rocmtool object
|
||||
// finalize att tracing by destroying rocprofiler object
|
||||
CheckApi(rocprofiler_finalize());
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
#!/bin/bash
|
||||
|
||||
CURRENT_DIR="$( dirname -- "$0"; )";
|
||||
export PATH=$rocmtoolsdir:$PATH
|
||||
|
||||
|
||||
echo -e "Running Profiler Tests"
|
||||
|
||||
echo -e "Running Unit tests for rocmtools"
|
||||
echo -e "Running Unit tests for rocprofiler"
|
||||
eval ${CURRENT_DIR}/tests/unittests/core/runCoreUnitTests
|
||||
eval ${CURRENT_DIR}/tests/unittests/profiler/runProfilerUnitTests
|
||||
|
||||
|
||||
Fai riferimento in un nuovo problema
Block a user