Add 'projects/roctracer/' from commit 'dd745ed9c731cf1c67a182a4ce41ce30afbfb8ca'

git-subtree-dir: projects/roctracer
git-subtree-mainline: d8cba83d42
git-subtree-split: dd745ed9c7
Tá an tiomantas seo le fáil i:
systems-assistant[bot]
2025-07-22 22:52:51 +00:00
tuismitheoir d8cba83d42 dd745ed9c7
tiomantas 53e20372c7
D'athraigh 95 comhad le 37906 breiseanna agus 0 scriosta
+40
Féach ar an gComhad
@@ -0,0 +1,40 @@
resources:
repositories:
- repository: pipelines_repo
type: github
endpoint: ROCm
name: ROCm/ROCm
variables:
- group: common
- template: /.azuredevops/variables-global.yml@pipelines_repo
trigger:
batch: true
branches:
include:
- amd-mainline
- amd-staging
paths:
exclude:
- .github
- doc
- LICENSE
- README.md
pr:
autoCancel: true
branches:
include:
- amd-mainline
- amd-staging
paths:
exclude:
- .github
- doc
- LICENSE
- README.md
drafts: false
jobs:
- template: ${{ variables.CI_COMPONENT_PATH }}/roctracer.yml@pipelines_repo
+60
Féach ar an gComhad
@@ -0,0 +1,60 @@
---
Language: Cpp
# BasedOnStyle: Google
AccessModifierOffset: -1
ConstructorInitializerIndentWidth: 4
AlignEscapedNewlinesLeft: false
AlignTrailingComments: true
AlignConsecutiveAssignments: false
AlignOperands: false
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AlwaysBreakAfterDefinitionReturnType: false
AlwaysBreakTemplateDeclarations: false
AlwaysBreakBeforeMultilineStrings: true
BreakBeforeBinaryOperators: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BinPackParameters: true
ColumnLimit: 100
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ExperimentalAutoDetectBinPacking: false
IndentCaseLabels: true
IndentWrappedFunctionNames: false
IndentFunctionDeclarationAfterType: false
MaxEmptyLinesToKeep: 2
KeepEmptyLinesAtTheStartOfBlocks: false
NamespaceIndentation: None
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakString: 1000
PenaltyBreakFirstLessLess: 120
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
DerivePointerAlignment: false
PointerAlignment: Left
SpacesBeforeTrailingComments: 2
Cpp11BracedListStyle: true
Standard: Auto
IndentWidth: 2
TabWidth: 8
UseTab: Never
BreakBeforeBraces: Attach
SpacesInParentheses: false
SpacesInAngles: false
SpaceInEmptyParentheses: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: true
SpaceBeforeAssignmentOperators: true
ContinuationIndentWidth: 4
CommentPragmas: '^ IWYU pragma:'
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
SpaceBeforeParens: ControlStatements
DisableFormat: false
SortIncludes: false
...
+5
Féach ar an gComhad
@@ -0,0 +1,5 @@
disabled: false
scmId: gh-emu-rocm
branchesToScan:
- amd-staging
- amd-mainline
+15
Féach ar an gComhad
@@ -0,0 +1,15 @@
name: Rocm Validation Suite KWS
on:
push:
branches: [amd-staging]
pull_request:
types: [opened, synchronize, reopened]
workflow_dispatch:
jobs:
kws:
if: ${{ github.event_name == 'pull_request' }}
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/kws.yml@mainline
secrets: inherit
with:
pr_number: ${{github.event.pull_request.number}}
base_branch: ${{github.base_ref}}
+25
Féach ar an gComhad
@@ -0,0 +1,25 @@
name: ROCm CI Caller
on:
pull_request:
branches: [amd-staging, amd-npi, release/rocm-rel-*, amd-mainline]
types: [opened, reopened, synchronize]
push:
branches: [amd-mainline]
workflow_dispatch:
issue_comment:
types: [created]
jobs:
call-workflow:
if: ${{ github.event_name != 'issue_comment' || github.event.comment.body == '!verify' }}
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/rocm_ci.yml@mainline
secrets: inherit
with:
input_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
input_pr_num: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 0 }}
input_pr_url: ${{ github.event_name == 'pull_request' && github.event.pull_request.html_url || '' }}
input_pr_title: ${{ github.event_name == 'pull_request' && github.event.pull_request.title || '' }}
repository_name: ${{ github.repository }}
base_ref: ${{ github.event_name == 'pull_request' && github.base_ref || github.ref }}
trigger_event_type: ${{ github.event_name }}
+17
Féach ar an gComhad
@@ -0,0 +1,17 @@
name: Sync amd-mainline to public repository
on:
push:
branches: [ amd-mainline ]
jobs:
git-mirror:
runs-on: ubuntu-latest
steps:
- name: git-sync
uses: AMD-ROCm-Internal/rocprofiler-github-actions@git-sync-v3
with:
source_repo: "https://${{ secrets.TOKEN }}@github.com/AMD-ROCm-Internal/roctracer.git"
source_branch: "amd-mainline"
destination_repo: "https://${{ secrets.EXT_TOKEN }}@github.com/ROCm/roctracer.git"
destination_branch: "amd-mainline"
+17
Féach ar an gComhad
@@ -0,0 +1,17 @@
name: Sync amd-staging to public repository
on:
push:
branches: [ amd-staging ]
jobs:
git-mirror:
runs-on: ubuntu-latest
steps:
- name: git-sync
uses: AMD-ROCm-Internal/rocprofiler-github-actions@git-sync-v3
with:
source_repo: "https://${{ secrets.TOKEN }}@github.com/AMD-ROCm-Internal/roctracer.git"
source_branch: "amd-staging"
destination_repo: "https://${{ secrets.EXT_TOKEN }}@github.com/ROCm/roctracer.git"
destination_branch: "amd-staging"
+7
Féach ar an gComhad
@@ -0,0 +1,7 @@
.*
!.gitignore
*.o
*.exe
*.swp
*.Po
build
+240
Féach ar an gComhad
@@ -0,0 +1,240 @@
################################################################################
## Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal in the Software without restriction, including without limitation the
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
## sell copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
## IN THE SOFTWARE.
################################################################################
# Project declaration. The version given here is the default; the patch
# component can be overridden by the ROCM_PATCH_VERSION variable below.
cmake_minimum_required(VERSION 3.18.0)
project(roctracer VERSION 4.1.0)

# Override the patch component when ROCM_PATCH_VERSION is set to a true value.
# The variable is tested by name: writing if(${ROCM_PATCH_VERSION}) expands the
# value first, which leaves an empty condition when the variable is unset and
# makes a non-numeric value be looked up as the name of another variable.
if(ROCM_PATCH_VERSION)
  set(PROJECT_VERSION_PATCH ${ROCM_PATCH_VERSION})
  set(PROJECT_VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}")
endif()
# Defines the CMAKE_INSTALL_* destination variables (e.g. CMAKE_INSTALL_DOCDIR)
# used by the install() rules in this file.
include(GNUInstallDirs)
# Set the default ROCM_PATH unless the caller already defined one.
if(NOT DEFINED ROCM_PATH)
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory")
endif()
## Build is not supported on the Windows platform
if(WIN32)
message(FATAL_ERROR "Windows build is not supported.")
endif()
# Require C++17 for every target.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Enable all warnings and treat them as errors, except ignored-attributes,
# which is kept as a warning.
add_compile_options(-Wall -Wno-error=ignored-attributes -Werror)
# To set an additional RUNPATH in libraries
# installed in /opt/rocm-ver/lib/roctracer
# NOTE(review): the consumer of ROCM_APPEND_PRIVLIB_RPATH is not visible in
# this file - presumably a subdirectory or ROCm CMake module reads it.
set(ROCM_APPEND_PRIVLIB_RPATH "$ORIGIN/..")
# Locate the threading library and the ROCm packages, searching ROCM_PATH.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
find_package(hsa-runtime64 REQUIRED CONFIG PATHS ${ROCM_PATH})
find_package(HIP REQUIRED CONFIG PATHS ${ROCM_PATH})
# LIBRARY_TYPE defaults to SHARED; a caller may predefine it (e.g. STATIC).
if(NOT DEFINED LIBRARY_TYPE)
set(LIBRARY_TYPE SHARED)
endif()
## Build libraries
add_subdirectory(src)
## Build tests (only added for the SHARED library type)
if(${LIBRARY_TYPE} STREQUAL SHARED)
add_subdirectory(test)
endif()
## Build Plugins
add_subdirectory(plugin)
# Installation and CPack packaging rules. They are only configured for the
# SHARED library type. All CPACK_* variables below are set before
# include(CPack), which is where CPack reads them.
if(${LIBRARY_TYPE} STREQUAL SHARED)
## Installation and packaging
# When a packaging prefix is supplied, split it into its last path component
# (DEST_NAME) and its parent directory (DEST_DIR), then package relative to
# the parent directory.
if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX)
get_filename_component(DEST_NAME ${CPACK_PACKAGING_INSTALL_PREFIX} NAME)
get_filename_component(DEST_DIR ${CPACK_PACKAGING_INSTALL_PREFIX} DIRECTORY)
set(CPACK_PACKAGING_INSTALL_PREFIX ${DEST_DIR})
endif()
# Diagnostic output; DEST_NAME is empty when no packaging prefix was given.
message("-----------Dest-name: ${DEST_NAME}")
message("------Install-prefix: ${CMAKE_INSTALL_PREFIX}")
message("-----------CPACK-dir: ${CPACK_PACKAGING_INSTALL_PREFIX}")
## Packaging directives
# Default generators are DEB, RPM and TGZ; as a cache entry this default is
# only used when CPACK_GENERATOR was not already provided.
set(CPACK_GENERATOR "DEB" "RPM" "TGZ" CACHE STRING "CPACK GENERATOR DEB;RPM")
set(ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")
set(CPACK_PACKAGE_NAME "${PROJECT_NAME}")
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
set(CPACK_PACKAGE_CONTACT "ROCm Profiler Support <dl.ROCm-Profiler.support@amd.com>")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD ROCTRACER library")
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
# Append the ROCM_LIBPATCH_VERSION environment variable, when set, as a
# fourth component of the package version.
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}")
message("Using CPACK_PACKAGE_VERSION ${CPACK_PACKAGE_VERSION}")
endif()
## Install license file
# Installed twice: once for the runtime component and once, under a "-asan"
# suffixed doc directory, for the asan component.
install(FILES ${CPACK_RESOURCE_FILE_LICENSE}
DESTINATION ${CMAKE_INSTALL_DOCDIR}
COMPONENT runtime)
install(FILES ${CPACK_RESOURCE_FILE_LICENSE}
DESTINATION ${CMAKE_INSTALL_DOCDIR}-asan
COMPONENT asan)
## Debian package specific variables
# The release tag comes from the environment, falling back to "local".
if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
else()
set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
endif()
message("Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}")
# One .deb per component, with per-component names and dependencies.
set(CPACK_DEB_COMPONENT_INSTALL ON)
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "rocm-core")
set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${PROJECT_NAME}-dev")
set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "${PROJECT_NAME}, hsa-rocr-dev, rocm-core")
set(CPACK_DEBIAN_TESTS_PACKAGE_NAME "${PROJECT_NAME}-tests")
set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "${PROJECT_NAME}-dev")
# Debian package specific variable for ASAN
set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" )
set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "rocm-core-asan" )
## RPM package specific variables
# The release tag comes from the environment, falling back to "local".
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
else()
set(CPACK_RPM_PACKAGE_RELEASE "local")
endif()
message("Using CPACK_RPM_PACKAGE_RELEASE ${CPACK_RPM_PACKAGE_RELEASE}")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
## 'dist' breaks manual builds on debian systems due to empty Provides
# Only append the %{?dist} macro when `rpm` runs successfully and reports a
# non-empty dist value.
execute_process(COMMAND rpm --eval %{?dist}
RESULT_VARIABLE PROC_RESULT
OUTPUT_VARIABLE EVAL_RESULT
OUTPUT_STRIP_TRAILING_WHITESPACE)
message("RESULT_VARIABLE ${PROC_RESULT} OUTPUT_VARIABLE: ${EVAL_RESULT}")
if(PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "")
string(APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}")
endif()
# One .rpm per component, with per-component names and requirements.
set(CPACK_RPM_COMPONENT_INSTALL ON)
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core")
set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel")
set(CPACK_RPM_DEV_PACKAGE_REQUIRES "${PROJECT_NAME}, rocm-core")
# The RPM dev package is named "-devel"; it provides and obsoletes "-dev".
set(CPACK_RPM_DEV_PACKAGE_PROVIDES "${PROJECT_NAME}-dev")
set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "${PROJECT_NAME}-dev")
set(CPACK_RPM_TESTS_PACKAGE_NAME "${PROJECT_NAME}-tests")
set(CPACK_RPM_TESTS_PACKAGE_REQUIRES "${PROJECT_NAME}-devel, rocm-llvm-devel")
message("CPACK_RPM_PACKAGE_RELEASE: ${CPACK_RPM_PACKAGE_RELEASE}")
# RPM package specific variable for ASAN
set(CPACK_RPM_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" )
set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan" )
# Disable build-id links, which were creating RPM transaction errors, and set
# the RPM strip/objdump/objcopy/readelf macros from the CPACK_*_EXECUTABLE
# variables. The continuation lines are part of the string literal and must
# stay unindented.
set ( CPACK_RPM_SPEC_MORE_DEFINE "%define _build_id_links none
%global __strip ${CPACK_STRIP_EXECUTABLE}
%global __objdump ${CPACK_OBJDUMP_EXECUTABLE}
%global __objcopy ${CPACK_OBJCOPY_EXECUTABLE}
%global __readelf ${CPACK_READELF_EXECUTABLE}")
# Unless ROCM_DEP_ROCMCORE is enabled, strip the rocm-core / rocm-core-asan
# entries (and a preceding ", ") from every dependency list set above.
if(NOT ROCM_DEP_ROCMCORE)
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_DEV_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core-asan" "" CPACK_RPM_ASAN_PACKAGE_REQUIRES ${CPACK_RPM_ASAN_PACKAGE_REQUIRES})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS})
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS})
string(REGEX REPLACE ",? ?rocm-core-asan" "" CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS})
endif()
# Select which components are packaged: only asan for an ASAN packaging run,
# otherwise runtime, dev and tests.
if(ENABLE_ASAN_PACKAGING)
# ASAN Package requires asan component with only libraries and license file
set(CPACK_COMPONENTS_ALL asan)
else()
set(CPACK_COMPONENTS_ALL runtime dev tests)
endif()
include(CPack)
# Component descriptions shown by CPack.
cpack_add_component(runtime
DISPLAY_NAME "Runtime"
DESCRIPTION "Dynamic libraries for the ROCtracer")
cpack_add_component(dev
DISPLAY_NAME "Devel"
DESCRIPTION "Header files and documentation for ROCtracer")
cpack_add_component(tests
DISPLAY_NAME "Tests"
DESCRIPTION "Tests for the ROCtracer"
DEPENDS runtime)
cpack_add_component(asan
DISPLAY_NAME "ASAN"
DESCRIPTION "ASAN libraries for the ROCtracer")
endif()
# Optional API documentation: when Doxygen is found, define a `doc` target
# that produces HTML and a PDF reference manual, plus install rules for both.
find_package(Doxygen)
if(DOXYGEN_FOUND)
## Set input and output files
set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/doc/Doxyfile.in)
set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile)
## Request to configure the file
# @ONLY: only @VAR@ references in Doxyfile.in are substituted.
configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)
# Run doxygen, then build the PDF with the Makefile in the generated latex
# directory.
# NOTE(review): MAIN_DEPENDENCY is documented as taking a single file, but two
# are passed here - confirm both are actually tracked as dependencies.
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/doc/html/index.html ${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf
COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT}
COMMAND make -C ${CMAKE_CURRENT_BINARY_DIR}/doc/latex pdf
MAIN_DEPENDENCY ${DOXYGEN_OUT} ${DOXYGEN_IN}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer.h ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_plugin.h
COMMENT "Generating documentation")
# `doc` is not part of the default build (no ALL); it must be built explicitly.
add_custom_target(doc DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/doc/html/index.html
${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf)
# OPTIONAL: installation does not fail when the documentation was not built.
install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf"
DESTINATION ${CMAKE_INSTALL_DOCDIR}
RENAME "roctracer.pdf"
OPTIONAL
COMPONENT dev)
install(DIRECTORY
"${CMAKE_CURRENT_BINARY_DIR}/doc/html/"
DESTINATION ${CMAKE_INSTALL_DATADIR}/html/${PROJECT_NAME}
OPTIONAL
COMPONENT dev)
endif()
+1
Féach ar an gComhad
@@ -0,0 +1 @@
* @ammarwa @bgopesh
+20
Féach ar an gComhad
@@ -0,0 +1,20 @@
Copyright (c) 2018-2025 Advanced Micro Devices, Inc. All rights reserved.
[MITx11 License]
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
+171
Féach ar an gComhad
@@ -0,0 +1,171 @@
# ROC-tracer
> [!IMPORTANT]
We are phasing out development and support for roctracer/rocprofiler/rocprof/rocprofv2 in favor of rocprofiler-sdk/rocprofv3 in upcoming ROCm releases. Going forward, only critical defect fixes will be addressed for older versions of profiling tools and libraries. We encourage all users to upgrade to the latest version, rocprofiler-sdk library and rocprofv3 tool, to ensure continued support and access to new features.
> [!NOTE]
> The published documentation is available at [ROCTracer](https://rocm.docs.amd.com/projects/roctracer/en/latest/index.html) in an organized, easy-to-read format, with search and a table of contents.
- **ROC-tracer library: Runtimes Generic Callback/Activity APIs**
The goal of the implementation is to provide a generic profiler, independent of any specific runtime, for tracing API calls and asynchronous activity.
The API provides functionality for registering runtime API callbacks and supports pools of asynchronous activity records.
- **ROC-TX library: Code Annotation Events API**
Includes API for:
- `roctxMark`
- `roctxRangePush`
- `roctxRangePop`
## Usage
### `rocTracer` API
To use the rocTracer API you need the API header and to link your application with `roctracer` .so library:
- `/opt/rocm/include/roctracer/roctracer.h`
API header.
- `/opt/rocm/lib/libroctracer64.so`
.so library.
### `rocTX` API
To use the rocTX API you need the API header and to link your application with `roctx` .so library:
- `/opt/rocm/include/roctracer/roctx.h`
API header.
- `/opt/rocm/lib/libroctx64.so`
.so library.
## Library source tree
- `doc`
Documentation.
- `inc`
Include header files.
- `roctracer.h`
`rocTracer` library public API header.
- `roctx.h`
`rocTX` library public API header.
- `src`
Library sources.
- `core`
`rocTracer` library API sources.
- `roctx`
`rocTX` library API sources.
- `util`
Library utils sources.
- `test`
Test suite.
- `MatrixTranspose`
Test based on HIP MatrixTranspose sample.
## Documentation
- API description:
- ['roctracer' / 'rocTX' profiling C API specification](doc/roctracer_spec.md)
- Code examples:
- [HIP API ops, GPU Activity Tracing](doc/roctracer_spec.md#41-hip-api-ops-gpu-activity-tracing)
- [MatrixTranspose HIP sample with all APIs/activity tracing enabled](doc/roctracer_spec.md#42-matrixtranspose-hip-sample-with-all-apisactivity-tracing-enabled)
## Build and run tests
- ROCm is required
- Packages required:
1. For Ubuntu 18.04 and Ubuntu 20.04 the following adds the needed packages:
````shell
apt install python3 python3-pip gcc g++ libatomic1 make rocm-llvm-dev \
cmake doxygen graphviz texlive-full
````
2. For CentOS 8.1 and RHEL 8.1 the following adds the needed packages:
````shell
yum install -y python3 python3-pip gcc gcc-g++ make rocm-llvm-devel \
cmake libatomic doxygen graphviz texlive \
texlive-xtab texlive-multirow texlive-sectsty \
texlive-tocloft texlive-tabu texlive-adjustbox
````
3. For SLES 15 Service Pack 15 the following adds the needed packages:
````shell
zypper in python3 python3-pip gcc gcc-g++ make rocm-llvm-devel \
cmake libatomic doxygen graphviz \
texlive-scheme-medium texlive-hanging texlive-stackengine \
texlive-tocloft texlive-etoc texlive-tabu
````
- Python modules requirements: `CppHeaderParser`, `argparse`.
To install:
```sh
pip3 install CppHeaderParser argparse
```
- Clone development branch of `roctracer`:
```sh
git clone -b amd-master https://github.com/ROCm-Developer-Tools/roctracer
```
- To build `roctracer` library:
```sh
cd <your path>/roctracer
./build.sh
```
- To build and run test:
```sh
cd <your path>/roctracer/build
make mytest
run.sh
```
## Installation
Install by:
```sh
make install
```
or:
```sh
make package && dpkg -i *.deb
```
+75
Féach ar an gComhad
@@ -0,0 +1,75 @@
#!/bin/bash -e
################################################################################
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
# Configure, build, test and package roctracer.
#
# Settings are taken from the environment (optionally pre-seeded by a
# defaults.sh file in the current directory); anything left unset receives the
# default assigned below. All path expansions are quoted so that directories
# containing spaces or glob characters are handled as single arguments - this
# matters most for the `rm -rf` of the build directory.

# Directory containing this script; the default source root.
SRC_DIR=$(dirname "$0")
COMPONENT="roctracer"
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
# Default RUNPATH embedded into the shared libraries; replaced below when
# ROCM_RPATH is set.
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
DEFAULTS=defaults.sh

# fatal MESSAGE: print MESSAGE on stderr and exit with status 1.
fatal() {
  echo "$1" >&2
  exit 1
}

umask 022

# Optional overrides, read before the defaults below are applied.
if [ -e "$DEFAULTS" ]; then source "$DEFAULTS"; fi

if [ -z "$ROCTRACER_ROOT" ]; then ROCTRACER_ROOT=$SRC_DIR; fi
if [ -z "$BUILD_DIR" ]; then BUILD_DIR=$PWD; fi
if [ -z "$BUILD_TYPE" ]; then BUILD_TYPE="release"; fi
if [ -z "$PACKAGE_ROOT" ]; then PACKAGE_ROOT=$ROCM_PATH; fi
if [ -z "$PACKAGE_PREFIX" ]; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi
if [ -z "$PREFIX_PATH" ]; then PREFIX_PATH=$PACKAGE_ROOT; fi
if [ -z "$HIP_VDI" ]; then HIP_VDI=0; fi
if [ -n "$ROCM_RPATH" ]; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
if [ -z "$GPU_LIST" ]; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"; fi

# Turn the source root into an absolute path before changing directory.
ROCTRACER_ROOT=$(cd "$ROCTRACER_ROOT" && pwd)

# TO_CLEAN=yes requests a fresh build directory.
if [ "$TO_CLEAN" = "yes" ]; then rm -rf "$BUILD_DIR"; fi
mkdir -p "$BUILD_DIR"
pushd "$BUILD_DIR"

cmake \
  -DCMAKE_MODULE_PATH="$ROCM_PATH/hip/cmake" \
  -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
  -DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
  -DCMAKE_INSTALL_PREFIX="$PACKAGE_ROOT" \
  -DCPACK_PACKAGING_INSTALL_PREFIX="$PACKAGE_PREFIX" \
  -DCPACK_GENERATOR="${CPACKGEN:-"DEB;RPM"}" \
  -DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
  -DGPU_TARGETS="$GPU_LIST" \
  -DCPACK_OBJCOPY_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objcopy" \
  -DCPACK_READELF_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-readelf" \
  -DCPACK_STRIP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-strip" \
  -DCPACK_OBJDUMP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objdump" \
  "$ROCTRACER_ROOT"

# Build the libraries, the tests, and finally the packages.
make
make mytest
make package

exit 0
+66
Féach ar an gComhad
@@ -0,0 +1,66 @@
#!/bin/bash -x
################################################################################
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
# Configure and build roctracer as static libraries (-DLIBRARY_TYPE=STATIC).
#
# Settings are taken from the environment (optionally pre-seeded by a
# defaults.sh file in the current directory); anything left unset receives the
# default assigned below. All path expansions are quoted so that directories
# containing spaces or glob characters are handled as single arguments.

# Stop at the first failing command. Without this the script ran to its final
# `exit 0` and reported success even when cmake or make had failed; the shared
# build script already runs with -e.
set -e

# Directory containing this script; the default source root.
SRC_DIR=$(dirname "$0")
COMPONENT="roctracer"
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
# Default RUNPATH embedded into linked binaries; replaced below when
# ROCM_RPATH is set.
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
DEFAULTS=defaults.sh

# fatal MESSAGE: print MESSAGE on stderr and exit with status 1.
fatal() {
  echo "$1" >&2
  exit 1
}

umask 022

# Optional overrides, read before the defaults below are applied.
if [ -e "$DEFAULTS" ]; then source "$DEFAULTS"; fi

if [ -z "$ROCTRACER_ROOT" ]; then ROCTRACER_ROOT=$SRC_DIR; fi
if [ -z "$BUILD_DIR" ]; then BUILD_DIR=$PWD; fi
if [ -z "$BUILD_TYPE" ]; then BUILD_TYPE="release"; fi
if [ -z "$PACKAGE_ROOT" ]; then PACKAGE_ROOT=$ROCM_PATH; fi
if [ -z "$PACKAGE_PREFIX" ]; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi
if [ -z "$PREFIX_PATH" ]; then PREFIX_PATH=$PACKAGE_ROOT; fi
if [ -z "$HIP_VDI" ]; then HIP_VDI=0; fi
if [ -n "$ROCM_RPATH" ]; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi

# Turn the source root into an absolute path before changing directory.
ROCTRACER_ROOT=$(cd "$ROCTRACER_ROOT" && pwd)

# TO_CLEAN=yes requests a fresh build directory.
if [ "$TO_CLEAN" = "yes" ]; then rm -rf "$BUILD_DIR"; fi
mkdir -p "$BUILD_DIR"
pushd "$BUILD_DIR"

cmake \
  -DCMAKE_MODULE_PATH="$ROCM_PATH/hip/cmake" \
  -DCMAKE_BUILD_TYPE="$BUILD_TYPE" \
  -DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
  -DCMAKE_INSTALL_PREFIX="$PACKAGE_ROOT" \
  -DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
  -DLIBRARY_TYPE=STATIC \
  "$ROCTRACER_ROOT"

make

exit 0
+24
Féach ar an gComhad
@@ -0,0 +1,24 @@
################################################################################
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
# Default build settings, written as plain shell variable assignments.
# NOTE(review): presumably this is the defaults.sh file that the build scripts
# source when it exists in the current directory - confirm the file name.

# Directory in which the build is configured and run.
BUILD_DIR=build
# "yes" asks the build script to delete BUILD_DIR before building.
TO_CLEAN=yes
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór Difríocht Luchtaigh
+754
Féach ar an gComhad
@@ -0,0 +1,754 @@
# ROC Tracer / ROC-TX Libraries Specification
```
ROC Tracer API version 2
ROC-TX API version 1
- The rocTracer API is agnostic to specific runtime and may trace
the runtime API calls and asynchronous GPU activity.
- The rocTX API is provided for application code annotation.
```
## 1. High level overview
```
The goal of the implementation is to provide a runtime independent API
for tracing of runtime calls and asynchronous activity, like GPU kernel
dispatches and memory moves. The tracing includes callback API for
runtime API tracing and activity API for asynchronous activity records
logging.
Depending on particular runtime intercepting mechanism, the rocTracer
library can be dynamically linked, dynamically loaded by the runtime as
a plugin or some API wrapper can be loaded using LD_PRELOAD.
The library has a C API.
The rocTracer library is an API that intercepts runtime API calls and
traces asynchronous activity. The activity tracing results are recorded
in a ring buffer.
The rocTX contains application code instrumentation API to support high
level correlation of runtime API/activity events. The API includes mark
and nested ranges.
```
## 2. General API
### 2.1. Description
```
The library provides methods for getting the error number and error string
of the last failed library API call. To check that the library API header
in use conforms to the library binary, the version macros and the version
API methods can be used.
Returning the error and error string methods:
• roctracer_status_t error code enumeration
• roctracer_error_string method for returning the error string
Library version:
• ROCTRACER_VERSION_MAJOR API major version macro
• ROCTRACER_VERSION_MINOR API minor version macro
• roctracer_version_major library major version
• roctracer_version_minor library minor version
```
### 2.2. Error codes and error string methods
```
Error code enumeration:
typedef enum {
ROCTRACER_STATUS_SUCCESS = 0,
ROCTRACER_STATUS_ERROR = 1,
ROCTRACER_STATUS_UNINIT = 2,
ROCTRACER_STATUS_BREAK = 3,
ROCTRACER_STATUS_BAD_DOMAIN = 4,
ROCTRACER_STATUS_BAD_PARAMETER = 5,
ROCTRACER_STATUS_HIP_API_ERR = 6,
ROCTRACER_STATUS_HCC_OPS_ERR = 7,
ROCTRACER_STATUS_ROCTX_ERR = 8,
} roctracer_status_t;
Return error string:
const char* roctracer_error_string();
```
### 2.3. Library version
```
The library provides major and minor versions. Major version is for
incompatible API changes and minor version for bug fixes.
API version macros defined in the library API header roctracer.h:
ROCTRACER_VERSION_MAJOR
ROCTRACER_VERSION_MINOR
Methods to check library major and minor version:
uint32_t roctracer_version_major();
uint32_t roctracer_version_minor();
```
## 3. Frontend API
### 3.1. Description
```
The rocTracer provides support for runtime API callbacks and activity
records logging. The APIs of different runtimes at different levels
are considered as different API domains with assigned domain IDs. For
example, language level and driver level. The API callbacks provide
the API call arguments and are called in two phases: on “enter” and
on “exit”. The activity records are logged to the ring buffer and can
be associated with the respective API calls using the correlation ID.
Activity API can be used to enable collecting of the records with
timestamping data for API calls and asynchronous activity like the
kernel submits, memory copies and barriers.
Tracing domains:
• roctracer_domain_t runtime API domains, HIP, HSA, etc…
• roctracer_op_string Return Op string by given domain and
activity Op code
• roctracer_op_code Return Op code and kind by given string
Callback API:
• roctracer_rtapi_callback_t runtime API callback type
• roctracer_enable_op_callback enable runtime API callback
by domain and Op code
• roctracer_enable_domain_callback enable runtime API callback
by domain for all Ops
• roctracer_enable_callback enable runtime API callback for
all domains, all Ops
• roctracer_disable_op_callback disable runtime API callback
by domain and Op code
• roctracer_disable_domain_callback disable runtime API callback
by domain for all Ops
• roctracer_disable_callback disable runtime API callback for
all domains, all Ops
Activity API:
• roctracer_record_t activity record
• roctracer_pool_t records pool type
• roctracer_allocator_t tracer allocator type
• roctracer_buffer_callback_t pool callback type
• roctracer_open_pool[_expl] create records pool
• roctracer_close_pool[_expl] close records pool
• roctracer_default_pool[_expl] get/set default pool
• roctracer_properties_t tracer properties
• roctracer_enable_op_activity[_expl] enable activity records logging
• roctracer_enable_domain_activity[_expl] enable activity records logging
• roctracer_enable_activity[_expl] enable activity records logging
• roctracer_disable_op_activity disable activity records logging
• roctracer_disable_domain_activity disable activity records logging
• roctracer_disable_activity disable activity records logging
• roctracer_flush_activity[_expl] flush activity records
• roctracer_next_record return next record
• roctracer_get_timestamp return correlated GPU/CPU system timestamp
External correlation ID API:
• roctracer_activity_push_external_correlation_id - push an external
correlation id for the calling thread
• roctracer_activity_pop_external_correlation_id - pop an external
correlation id for the calling thread
Tracing control API:
• roctracer_start tracing start
• roctracer_stop tracer stop
```
### 3.2. Tracing Domains
```
Various tracing domains are supported. Each domain is assigned with
a domain ID. The domains include HSA, HIP runtime levels.
Traced API domains:
typedef enum {
ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
ACTIVITY_DOMAIN_HIP_API = 2, // HIP API domain
ACTIVITY_DOMAIN_HIP_OPS = 3, // HIP async activity domain
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
ACTIVITY_DOMAIN_NUMBER = 7
} activity_domain_t;
Return name by given domain and Op code:
const char* roctracer_op_string( // NULL returned on error and error number is set
uint32_t domain, // tracing domain
uint32_t op, // activity op code
uint32_t kind); // activity kind
Return Op code and kind by given string:
roctracer_status_t roctracer_op_code(
uint32_t domain, // tracing domain
const char* str, // [in] op string
uint32_t* op, // [out] op code
uint32_t* kind); // [out] op kind code if not NULL
```
### 3.3. Callback API
```
The tracer provides support for runtime API callbacks and activity records
logging. The API callbacks provide the API call arguments and are invoked
in two phases: on “enter” and on “exit”.
API phase passed to the callbacks:
typedef enum {
ROCTRACER_API_PHASE_ENTER,
ROCTRACER_API_PHASE_EXIT,
} roctracer_api_phase_t;
Runtime API callback type:
typedef void (*roctracer_rtapi_callback_t)(
uint32_t domain, // runtime API domain
uint32_t cid, // API call ID
const void* data, // [in] callback data with correlation id and the call
// arguments
void* arg); // [in/out] user passed data
Enable runtime API callbacks:
roctracer_status_t roctracer_enable_op_callback(
activity_domain_t domain, // tracing domain
uint32_t op, // API call ID
activity_rtapi_callback_t callback, // callback function pointer
void* arg); // [in/out] callback arg
roctracer_status_t roctracer_enable_domain_callback(
activity_domain_t domain, // tracing domain
activity_rtapi_callback_t callback, // callback function pointer
void* arg); // [in/out] callback arg
roctracer_status_t roctracer_enable_callback(
activity_rtapi_callback_t callback, // callback function pointer
void* arg); // [in/out] callback arg
Disable runtime API callbacks:
roctracer_status_t roctracer_disable_op_callback(
activity_domain_t domain, // tracing domain
uint32_t op); // API call ID
roctracer_status_t roctracer_disable_domain_callback(
activity_domain_t domain); // tracing domain
roctracer_status_t roctracer_disable_callback();
```
### 3.4 Activity API
The activity records are asynchronously logged to the pool and can be
associated with the respective API callbacks using the correlation ID.
Activity API can be used to enable collecting the records with
timestamp data for API calls and GPU activity like kernel submits,
memory copies, and barriers.
```
// Correlation id
typedef uint64_t activity_correlation_id_t;
Activity record type:
// Activity record type
// One logged activity: the identifying header, host timestamps, a union of
// origin-specific identifiers, and the size of the data involved.
struct activity_record_t {
uint32_t domain; // activity domain id
activity_kind_t kind; // activity kind
activity_op_t op; // activity op
activity_correlation_id_t correlation_id; // activity ID
uint64_t begin_ns; // host begin timestamp
uint64_t end_ns; // host end timestamp
union {
struct {
int device_id; // device id
uint64_t queue_id; // queue id
};
struct {
uint32_t process_id; // process id
uint32_t thread_id; // thread id
};
struct {
activity_correlation_id_t external_id; // external correlation id
};
};
size_t bytes; // data size bytes
};
```
> [!NOTE]
> rocprofiler reported device ids are in sync with node-ids reported by KFD(kernel). This can easily be verified by `rocm-smi` under `Node`.
> Please also note that this device id might not be in sync with the ones provided by `hipGetDeviceProperties` which includes CPU agents and starts from 0.
```
Return next record:
static inline int roctracer_next_record(
const activity_record_t* record, // [in] record ptr
const activity_record_t** next); // [out] next record ptr
Tracer allocator type:
typedef void (*roctracer_allocator_t)(
char** ptr, // memory pointer
size_t size, // memory size
void* arg); // allocator arg
Pool callback type:
typedef void (*roctracer_buffer_callback_t)(
const char* begin, // [in] available buffered trace records
const char* end, // [in] end of buffered trace records
void* arg); // [in/out] callback arg
Tracer properties:
typedef struct {
uint32_t mode; // roctracer mode
size_t buffer_size; // buffer size
// power of 2
roctracer_allocator_t alloc_fun; // memory allocator
// function pointer
void* alloc_arg; // memory allocator
// function argument
roctracer_buffer_callback_t buffer_callback_fun; // tracer record
// callback function
void* buffer_callback_arg; // tracer record
// callback arg
} roctracer_properties_t;
Tracer memory pool handle type:
typedef void roctracer_pool_t;
Create tracer memory pool:
roctracer_status_t roctracer_open_pool(
const roctracer_properties_t* properties); // tracer pool properties
roctracer_status_t roctracer_open_pool_expl(
const roctracer_properties_t* properties, // tracer pool properties
roctracer_pool_t** pool); // [out] returns tracer pool if
// not NULL, otherwise sets the
// default one if it is not set
// yet; otherwise the error is
// generated
Close tracer memory pool:
roctracer_status_t roctracer_close_pool();
roctracer_status_t roctracer_close_pool_expl(
roctracer_pool_t* pool); // memory pool, NULL means default pool
Return current default pool. Set new default pool if the argument is not NULL:
roctracer_pool_t* roctracer_default_pool();
roctracer_pool_t* roctracer_default_pool_expl(
roctracer_pool_t* pool); // new default pool if not NULL
```
Enable activity records logging:
```
roctracer_status_t roctracer_enable_op_activity(
activity_domain_t domain, // tracing domain
uint32_t op); // activity op ID
roctracer_status_t roctracer_enable_op_activity_expl(
activity_domain_t domain, // tracing domain
uint32_t op, // activity op ID
roctracer_pool_t* pool); // memory pool, NULL means default pool
roctracer_status_t roctracer_enable_domain_activity(
activity_domain_t domain); // tracing domain
roctracer_status_t roctracer_enable_domain_activity_expl(
activity_domain_t domain, // tracing domain
roctracer_pool_t* pool); // memory pool, NULL means default pool
roctracer_status_t roctracer_enable_activity();
roctracer_status_t roctracer_enable_activity_expl(
roctracer_pool_t* pool); // memory pool, NULL means default pool
Disable activity records logging:
roctracer_status_t roctracer_disable_op_activity(
activity_domain_t domain, // tracing domain
uint32_t op); // activity op ID
roctracer_status_t roctracer_disable_domain_activity(
activity_domain_t domain); // tracing domain
roctracer_status_t roctracer_disable_activity();
Flush available activity records:
roctracer_status_t roctracer_flush_activity();
roctracer_status_t roctracer_flush_activity_expl(
roctracer_pool_t* pool); // memory pool, NULL means default pool
Return correlated GPU/CPU system timestamp:
roctracer_status_t roctracer_get_timestamp(
uint64_t* timestamp); // [out] return timestamp
```
External correlation ID API
```
The API provides activity records to associate rocTracer correlation IDs with
IDs provided by external APIs. The external ID records are identified by
ACTIVITY_DOMAIN_EXT_API domain value.
Using the push method an external ID is pushed to a per CPU thread stack and
the pop method can be used to remove the last pushed ID.
An external ID record is inserted before any generated rocTracer activity record
if the external ID stack of the same CPU thread is non-empty.
Notifies that the calling thread is entering an external API region.
Push an external correlation id for the calling thread.
roctracer_status_t roctracer_activity_push_external_correlation_id(
activity_correlation_id_t id); // external correlation id
Notifies that the calling thread is leaving an external API region.
Pop an external correlation id for the calling thread.
roctracer_status_t roctracer_activity_pop_external_correlation_id(
activity_correlation_id_t* last_id); // returns the last external correlation id
// if not NULL
```
Tracing control API
```
Tracing start:
void roctracer_start();
Tracing stop:
void roctracer_stop();
```
## 4. rocTracer Usage Code Examples
### 4.1. HIP API ops, GPU Activity Tracing
```
#include <roctracer/roctracer_hip.h>
// HIP API callback function
void hip_api_callback(
uint32_t domain,
uint32_t cid,
const void* callback_data,
void* arg)
{
(void)arg;
const hip_api_data_t* data = reinterpret_cast <const hip_api_data_t*>
(callback_data);
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
roctracer_id_string(ACTIVITY_DOMAIN_HIP_API, cid),
cid,
data->correlation_id,
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
<some code . . .>
}
// Activity tracing callback.
// Walks the buffered records in [begin, end) and prints one line per record.
//   begin - first buffered trace record
//   end   - end of the buffered trace records
//   arg   - user data supplied with the pool properties (unused here)
void activity_callback(const char* begin, const char* end, void* arg) {
  const roctracer_record_t* record =
      reinterpret_cast<const roctracer_record_t*>(begin);
  const roctracer_record_t* end_record =
      reinterpret_cast<const roctracer_record_t*>(end);
  fprintf(stdout, "\tActivity records:\n");
  while (record < end_record) {
    // The record carries its operation code in `op` and its queue in
    // `queue_id`; it has no `activity_id` or `stream_id` members.
    const char* name = roctracer_op_string(record->domain, record->op, 0);
    // Adjacent literals are concatenated, so the format stays one valid string.
    fprintf(stdout,
            "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) "
            "device_id(%d) queue_id(%lu)\n",
            name,
            record->correlation_id,
            record->begin_ns,
            record->end_ns,
            record->device_id,
            record->queue_id);
    <some code . . .>
    ROCTRACER_CALL(roctracer_next_record(record, &record));
  }
}
// Minimal tracing session: open a pool, trace hipModuleLaunchKernel API calls
// and HIP kernel dispatches, run the workload, then tear everything down.
int main() {
  // Allocating tracing pool
  roctracer_properties_t properties{};
  // Buffer size is documented as a power of 2; 0x1000 matches the
  // MatrixTranspose sample and is large enough to hold activity records.
  properties.buffer_size = 0x1000;
  properties.buffer_callback_fun = activity_callback;
  ROCTRACER_CALL(roctracer_open_pool(&properties));
  // Enable HIP API callbacks. HIP_API_ID_ANY can be used to trace all HIP
  // API calls.
  ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API,
                                              HIP_API_ID_hipModuleLaunchKernel,
                                              hip_api_callback, NULL));
  ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_API,
                                              HIP_API_ID_hipModuleLaunchKernel));
  // Enable HIP kernel dispatch activity tracing
  ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS,
                                              HIP_OP_ID_DISPATCH));
  <test code>
  // Disable tracing and closing the pool
  ROCTRACER_CALL(roctracer_disable_callback());
  ROCTRACER_CALL(roctracer_disable_activity());
  ROCTRACER_CALL(roctracer_close_pool());
}
```
### 4.2. MatrixTranspose HIP sample with all APIs/activity tracing enabled
```
This shows a MatrixTranspose HIP sample with enabled tracing of
all HIP API and all GPU asynchronous activity.
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <iostream>
// hip header file
#include <hip/hip_runtime.h>
#ifndef ITERATIONS
# define ITERATIONS 100
#endif
#define WIDTH 1024
#define NUM (WIDTH * WIDTH)
#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
#define THREADS_PER_BLOCK_Z 1
// Device (kernel) function; a kernel must return void.
// hipLaunchParm carries the execution configuration.
// Each thread copies one element: in[x][y] is written to out[y][x].
__global__ void matrixTranspose(hipLaunchParm lp, float* out, float* in,
                                const int width) {
  const int col = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
  const int row = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
  const int dst_index = row * width + col;
  const int src_index = col * width + row;
  out[dst_index] = in[src_index];
}
// Host-side reference transpose of a square width x width matrix stored in
// row-major order: element (row, col) of input lands at (col, row) of output.
void matrixTransposeCPUReference(float* output, float* input, const unsigned
                                 int width) {
  for (unsigned int row = 0; row < width; ++row) {
    const float* src_row = input + row * width;
    for (unsigned int col = 0; col < width; ++col) {
      output[col * width + row] = src_row[col];
    }
  }
}
int iterations = ITERATIONS;
void start_tracing();
void stop_tracing();
// Runs the MatrixTranspose workload `iterations` times, wrapping each pass in
// start_tracing()/stop_tracing(), and returns the error count of the last pass.
int main() {
  float* Matrix;
  float* TransposeMatrix;
  float* cpuTransposeMatrix;
  float* gpuMatrix;
  float* gpuTransposeMatrix;

  hipDeviceProp_t devProp;
  hipGetDeviceProperties(&devProp, 0);
  std::cout << "Device name " << devProp.name << std::endl;

  int i;
  // Initialized so the exit status is well defined even if the loop body
  // never executes (e.g. ITERATIONS defined as 0).
  int errors = 0;

  while (iterations-- > 0) {
    start_tracing();

    Matrix = (float*)malloc(NUM * sizeof(float));
    TransposeMatrix = (float*)malloc(NUM * sizeof(float));
    cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));

    // initialize the input data
    for (i = 0; i < NUM; i++) {
      Matrix[i] = (float)i * 10.0f;
    }

    // allocate the memory on the device side
    hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
    hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));

    // Memory transfer from host to device
    hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float),
              hipMemcpyHostToDevice);

    // Launching kernel from host
    hipLaunchKernel(matrixTranspose,
                    dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH /
                         THREADS_PER_BLOCK_Y),
                    dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
                    gpuTransposeMatrix, gpuMatrix, WIDTH);

    // Memory transfer from device to host
    hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float),
              hipMemcpyDeviceToHost);

    // CPU MatrixTranspose computation
    matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);

    // verify the results
    errors = 0;
    double eps = 1.0E-6;
    for (i = 0; i < NUM; i++) {
      if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
        errors++;
      }
    }
    if (errors != 0) {
      printf("FAILED: %d errors\n", errors);
    } else {
      printf("PASSED!\n");
    }

    // free the resources on device side
    hipFree(gpuMatrix);
    hipFree(gpuTransposeMatrix);

    // free the resources on host side
    free(Matrix);
    free(TransposeMatrix);
    free(cpuTransposeMatrix);

    stop_tracing();
  }
  return errors;
}
/////////////////////////////////////////////////////////////////////////////
// HIP Callbacks/Activity tracing
/////////////////////////////////////////////////////////////////////////////
#include <roctracer/roctracer_hip.h>
// Macro to check ROC-tracer calls status.
// Evaluates `call` once; if the result is non-zero it prints the text from
// roctracer_error_string() to std::cerr and calls abort().
// NOTE(review): roctracer.h defines an object-like ROCTRACER_CALL placeholder
// when none exists; if that header is pulled in by roctracer_hip.h above, this
// function-like definition is a macro redefinition - confirm, or rename.
#define ROCTRACER_CALL(call) \
do { \
int err = call; \
if (err != 0) { \
std::cerr << roctracer_error_string() << std::endl << std::flush; \
abort(); \
} \
} while (0)
// HIP API callback function.
// Prints one line per callback: API name, call id, correlation id and phase,
// followed by selected arguments of a few HIP calls.
//   domain        - runtime API domain of the call (not used here)
//   cid           - HIP API call id
//   callback_data - points to the hip_api_data_t describing the call
//   arg           - user data supplied at registration (unused)
void hip_api_callback(
    uint32_t domain,
    uint32_t cid,
    const void* callback_data,
    void* arg)
{
  (void)arg;
  const hip_api_data_t* data =
      reinterpret_cast<const hip_api_data_t*>(callback_data);
  fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
          roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0),
          cid,
          data->correlation_id,
          (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
  if (data->phase == ACTIVITY_API_PHASE_ENTER) {
    // On entry the input arguments are available.
    switch (cid) {
      case HIP_API_ID_hipMemcpy:
        fprintf(stdout, "dst(%p) src(%p) size(0x%x) kind(%u)",
                data->args.hipMemcpy.dst,
                data->args.hipMemcpy.src,
                (uint32_t)(data->args.hipMemcpy.sizeBytes),
                (uint32_t)(data->args.hipMemcpy.kind));
        break;
      case HIP_API_ID_hipMalloc:
        fprintf(stdout, "ptr(%p) size(0x%x)",
                data->args.hipMalloc.ptr,
                (uint32_t)(data->args.hipMalloc.size));
        break;
      case HIP_API_ID_hipFree:
        // The format string was previously missing its closing quote.
        fprintf(stdout, "ptr(%p)",
                data->args.hipFree.ptr);
        break;
      case HIP_API_ID_hipModuleLaunchKernel:
        fprintf(stdout, "kernel(\"%s\") stream(%p)",
                hipKernelNameRef(data->args.hipModuleLaunchKernel.f),
                data->args.hipModuleLaunchKernel.stream);
        break;
      default:
        break;
    }
  } else {
    // On exit the value written through hipMalloc's out-pointer can be read.
    switch (cid) {
      case HIP_API_ID_hipMalloc:
        // Plain %p: common implementations already emit the "0x" prefix.
        fprintf(stdout, "*ptr(%p)",
                *(data->args.hipMalloc.ptr));
        break;
      default:
        break;
    }
  }
  fprintf(stdout, "\n"); fflush(stdout);
}
// Activity tracing callback
// Walks the buffered records in [begin, end) and prints one line per record,
// e.g.
//   hipMalloc id(3) correlation_id(1):
//   begin_ns(1525888652762640464) end_ns(1525888652762877067)
//   begin - first buffered trace record
//   end   - end of the buffered trace records
//   arg   - user data supplied with the pool properties (unused here)
void activity_callback(const char* begin, const char* end, void* arg) {
  const roctracer_record_t* record =
      reinterpret_cast<const roctracer_record_t*>(begin);
  const roctracer_record_t* end_record =
      reinterpret_cast<const roctracer_record_t*>(end);
  fprintf(stdout, "\tActivity records:\n"); fflush(stdout);
  while (record < end_record) {
    // The record carries its operation code in `op` and its queue in
    // `queue_id`; it has no `activity_id` or `stream_id` members.
    const char* name = roctracer_op_string(record->domain, record->op, 0);
    fprintf(stdout,
            "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) "
            "device_id(%d) queue_id(%lu)",
            name,
            record->correlation_id,
            record->begin_ns,
            record->end_ns,
            record->device_id,
            record->queue_id);
    // NOTE(review): hc::HSA_OP_ID_COPY comes from the legacy HCC namespace and
    // is compared against `kind`; confirm the right constant and field.
    if (record->kind == hc::HSA_OP_ID_COPY)
      fprintf(stdout, " bytes(0x%zx)", record->bytes);
    fprintf(stdout, "\n");
    fflush(stdout);
    ROCTRACER_CALL(roctracer_next_record(record, &record));
  }
}
// Start tracing routine
void start_tracing() {
std::cout << "# START #############################" << std::endl
<< std::flush;
// Allocating tracing pool
roctracer_properties_t properties{};
properties.buffer_size = 0x1000;
properties.buffer_callback_fun = activity_callback;
ROCTRACER_CALL(roctracer_open_pool(&properties));
// Enable API callbacks, all domains
ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, NULL));
// Enable activity tracing, all domains
ROCTRACER_CALL(roctracer_enable_activity());
}
// Stop tracing routine
void stop_tracing() {
ROCTRACER_CALL(roctracer_disable_api_callback());
ROCTRACER_CALL(roctracer_disable_api_activity());
ROCTRACER_CALL(roctracer_close_pool());
std::cout << "# STOP #############################" << std::endl
<< std::flush;
}
/////////////////////////////////////////////////////////////////////////////
```
## 5. rocTX application code annotation API
```
Basic annotation API: markers and nested ranges.
// Creates a marker with the given ASCII message
void roctxMark(const char* message);
// Starts a nested range associated with the given message and returns the 0-based level of that range.
// A negative value is returned on the error.
int roctxRangePush(const char* message);
// Marks the end of a nested range.
// Returns the 0-based level of the range.
// A negative value is returned on the error.
int roctxRangePop();
```
+4
Féach ar an gComhad
@@ -0,0 +1,4 @@
---
BasedOnStyle: InheritParentConfig
ColumnLimit: 79
...
+107
Féach ar an gComhad
@@ -0,0 +1,107 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef EXT_PROF_PROTOCOL_H_
#define EXT_PROF_PROTOCOL_H_
#include <stdint.h>
#include <stdlib.h>
/* Traced API domains.
   Each enumerator is the numeric ID of one tracing domain. HCC_OPS and
   HIP_VDI are aliases that share the value of HIP_OPS. */
typedef enum {
ACTIVITY_DOMAIN_HSA_API = 0, /* HSA API domain */
ACTIVITY_DOMAIN_HSA_OPS = 1, /* HSA async activity domain */
ACTIVITY_DOMAIN_HIP_OPS = 2, /* HIP async activity domain */
ACTIVITY_DOMAIN_HCC_OPS =
ACTIVITY_DOMAIN_HIP_OPS, /* HCC async activity domain */
ACTIVITY_DOMAIN_HIP_VDI =
ACTIVITY_DOMAIN_HIP_OPS, /* HIP VDI async activity domain */
ACTIVITY_DOMAIN_HIP_API = 3, /* HIP API domain */
ACTIVITY_DOMAIN_KFD_API = 4, /* KFD API domain */
ACTIVITY_DOMAIN_EXT_API = 5, /* External ID domain */
ACTIVITY_DOMAIN_ROCTX = 6, /* ROCTX domain */
ACTIVITY_DOMAIN_HSA_EVT = 7, /* HSA events */
ACTIVITY_DOMAIN_NUMBER /* one past the last domain ID (evaluates to 8) */
} activity_domain_t;
/* API callback type.
   Receives the domain ID, the call ID (cid), an opaque pointer to the
   callback data and the opaque user argument. */
typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid,
const void* data, void* arg);
/* Numeric activity kind, stored in the `kind` field of an activity record */
typedef uint32_t activity_kind_t;
/* Numeric activity operation code, stored in the `op` field of an activity
   record */
typedef uint32_t activity_op_t;
/* API callback phase: distinguishes the callback made when a traced API call
   is entered from the one made when it exits. */
typedef enum {
ACTIVITY_API_PHASE_ENTER = 0,
ACTIVITY_API_PHASE_EXIT = 1
} activity_api_phase_t;
/* Trace record types */
/* Correlation id */
typedef uint64_t activity_correlation_id_t;
/* Timestamp in nanoseconds */
typedef uint64_t roctracer_timestamp_t;
/* Activity record type.
   A fixed header (domain, kind, op) followed by three anonymous unions.
   Which union member is meaningful is presumably determined by the record's
   domain/kind - confirm against the producers of each record type. */
typedef struct activity_record_s {
uint32_t domain; /* activity domain id */
activity_kind_t kind; /* activity kind */
activity_op_t op; /* activity op */
union {
struct {
activity_correlation_id_t correlation_id; /* activity ID */
roctracer_timestamp_t begin_ns; /* host begin timestamp */
roctracer_timestamp_t end_ns; /* host end timestamp */
};
struct {
uint32_t se; /* sampled SE */
uint64_t cycle; /* sample cycle */
uint64_t pc; /* sample PC */
} pc_sample;
};
union {
struct {
int device_id; /* device id */
uint64_t queue_id; /* queue id */
};
struct {
uint32_t process_id; /* process id */
uint32_t thread_id; /* thread id */
};
struct {
activity_correlation_id_t external_id; /* external correlation id */
};
};
union {
size_t bytes; /* data size bytes */
const char* kernel_name; /* kernel name */
const char* mark_message; /* marker message */
};
} activity_record_t;
/* Activity sync callback type.
   Receives the call ID, a mutable activity record, an opaque pointer to
   callback data and the opaque user argument. */
typedef void (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data,
void* arg);
/* Activity async callback type.
   Receives the operation code, an untyped pointer to the record and the
   opaque user argument. */
typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg);
#endif /* EXT_PROF_PROTOCOL_H_ */
+779
Féach ar an gComhad
@@ -0,0 +1,779 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
/** \mainpage ROC Tracer API Specification
*
* \section introduction Introduction
*
* ROCtracer library, Runtimes Generic Callback/Activity APIs.
*
* The goal of the implementation is to provide a generic profiler, independent
* of any specific runtime, to trace API and asynchronous activity.
*
* The API provides functionality for registering the runtimes API callbacks
* and asynchronous activity records pool support.
*
* \section known_limitations Known Limitations and Restrictions
*
* The ROCtracer API library implementation currently has the following
* restrictions. Future releases aim to address these restrictions.
*
* 1. The ACTIVITY_DOMAIN_HSA_OPS operations HSA_OP_ID_DISPATCH,
* HSA_OP_ID_BARRIER, and HSA_OP_ID_RESERVED1 are not currently implemented.
*/
/**
* \file
* ROCtracer API interface.
*/
#ifndef ROCTRACER_H_
#define ROCTRACER_H_
/* Placeholder for calling convention and import/export macros */
#if !defined(ROCTRACER_CALL)
#define ROCTRACER_CALL
#endif /* !defined (ROCTRACER_CALL) */
/* Export decorator: default symbol visibility for GNU-compatible compilers,
   dllexport for MSVC. NOTE(review): no fallback is defined for any other
   compiler. */
#if !defined(ROCTRACER_EXPORT_DECORATOR)
#if defined(__GNUC__)
#define ROCTRACER_EXPORT_DECORATOR __attribute__((visibility("default")))
#elif defined(_MSC_VER)
#define ROCTRACER_EXPORT_DECORATOR __declspec(dllexport)
#endif /* defined (_MSC_VER) */
#endif /* !defined (ROCTRACER_EXPORT_DECORATOR) */
/* Import decorator: empty for GNU-compatible compilers, dllimport for MSVC. */
#if !defined(ROCTRACER_IMPORT_DECORATOR)
#if defined(__GNUC__)
#define ROCTRACER_IMPORT_DECORATOR
#elif defined(_MSC_VER)
#define ROCTRACER_IMPORT_DECORATOR __declspec(dllimport)
#endif /* defined (_MSC_VER) */
#endif /* !defined (ROCTRACER_IMPORT_DECORATOR) */
#define ROCTRACER_EXPORT ROCTRACER_EXPORT_DECORATOR ROCTRACER_CALL
#define ROCTRACER_IMPORT ROCTRACER_IMPORT_DECORATOR ROCTRACER_CALL
/* ROCTRACER_API expands to the export form when ROCTRACER_EXPORTS is defined
   and to the import form otherwise.
   NOTE(review): the guard tests ROCTRACER, not ROCTRACER_API - confirm this
   is intended. */
#if !defined(ROCTRACER)
#if defined(ROCTRACER_EXPORTS)
#define ROCTRACER_API ROCTRACER_EXPORT
#else /* !defined (ROCTRACER_EXPORTS) */
#define ROCTRACER_API ROCTRACER_IMPORT
#endif /* !defined (ROCTRACER_EXPORTS) */
#endif /* !defined (ROCTRACER) */
#include <stddef.h>
#include <stdint.h>
#include "ext/prof_protocol.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/** \defgroup symbol_versions_group Symbol Versions
*
* The names used for the shared library versioned symbols.
*
* Every function is annotated with one of the version macros defined in this
* section. Each macro specifies a corresponding symbol version string. After
* dynamically loading the shared library with \p dlopen, the address of each
* function can be obtained using \p dlvsym with the name of the function and
* its corresponding symbol version string. An error will be reported by \p
* dlvsym if the installed library does not support the version for the
* function specified in this version of the interface.
*
* @{
*/
/**
* The function was introduced in version 4.1 of the interface and has the
* symbol version string of ``"ROCTRACER_4.1"``.
*/
#define ROCTRACER_VERSION_4_1
/** @} */
/** \defgroup versioning_group Versioning
*
* Version information about the interface and the associated installed
* library.
*
* The semantic version of the interface following semver.org rules. A client
* that uses this interface is only compatible with the installed library if
* the major version numbers match and the interface minor version number is
* less than or equal to the installed library minor version number.
*
* @{
*/
/**
* The major version of the interface as a macro so it can be used by the
* preprocessor.
*/
#define ROCTRACER_VERSION_MAJOR 4
/**
* The minor version of the interface as a macro so it can be used by the
* preprocessor.
*/
#define ROCTRACER_VERSION_MINOR 1
/**
* Query the major version of the installed library.
*
* Return the major version of the installed library. This can be used to
* check if it is compatible with this interface version. This function can be
* used even when the library is not initialized.
*/
ROCTRACER_API uint32_t roctracer_version_major() ROCTRACER_VERSION_4_1;
/**
* Query the minor version of the installed library.
*
* Return the minor version of the installed library. This can be used to
* check if it is compatible with this interface version. This function can be
* used even when the library is not initialized.
*/
ROCTRACER_API uint32_t roctracer_version_minor() ROCTRACER_VERSION_4_1;
/** @} */
/** \defgroup status_codes_group Status Codes
*
* Most operations return a status code to indicate success or error.
*
* @{
*/
/**
* ROC Tracer API status codes.
*
* Success is 0 and the current error codes are negative. The deprecated codes
* are kept with their historical values; some of them alias current codes or
* share a value with another deprecated code.
*/
typedef enum {
/**
* The function has executed successfully.
*/
ROCTRACER_STATUS_SUCCESS = 0,
/**
* A generic error has occurred.
*/
ROCTRACER_STATUS_ERROR = -1,
/**
* The domain ID is invalid.
*/
ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID = -2,
/**
* An invalid argument was given to the function.
*/
ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT = -3,
/**
* No default pool is defined.
*/
ROCTRACER_STATUS_ERROR_DEFAULT_POOL_UNDEFINED = -4,
/**
* The default pool is already defined.
*/
ROCTRACER_STATUS_ERROR_DEFAULT_POOL_ALREADY_DEFINED = -5,
/**
* Memory allocation error.
*/
ROCTRACER_STATUS_ERROR_MEMORY_ALLOCATION = -6,
/**
* External correlation ID pop mismatch.
*/
ROCTRACER_STATUS_ERROR_MISMATCHED_EXTERNAL_CORRELATION_ID = -7,
/**
* The operation is not currently implemented. This error may be reported by
* any function. Check the \ref known_limitations section to determine the
* status of the library implementation of the interface.
*/
ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED = -8,
/**
* Deprecated error code.
*/
ROCTRACER_STATUS_UNINIT = 2,
/**
* Deprecated error code.
*/
ROCTRACER_STATUS_BREAK = 3,
/**
* Deprecated error code. Alias of ::ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID.
*/
ROCTRACER_STATUS_BAD_DOMAIN = ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID,
/**
* Deprecated error code. Alias of ::ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT.
*/
ROCTRACER_STATUS_BAD_PARAMETER = ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
/**
* Deprecated error code.
*/
ROCTRACER_STATUS_HIP_API_ERR = 6,
/**
* Deprecated error code.
*/
ROCTRACER_STATUS_HIP_OPS_ERR = 7,
/**
* Deprecated error code. Alias of ::ROCTRACER_STATUS_HIP_OPS_ERR.
*/
ROCTRACER_STATUS_HCC_OPS_ERR = ROCTRACER_STATUS_HIP_OPS_ERR,
/**
* Deprecated error code. Has the same value (7) as
* ::ROCTRACER_STATUS_HIP_OPS_ERR, so the two cannot be told apart.
*/
ROCTRACER_STATUS_HSA_ERR = 7,
/**
* Deprecated error code.
*/
ROCTRACER_STATUS_ROCTX_ERR = 8,
} roctracer_status_t;
/**
* Query the textual description of the last error for the current thread.
*
* Returns a NUL terminated string describing the error of the last ROC Tracer
* API call by the calling thread that did not return success. The empty
* string is returned if there is no previous error. The last error is not
* cleared.
*
* \return Return the error string. The caller owns the returned string and
* should use \p free() to deallocate it.
*/
ROCTRACER_API const char* roctracer_error_string() ROCTRACER_VERSION_4_1;
/** @} */
/** \defgroup domain_group Traced Runtime Domains
*
* The ROC Tracer API can trace multiple runtime libraries. Each library can
* have API operations and asynchronous operations that can be traced.
*
* @{
*/
/**
* Enumeration of domains that can be traced.
*/
typedef activity_domain_t roctracer_domain_t;
/**
* Query textual name of an operation of a domain.
*
* @param[in] domain Domain being queried.
*
* @param[in] op Operation within \p domain.
*
* @param[in] kind \todo Define kind.
*
* @return Returns the NUL terminated string for the operation name, or NULL if
* the domain or operation are invalid. The string is owned by the ROC Tracer
* library.
*/
ROCTRACER_API const char* roctracer_op_string(
uint32_t domain, uint32_t op, uint32_t kind) ROCTRACER_VERSION_4_1;
/**
* Query the operation code given a domain and the name of an operation.
*
* @param[in] domain The domain being queried.
*
* @param[in] str The NUL terminated name of the operation name being queried.
*
* @param[out] op The operation code.
*
* @param[out] kind If not NULL then the operation kind code.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully. \p op and \p kind have been updated.
*
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT The \p op is invalid for
* \p domain.
*
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID The domain is invalid or
* not supported.
*/
ROCTRACER_API roctracer_status_t
roctracer_op_code(uint32_t domain, const char* str, uint32_t* op,
uint32_t* kind) ROCTRACER_VERSION_4_1;
/**
* Set the properties of a domain.
*
* @param[in] domain The domain.
*
* @param[in] properties The properties. Each domain defines its own type for
* the properties. Some domains require the properties to be set before they
* can be enabled.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*/
ROCTRACER_API roctracer_status_t roctracer_set_properties(
roctracer_domain_t domain, void* properties) ROCTRACER_VERSION_4_1;
/** @} */
/** \defgroup callback_api_group Callback API
*
* ROC tracer provides support for runtime API callbacks and activity
* records logging. The API callbacks provide the API calls arguments and are
* called on different phases, on enter, on exit, on kernel completion.
*
* @{
*/
/**
* Runtime API callback type.
*
* The callback that will be invoked when an enabled runtime API is called. The
* callback is invoked on entry and on exit.
*/
typedef activity_rtapi_callback_t roctracer_rtapi_callback_t;
/**
* Enable runtime API callback for a specific operation of a domain.
*
* @param domain The domain.
*
* @param op The operation ID in \p domain.
*
* @param callback The callback to invoke each time the operation is performed
* on entry and exit.
*
* @param arg Value to pass as last argument of \p callback.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID \p domain is invalid.
*
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT \p op is invalid for \p
* domain.
*/
ROCTRACER_API roctracer_status_t roctracer_enable_op_callback(
activity_domain_t domain, uint32_t op, activity_rtapi_callback_t callback,
void* arg) ROCTRACER_VERSION_4_1;
/**
* Enable runtime API callback for all operations of a domain.
*
* @param domain The domain
*
* @param callback The callback to invoke each time the operation is performed
* on entry and exit.
*
* @param arg Value to pass as last argument of \p callback.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID \p domain is invalid.
*/
ROCTRACER_API roctracer_status_t roctracer_enable_domain_callback(
activity_domain_t domain, activity_rtapi_callback_t callback,
void* arg) ROCTRACER_VERSION_4_1;
/**
* Disable runtime API callback for a specific operation of a domain.
*
* @param domain The domain
*
* @param op The operation in \p domain.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID \p domain is invalid.
*
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT \p op is invalid for \p
* domain.
*/
ROCTRACER_API roctracer_status_t roctracer_disable_op_callback(
activity_domain_t domain, uint32_t op) ROCTRACER_VERSION_4_1;
/**
* Disable runtime API callback for all operations of a domain.
*
* @param domain The domain
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID \p domain is invalid.
*/
ROCTRACER_API roctracer_status_t roctracer_disable_domain_callback(
activity_domain_t domain) ROCTRACER_VERSION_4_1;
/** @} */
/** \defgroup activity_api_group Activity API
*
* The activity records are asynchronously logged to the pool and can be
* associated with the respective API callbacks using the correlation ID.
* Activity API can be used to enable collecting of the records with
* timestamping data for API calls and the kernel submits.
*
* @{
*/
/**
* Activity record.
*
* Asynchronous activity events generate activity records.
*/
typedef activity_record_t roctracer_record_t;
/**
* Get a pointer to the next activity record.
*
* A memory pool generates buffers that contain multiple activity records.
* This function steps to the next activity record.
*
* @param[in] record Pointer to an activity record in a memory pool buffer.
*
* @param[out] next Pointer to the following activity record in the memory pool
* buffer.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*/
ROCTRACER_API roctracer_status_t
roctracer_next_record(const activity_record_t* record,
const activity_record_t** next) ROCTRACER_VERSION_4_1;
/**
* Memory pool allocator callback.
*
* If \p *ptr is NULL, then allocate memory of \p size bytes and save address
* in \p *ptr.
*
* If \p *ptr is non-NULL and size is non-0, then reallocate the memory at \p
* *ptr with size \p size and save the address in \p *ptr. The memory will have
* been allocated by the same callback.
*
* If \p *ptr is non-NULL and size is 0, then deallocate the memory at \p *ptr.
* The memory will have been allocated by the same callback.
*
* \p size is the size of the memory allocation or reallocation, or 0 if
* deallocating.
*
* \p arg Argument provided in the ::roctracer_properties_t passed to the
* ::roctracer_open_pool function.
*/
typedef void (*roctracer_allocator_t)(char** ptr, size_t size, void* arg);
/**
* Memory pool buffer callback.
*
* The callback that will be invoked when a memory pool buffer becomes full or
* is flushed.
*
* \p begin pointer to first entry in the buffer.
*
* \p end pointer to one past the end entry in the buffer.
*
* \p arg the argument specified when the callback was defined.
*/
typedef void (*roctracer_buffer_callback_t)(const char* begin, const char* end,
void* arg);
/**
* Memory pool properties.
*
* Defines the properties when a tracer memory pool is created.
*/
typedef struct {
/**
* ROC Tracer mode.
*/
uint32_t mode;
/**
* Size of buffer in bytes.
*/
size_t buffer_size;
/**
* The allocator function to use to allocate and deallocate the buffer. If
* NULL then \p malloc, \p realloc, and \p free are used.
*/
roctracer_allocator_t alloc_fun;
/**
* The argument to pass when invoking the \p alloc_fun allocator.
*/
void* alloc_arg;
/**
* The function to call when a buffer becomes full or is flushed.
*/
roctracer_buffer_callback_t buffer_callback_fun;
/**
* The argument to pass when invoking the \p buffer_callback_fun callback.
*/
void* buffer_callback_arg;
} roctracer_properties_t;
/**
* Tracer memory pool type.
*/
typedef void roctracer_pool_t;
/**
* Create tracer memory pool.
*
* If \p pool is not NULL, returns the created memory pool. Does not change the
* default memory pool.
*
* If \p pool is NULL, sets the default memory pool to the created pool if not
* already defined. Otherwise, return an error.
*
* @param[in] properties Tracer memory pool properties.
*
* @param[out] pool Tracer memory pool created if not NULL.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*
* @retval ROCTRACER_STATUS_ERROR_DEFAULT_POOL_ALREADY_DEFINED \p pool is NULL
* and the default pool is already defined. Unable to create the pool.
*
* @retval ROCTRACER_STATUS_ERROR_MEMORY_ALLOCATION Unable to allocate memory
* for the \p pool. Unable to create the pool.
*/
ROCTRACER_API roctracer_status_t
roctracer_open_pool_expl(const roctracer_properties_t* properties,
roctracer_pool_t** pool) ROCTRACER_VERSION_4_1;
/**
* Create tracer memory pool.
*
* Sets the default memory pool to the created pool if not already defined.
* Otherwise, return an error.
*
* @param[in] properties Tracer memory pool properties.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*
* @retval ROCTRACER_STATUS_ERROR_DEFAULT_POOL_ALREADY_DEFINED The default pool
* is already defined. Unable to create the pool.
*
* @retval ROCTRACER_STATUS_ERROR_MEMORY_ALLOCATION Unable to allocate memory
* for the \p pool. Unable to create the pool.
*/
ROCTRACER_API roctracer_status_t roctracer_open_pool(
const roctracer_properties_t* properties) ROCTRACER_VERSION_4_1;
/**
* Close tracer memory pool.
*
* All enabled activities that use the pool must have completed writing to the
* pool, before deleting the pool. Deleting a pool automatically disables any
* activities that specify the pool, and flushes it.
*
* @param[in] pool Memory pool to close. If NULL, the default memory pool is
* closed if defined. The default memory pool is set to undefined if closed.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully or pool was NULL and there is no default pool.
*/
ROCTRACER_API roctracer_status_t
roctracer_close_pool_expl(roctracer_pool_t* pool) ROCTRACER_VERSION_4_1;
/**
* Close default tracer memory pool, if defined, and set to undefined.
*
* All enabled activities that use the pool must have completed writing to the
* pool, before deleting the pool. Deleting a pool automatically disables any
* activities that specify the pool, and flushes it.
*
* The function takes no arguments; the parameter list is spelled \p (void) so
* that C clients of this header see a full prototype.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully or there is no default pool.
*/
ROCTRACER_API roctracer_status_t roctracer_close_pool(void)
ROCTRACER_VERSION_4_1;
/**
* Query and set the default memory pool.
*
* @param[in] pool If not NULL, change the current default pool to \p pool. If
* NULL, the default pool is not changed.
*
* @return Return the current default memory pool before any change, or NULL if
* none is defined.
*/
ROCTRACER_API roctracer_pool_t* roctracer_default_pool_expl(
roctracer_pool_t* pool) ROCTRACER_VERSION_4_1;
/**
* Query the current default memory pool.
*
* The function takes no arguments; the parameter list is spelled \p (void) so
* that C clients of this header see a full prototype.
*
* @return Return the current default memory pool, or NULL if none is defined.
*/
ROCTRACER_API roctracer_pool_t* roctracer_default_pool(void)
ROCTRACER_VERSION_4_1;
/**
* Enable activity record logging for a specified operation of a domain
* providing a memory pool.
*
* @param[in] domain The domain.
*
* @param[in] op The activity operation ID in \p domain.
*
* @param[in] pool The memory pool to write the activity record. If NULL, use
* the default memory pool.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*
* @retval ROCTRACER_STATUS_ERROR \p pool is NULL and no default pool is
* defined.
*/
ROCTRACER_API roctracer_status_t roctracer_enable_op_activity_expl(
activity_domain_t domain, uint32_t op,
roctracer_pool_t* pool) ROCTRACER_VERSION_4_1;
/**
* Enable activity record logging for a specified operation of a domain using
* the default memory pool.
*
* @param[in] domain The domain.
*
* @param[in] op The activity operation ID in \p domain.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*
* @retval ROCTRACER_STATUS_ERROR No default pool is defined.
*/
ROCTRACER_API roctracer_status_t roctracer_enable_op_activity(
activity_domain_t domain, uint32_t op) ROCTRACER_VERSION_4_1;
/**
* Enable activity record logging for all operations of a domain providing a
* memory pool.
*
* @param[in] domain The domain.
*
* @param[in] pool The memory pool to write the activity record. If NULL, use
* the default memory pool.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*
* @retval ROCTRACER_STATUS_ERROR \p pool is NULL and no default pool is
* defined.
*/
ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity_expl(
activity_domain_t domain, roctracer_pool_t* pool) ROCTRACER_VERSION_4_1;
/**
* Enable activity record logging for all operations of a domain using the
* default memory pool.
*
* @param[in] domain The domain.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*
* @retval ROCTRACER_STATUS_ERROR No default pool is defined.
*/
ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity(
activity_domain_t domain) ROCTRACER_VERSION_4_1;
/**
* Disable activity record logging for a specified operation of a domain.
*
* @param[in] domain The domain.
*
* @param[in] op The activity operation ID in \p domain.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*/
ROCTRACER_API roctracer_status_t roctracer_disable_op_activity(
activity_domain_t domain, uint32_t op) ROCTRACER_VERSION_4_1;
/**
* Disable activity record logging for all operations of a domain.
*
* @param[in] domain The domain.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*/
ROCTRACER_API roctracer_status_t roctracer_disable_domain_activity(
activity_domain_t domain) ROCTRACER_VERSION_4_1;
/**
* Flush available activity records for a memory pool.
*
* If flushing encounters an activity record still being written, flushing
* stops. Use a subsequent flush when the record has completed being written to
* resume the flush.
*
* @param[in] pool The memory pool to flush. If NULL, flushes the default
* memory pool.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*/
ROCTRACER_API roctracer_status_t
roctracer_flush_activity_expl(roctracer_pool_t* pool) ROCTRACER_VERSION_4_1;
/**
* Flush available activity records for the default memory pool.
*
* If flushing encounters an activity record still being written, flushing
* stops. Use a subsequent flush when the record has completed being written to
* resume the flush.
*
* The function takes no arguments; the parameter list is spelled \p (void) so
* that C clients of this header see a full prototype.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*/
ROCTRACER_API roctracer_status_t roctracer_flush_activity(void)
ROCTRACER_VERSION_4_1;
/** @} */
/** \defgroup timestamp_group Timestamp Operations
*
*
*
* @{
*/
/**
* Get the system clock timestamp.
*
* @param[out] timestamp The system clock timestamp in nanoseconds.
*
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
* successfully.
*/
ROCTRACER_API roctracer_status_t roctracer_get_timestamp(
roctracer_timestamp_t* timestamp) ROCTRACER_VERSION_4_1;
/** @} */
#ifdef __cplusplus
} /* extern "C" block */
#endif /* __cplusplus */
#endif /* ROCTRACER_H_ */
+81
Féach ar an gComhad
@@ -0,0 +1,81 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
////////////////////////////////////////////////////////////////////////////////
//
// ROC Tracer Extension API
//
// The API provides functionality for application annotation with event and
// external ranges correlation
//
////////////////////////////////////////////////////////////////////////////////
#ifndef ROCTRACER_EXT_H_
#define ROCTRACER_EXT_H_
#include "roctracer.h"
/* Extension API opcodes */
typedef enum {
ACTIVITY_EXT_OP_MARK = 0,
ACTIVITY_EXT_OP_EXTERN_ID = 1
} activity_ext_op_t;
// Pointer to a callback that returns nothing; declared with an empty
// parameter list.
// NOTE(review): presumably invoked when tracing is started (see
// roctracer_start) - confirm against the implementation.
typedef void (*roctracer_start_cb_t)();
// Pointer to a callback that returns nothing; declared with an empty
// parameter list.
// NOTE(review): presumably invoked when tracing is stopped (see
// roctracer_stop) - confirm against the implementation.
typedef void (*roctracer_stop_cb_t)();
// Extension properties: a pair of start/stop callbacks.
typedef struct {
// Callback of type roctracer_start_cb_t.
roctracer_start_cb_t start_cb;
// Callback of type roctracer_stop_cb_t.
roctracer_stop_cb_t stop_cb;
} roctracer_ext_properties_t;
#ifdef __cplusplus
extern "C" {
#endif // __cplusplus
////////////////////////////////////////////////////////////////////////////////
// Application annotation API
// Tracing start API
// Takes no arguments and returns nothing. The parameter list is spelled
// (void) so that C clients of this header see a full prototype.
void ROCTRACER_API roctracer_start(void) ROCTRACER_VERSION_4_1;
// Tracing stop API
// Takes no arguments and returns nothing. The parameter list is spelled
// (void) so that C clients of this header see a full prototype.
void ROCTRACER_API roctracer_stop(void) ROCTRACER_VERSION_4_1;
////////////////////////////////////////////////////////////////////////////////
// External correlation id API
// Notifies that the calling thread is entering an external API region.
// Push an external correlation id for the calling thread.
roctracer_status_t ROCTRACER_API
roctracer_activity_push_external_correlation_id(activity_correlation_id_t id)
ROCTRACER_VERSION_4_1;
// Notifies that the calling thread is leaving an external API region.
// Pop an external correlation id for the calling thread.
// 'last_id' returns the last external correlation id if not NULL
roctracer_status_t ROCTRACER_API
roctracer_activity_pop_external_correlation_id(
activity_correlation_id_t* last_id) ROCTRACER_VERSION_4_1;
#ifdef __cplusplus
} // extern "C" block
#endif // __cplusplus
#endif // ROCTRACER_EXT_H_
+24
Féach ar an gComhad
@@ -0,0 +1,24 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma message( \
"This file has been deprecated and marked for removal. Please use roctracer_hip.h instead.")
#include "roctracer_hip.h"
+38
Féach ar an gComhad
@@ -0,0 +1,38 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef ROCTRACER_HIP_H_
#define ROCTRACER_HIP_H_
#include "roctracer.h"
#include <hip/hip_runtime.h>
#include <hip/hip_deprecated.h>
#include "hip_ostream_ops.h"
#include <hip/amd_detail/hip_prof_str.h>
// HIP OP ID enumeration.
// The values are spelled out explicitly and are contiguous starting at 0.
typedef enum {
HIP_OP_ID_DISPATCH = 0,
HIP_OP_ID_COPY = 1,
HIP_OP_ID_BARRIER = 2,
// Number of operation IDs defined above (one past the last valid ID).
HIP_OP_ID_NUMBER = 3
} hip_op_id_t;
#endif // ROCTRACER_HIP_H_
+112
Féach ar an gComhad
@@ -0,0 +1,112 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef ROCTRACER_HSA_H_
#define ROCTRACER_HSA_H_
#include "roctracer.h"
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include "hsa_ostream_ops.h"
#include "hsa_prof_str.h"
// HSA OP ID enumeration
// The values are contiguous starting at 0.
enum hsa_op_id_t {
HSA_OP_ID_DISPATCH = 0,
HSA_OP_ID_COPY = 1,
HSA_OP_ID_BARRIER = 2,
HSA_OP_ID_RESERVED1 = 3,
// Number of IDs defined above; takes the next value after
// HSA_OP_ID_RESERVED1, i.e. 4.
HSA_OP_ID_NUMBER
};
// HSA EVT ID enumeration
enum hsa_evt_id_t {
HSA_EVT_ID_ALLOCATE = 0, // Memory allocate callback
HSA_EVT_ID_DEVICE = 1, // Device assign callback
HSA_EVT_ID_MEMCOPY = 2, // Memcopy callback
HSA_EVT_ID_SUBMIT = 3, // Packet submission callback
HSA_EVT_ID_KSYMBOL = 4, // Loading/unloading of kernel symbol
HSA_EVT_ID_CODEOBJ = 5, // Loading/unloading of device code object
HSA_EVT_ID_NUMBER
};
struct hsa_ops_properties_t {
void* reserved1[4];
};
// HSA EVT data type
// An anonymous union with one member per event kind. The member names
// (allocate, device, memcopy, submit, ksymbol, codeobj) mirror the
// HSA_EVT_ID_* enumerators of hsa_evt_id_t.
// NOTE(review): presumably the event ID delivered with the callback selects
// which member is valid - confirm against the callback dispatcher.
typedef struct {
union {
// Data for the memory allocate event.
struct {
const void* ptr; // allocated area ptr
size_t size; // allocated area size, zero size means 'free' callback
hsa_amd_segment_t segment; // allocated area's memory segment type
hsa_amd_memory_pool_global_flag_t
global_flag; // allocated area's memory global flag
int is_code; // equal to 1 if code is allocated
} allocate;
// Data for the device assign event.
struct {
hsa_device_type_t type; // type of assigned device
uint32_t id; // id of assigned device
hsa_agent_t agent; // device HSA agent handle
const void* ptr; // ptr the device is assigned to
} device;
// Data for the memcopy event.
struct {
const void* dst; // memcopy dst ptr
const void* src; // memcopy src ptr
size_t size; // memcopy size bytes
} memcopy;
// Data for the packet submission event.
struct {
const void* packet; // packet submitted to the GPU
const char*
kernel_name; // kernel name, NULL if not a kernel dispatch packet
hsa_queue_t* queue; // HSA queue the packet was submitted to
uint32_t device_type; // type of device the packet is submitted to
uint32_t device_id; // id of device the packet is submitted to
} submit;
// Data for the kernel symbol load/unload event.
struct {
uint64_t object; // kernel symbol object
const char* name; // kernel symbol name
uint32_t name_length; // kernel symbol name length
int unload; // symbol executable destroy
} ksymbol;
// Data for the device code object load/unload event.
struct {
uint32_t storage_type; // code object storage type
int storage_file; // origin file descriptor
uint64_t memory_base; // origin memory base
uint64_t memory_size; // origin memory size
uint64_t load_base; // code object load base
uint64_t load_size; // code object load size
uint64_t load_delta; // code object load delta
uint32_t uri_length; // URI string length (not including the terminating
// NUL character)
const char* uri; // URI string
int unload; // unload flag
} codeobj;
};
} hsa_evt_data_t;
#endif // ROCTRACER_HSA_H_
+137
Féach ar an gComhad
@@ -0,0 +1,137 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
/** \section roctracer_plugin_api ROCtracer Plugin API
*
* The ROCtracer Plugin API is used by the ROCtracer Tool to output all tracing
* information. Different implementations of the ROCtracer Plugin API can be
* developed that output the tracing data in different formats.
* The ROCtracer Tool can be configured to load a specific library that
* supports the user desired format.
*
* The API is not thread safe. It is the responsibility of the ROCtracer Tool
* to ensure the operations are synchronized and not called concurrently. There
* is no requirement for the ROCtracer Tool to report trace data in any
* specific order. If the format supported by plugin requires specific
* ordering, it is the responsibility of the plugin implementation to perform
* any necessary sorting.
*/
/**
* \file
* ROCtracer Tool Plugin API interface.
*/
#ifndef ROCTRACER_PLUGIN_H_
#define ROCTRACER_PLUGIN_H_
#include "roctracer.h"
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/** \defgroup initialization_group Initialization and Finalization
*
* The ROCtracer Plugin API must be initialized before using any of the
* operations to report trace data, and finalized after the last trace data has
* been reported.
*
* @{
*/
/**
* Initialize plugin.
*
* Must be called before any other operation.
*
* @param[in] roctracer_major_version The major version of the ROCtracer API
* being used by the ROCtracer Tool. An error is reported if this does not
* match the major version of the ROCtracer API used to build the plugin
* library. This ensures compatibility of the trace data format.
*
* @param[in] roctracer_minor_version The minor version of the ROCtracer API
* being used by the ROCtracer Tool. An error is reported if the
* \p roctracer_major_version matches and this is greater than the minor
* version of the ROCtracer API used to build the plugin library. This ensures
* compatibility of the trace data format.
*
* @return Returns 0 on success and -1 on error.
*/
ROCTRACER_EXPORT int roctracer_plugin_initialize(
uint32_t roctracer_major_version, uint32_t roctracer_minor_version);
/**
* Finalize plugin.
*
* This must be called after ::roctracer_plugin_initialize and after all trace
* data has been reported by ::roctracer_plugin_write_callback_record and
* ::roctracer_plugin_write_activity_records.
*
* The function takes no arguments and returns nothing; the parameter list is
* spelled \p (void) so that C clients of this header see a full prototype.
*/
ROCTRACER_EXPORT void roctracer_plugin_finalize(void);
/** @} */
/** \defgroup trace_record_write_functions Trace data reporting
*
* Operations to output trace data.
*
* @{
*/
/**
* Report a single callback trace data.
*
* @param[in] record Primarily domain independent trace data.
*
* @param[in] callback_data Domain specific trace data. The type of this
* argument depends on the values of \p record.domain.
*
* @return Returns 0 on success and -1 on error.
*/
ROCTRACER_EXPORT int roctracer_plugin_write_callback_record(
const roctracer_record_t* record, const void* callback_data);
/**
* Report a range of activity trace data.
*
* Reports a range of primarily domain independent trace data. The range is
* specified by a pointer to the first record and a pointer to one past the
* last record. ::roctracer_next_record is used to iterate the range in forward
* order.
*
* @param[in] begin Pointer to the first record.
*
* @param[in] end Pointer to one past the last record.
*
* @return Returns 0 on success and -1 on error.
*/
ROCTRACER_EXPORT int roctracer_plugin_write_activity_records(
const roctracer_record_t* begin, const roctracer_record_t* end);
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif /* __cplusplus */
#endif /* ROCTRACER_PLUGIN_H_ */
+67
Féach ar an gComhad
@@ -0,0 +1,67 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef ROCTRACER_ROCTX_H_
#define ROCTRACER_ROCTX_H_
#include "roctx.h"
/**
* ROCTX API ID enumeration
*
* One ID per ROCTX API function; each enumerator is named
* ROCTX_API_ID_<function name>. The values are contiguous starting at 0.
*/
enum roctx_api_id_t {
ROCTX_API_ID_roctxMarkA = 0,
ROCTX_API_ID_roctxRangePushA = 1,
ROCTX_API_ID_roctxRangePop = 2,
ROCTX_API_ID_roctxRangeStartA = 3,
ROCTX_API_ID_roctxRangeStop = 4,
/* Number of API IDs defined above; takes the next value, i.e. 5. */
ROCTX_API_ID_NUMBER,
};
/**
* ROCTX callbacks data type
*
* \p args is a union holding one struct per ROCTX API function, named after
* that function, plus an unnamed struct. Every per-function struct starts
* with a \p message member, and the range start/stop structs follow it with
* an \p id member, which matches the member order of the unnamed struct.
*/
typedef struct roctx_api_data_s {
union {
/* Function-independent view: message first, then range ID. */
struct {
const char* message;
roctx_range_id_t id;
};
/* Arguments of roctxMarkA. */
struct {
const char* message;
} roctxMarkA;
/* Arguments of roctxRangePushA. */
struct {
const char* message;
} roctxRangePushA;
/* Data for roctxRangePop. The function itself takes no arguments.
NOTE(review): presumably message is filled in by the tracer - confirm. */
struct {
const char* message;
} roctxRangePop;
/* Data for roctxRangeStartA: its message argument and a range ID.
NOTE(review): id is presumably the returned range ID - confirm. */
struct {
const char* message;
roctx_range_id_t id;
} roctxRangeStartA;
/* Data for roctxRangeStop: its id argument. The function takes no message.
NOTE(review): presumably message is filled in by the tracer - confirm. */
struct {
const char* message;
roctx_range_id_t id;
} roctxRangeStop;
} args;
} roctx_api_data_t;
#endif /* ROCTRACER_ROCTX_H_ */
+229
Féach ar an gComhad
@@ -0,0 +1,229 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
/** \mainpage ROCTX API Specification
*
* \section introduction Introduction
* ROCTX is a library that implements the AMD code annotation API. It provides
* the support necessary to annotate events and code ranges in applications.
*/
/**
* \file
* ROCTX API interface.
*/
#ifndef ROCTX_H_
#define ROCTX_H_ 1
/* Placeholder for calling convention and import/export macros.
*
* ROCTX_CALL, ROCTX_EXPORT_DECORATOR, ROCTX_IMPORT_DECORATOR and ROCTX_API are
* each defined only if the client has not already defined them, so any of
* them can be overridden before including this header. */
#if !defined(ROCTX_CALL)
#define ROCTX_CALL
#endif /* !defined (ROCTX_CALL) */
#if !defined(ROCTX_EXPORT_DECORATOR)
#if defined(__GNUC__)
#define ROCTX_EXPORT_DECORATOR __attribute__((visibility("default")))
#elif defined(_MSC_VER)
#define ROCTX_EXPORT_DECORATOR __declspec(dllexport)
#else
/* Unknown toolchain: use no decoration instead of leaving the macro
undefined, which would leak an unknown identifier into every prototype. */
#define ROCTX_EXPORT_DECORATOR
#endif /* defined (__GNUC__) / defined (_MSC_VER) */
#endif /* !defined (ROCTX_EXPORT_DECORATOR) */
#if !defined(ROCTX_IMPORT_DECORATOR)
#if defined(__GNUC__)
#define ROCTX_IMPORT_DECORATOR
#elif defined(_MSC_VER)
#define ROCTX_IMPORT_DECORATOR __declspec(dllimport)
#else
/* Unknown toolchain: use no decoration (see ROCTX_EXPORT_DECORATOR). */
#define ROCTX_IMPORT_DECORATOR
#endif /* defined (__GNUC__) / defined (_MSC_VER) */
#endif /* !defined (ROCTX_IMPORT_DECORATOR) */
#define ROCTX_EXPORT ROCTX_EXPORT_DECORATOR ROCTX_CALL
#define ROCTX_IMPORT ROCTX_IMPORT_DECORATOR ROCTX_CALL
/* Guard on ROCTX_API itself: a client-provided ROCTX_API is respected rather
than redefined, and ROCTX_API is never left undefined. ROCTX_EXPORTS selects
the export decoration; otherwise the import decoration is used. */
#if !defined(ROCTX_API)
#if defined(ROCTX_EXPORTS)
#define ROCTX_API ROCTX_EXPORT
#else /* !defined (ROCTX_EXPORTS) */
#define ROCTX_API ROCTX_IMPORT
#endif /* !defined (ROCTX_EXPORTS) */
#endif /* !defined (ROCTX_API) */
#include <stdint.h>
#if defined(__cplusplus)
extern "C" {
#endif /* defined(__cplusplus) */
/** \defgroup symbol_versions_group Symbol Versions
*
* The names used for the shared library versioned symbols.
*
* Every function is annotated with one of the version macros defined in this
* section. Each macro specifies a corresponding symbol version string. After
* dynamically loading the shared library with \p dlopen, the address of each
* function can be obtained using \p dlvsym with the name of the function and
* its corresponding symbol version string. An error will be reported by \p
* dlvsym if the installed library does not support the version for the
* function specified in this version of the interface.
*
* @{
*/
/**
* The function was introduced in version 4.1 of the interface and has the
* symbol version string of ``"ROCTX_4.1"``.
*/
#define ROCTX_VERSION_4_1
/** @} */
/** \defgroup versioning_group Versioning
*
* Version information about the interface and the associated installed
* library.
*
* @{
*/
/**
* The semantic version of the interface following
* [semver.org][semver] rules.
*
* A client that uses this interface is only compatible with the installed
* library if the major version numbers match and the interface minor version
* number is less than or equal to the installed library minor version number.
*/
/**
* The major version of the interface as a macro so it can be used by the
* preprocessor.
*/
#define ROCTX_VERSION_MAJOR 4
/**
* The minor version of the interface as a macro so it can be used by the
* preprocessor.
*/
#define ROCTX_VERSION_MINOR 1
/**
* Query the major version of the installed library.
*
* Return the major version of the installed library. This can be used to check
* if it is compatible with this interface version.
*
* The function takes no arguments; the parameter list is spelled \p (void) so
* that C clients of this header see a full prototype.
*
* \return Returns the major version number.
*/
ROCTX_API uint32_t roctx_version_major(void) ROCTX_VERSION_4_1;
/**
* Query the minor version of the installed library.
*
* Return the minor version of the installed library. This can be used to check
* if it is compatible with this interface version.
*
* The function takes no arguments; the parameter list is spelled \p (void) so
* that C clients of this header see a full prototype.
*
* \return Returns the minor version number.
*/
ROCTX_API uint32_t roctx_version_minor(void) ROCTX_VERSION_4_1;
/** @} */
/** \defgroup marker_group ROCTX Markers
*
* Marker annotations are used to describe events in a ROCm application.
*
* @{
*/
/**
* Mark an event.
*
* \param[in] message The message associated with the event.
*/
ROCTX_API void roctxMarkA(const char* message) ROCTX_VERSION_4_1;
#define roctxMark(message) roctxMarkA(message)
/** @} */
/** \defgroup range_group ROCTX Ranges
*
* Range annotations are used to describe code ranges in a ROCm application.
*
* @{
*/
/**
* Start a new nested range.
*
* Nested ranges are stacked and local to the current CPU thread.
*
* \param[in] message The message associated with this range.
*
* \return Returns the level this nested range is started at. Nested range
* levels are 0 based.
*/
ROCTX_API int roctxRangePushA(const char* message) ROCTX_VERSION_4_1;
#define roctxRangePush(message) roctxRangePushA(message)
/**
* Stop the current nested range.
*
* Stop the current nested range, and pop it from the stack. If a nested range
* was active before the last one was started, it becomes again the current
* nested range.
*
* The function takes no arguments; the parameter list is spelled \p (void) so
* that C clients of this header see a full prototype.
*
* \return Returns the level the stopped nested range was started at, or a
* negative value if there was no nested range active.
*/
ROCTX_API int roctxRangePop(void) ROCTX_VERSION_4_1;
/**
* ROCTX range ID.
*
* This is the range ID used to identify start/end ranges.
*/
typedef uint64_t roctx_range_id_t;
/**
* Starts a process range.
*
* Start/stop ranges can be started and stopped in different threads. Each
* timespan is assigned a unique range ID.
*
* \param[in] message The message associated with this range.
*
* \return Returns the ID of the new range.
*/
ROCTX_API roctx_range_id_t roctxRangeStartA(const char* message)
ROCTX_VERSION_4_1;
#define roctxRangeStart(message) roctxRangeStartA(message)
/**
* Stop a process range.
*
* \param[in] id The range ID identifying the process range to stop.
*/
ROCTX_API void roctxRangeStop(roctx_range_id_t id) ROCTX_VERSION_4_1;
/** @} */
#if defined(__cplusplus)
} /* extern "C" */
#endif /* defined (__cplusplus) */
#endif /* ROCTX_H_ */
+23
Féach ar an gComhad
@@ -0,0 +1,23 @@
################################################################################
## Copyright (c) 2022 Advanced Micro Devices, Inc.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal in the Software without restriction, including without limitation the
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
## sell copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
## IN THE SOFTWARE.
################################################################################
# Descend into the "file" subdirectory and process its CMakeLists.txt.
add_subdirectory(file)
+7
Féach ar an gComhad
@@ -0,0 +1,7 @@
/* Linker version script: the four roctracer_plugin_* entry points listed under
   "global" are exported; "local: *" keeps every other symbol out of the
   dynamic symbol table. */
{
global: roctracer_plugin_initialize;
roctracer_plugin_finalize;
roctracer_plugin_write_callback_record;
roctracer_plugin_write_activity_records;
local: *;
};
+55
Féach ar an gComhad
@@ -0,0 +1,55 @@
################################################################################
## Copyright (c) 2022 Advanced Micro Devices, Inc.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal in the Software without restriction, including without limitation the
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
## sell copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
## IN THE SOFTWARE.
################################################################################
# Locate the AMD Code Object Manager (comgr) package config; /opt/rocm is given
# as an additional search location. The build stops if it is not found.
find_package(amd_comgr REQUIRED CONFIG
PATHS
/opt/rocm/
PATH_SUFFIXES
lib/cmake/amd_comgr
)
MESSAGE(STATUS "Code Object Manager found at ${amd_comgr_DIR}.")
# Every .cpp file in this directory is compiled into the plugin library.
file(GLOB FILE_SOURCES "*.cpp")
add_library(file_plugin ${LIBRARY_TYPE} ${FILE_SOURCES})
# Symbols are hidden by default; relinking is triggered when the export map changes.
set_target_properties(file_plugin PROPERTIES
CXX_VISIBILITY_PRESET hidden
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}
INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")
target_compile_definitions(file_plugin
PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
target_include_directories(file_plugin PRIVATE ${PROJECT_SOURCE_DIR}/inc)
# The version script restricts the exported symbols; --no-undefined makes unresolved
# references a link-time error.
target_link_options(file_plugin PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap -Wl,--no-undefined)
# NOTE(review): amd_comgr appears twice in this list -- confirm whether the repetition is
# needed for link ordering or is an accidental duplicate.
target_link_libraries(file_plugin PRIVATE util roctracer amd_comgr hsa-runtime64::hsa-runtime64 stdc++fs amd_comgr)
# The library is installed to the same destination under two install components
# ("runtime" and "asan").
install(TARGETS file_plugin LIBRARY
DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
COMPONENT runtime)
install(TARGETS file_plugin LIBRARY
DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
COMPONENT asan)
+399
Féach ar an gComhad
@@ -0,0 +1,399 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "debug.h"
#include <roctracer_ext.h>
#include <roctracer_hip.h>
#include <roctracer_hsa.h>
#include <roctracer_plugin.h>
#include <roctracer_roctx.h>
#include <cstddef>
#include <cstdint>
#include <experimental/filesystem>
#include <fstream>
#include <memory>
#include <optional>
#include <ostream>
#include <sstream>
#include <string>
#include <amd_comgr/amd_comgr.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <cassert>
// Macro to check ROCtracer calls status.
// Evaluates `call` once; if the result is non-zero, the text returned by
// roctracer_error_string() is passed to fatal(). The do/while(false) wrapper lets the
// macro be used like a single statement followed by a semicolon.
#define CHECK_ROCTRACER(call) \
do { \
if ((call) != 0) fatal("%s", roctracer_error_string()); \
} while (false)
namespace fs = std::experimental::filesystem;
namespace {
// Returns the process ID. The getpid system call is issued on the first call only; the
// result is kept in a function-local static and returned on every later call.
uint32_t GetPid() {
  static const uint32_t cached_pid = syscall(__NR_getpid);
  return cached_pid;
}
/* Extracts the bare kernel name from a (demangled) signature string.
   Scanning backwards from the end of the string, spaces and balanced '(...)',
   '<...>' and '[...]' groups are skipped. The kernel name is the run of
   characters just before them, bounded on the left by a ' ' or ':' character
   or by the start of the string.
   For example 'Foo<int, float>::foo(a[], int (int))' -> 'foo' */
std::string truncate_name(const std::string& name) {
  size_t name_end = name.size();  // one past the last character of the kernel name
  uint32_t depth = 0;             // nesting level of the bracket group being skipped
  char same_kind = 0;             // bracket that deepens the nesting (seen first when going backwards)
  char matching = 0;              // bracket that ends one nesting level

  while (name_end > 0) {
    const char ch = name[name_end - 1];
    if (depth != 0) {
      // Inside a bracket group: only track the nesting level.
      if (ch == same_kind) ++depth;
      if (ch == matching) --depth;
      --name_end;
      continue;
    }
    if (ch == ' ') {
      --name_end;
      continue;
    }
    if (ch == ')') {
      matching = '(';
    } else if (ch == '>') {
      matching = '<';
    } else if (ch == ']') {
      matching = '[';
    } else {
      // First character that is neither a space nor part of a bracket group: the name ends here.
      break;
    }
    same_kind = ch;
    depth = 1;
    --name_end;
  }

  // Extend to the left until a scope separator, a space, or the start of the string.
  size_t name_begin = name_end;
  while (name_begin > 0 && name[name_begin - 1] != ' ' && name[name_begin - 1] != ':') {
    --name_begin;
  }
  return name.substr(name_begin, name_end - name_begin);
}
// Invokes the comgr function `amd_comgr_<call>` and checks its status. When the status is
// not AMD_COMGR_STATUS_SUCCESS, the status is converted to text with
// amd_comgr_status_string() and reported through fatal() together with the stringified call.
// Usage: amd_comgr_(release_data(data)); expands to amd_comgr_release_data(data).
#define amd_comgr_(call) \
do { \
if (amd_comgr_status_t status = amd_comgr_##call; status != AMD_COMGR_STATUS_SUCCESS) { \
const char* reason = ""; \
amd_comgr_status_string(status, &reason); \
fatal(#call " failed: %s", reason); \
} \
} while (false)
// C++ symbol demangle
std::string cxx_demangle(const std::string& symbol) {
amd_comgr_data_t mangled_data;
amd_comgr_(create_data(AMD_COMGR_DATA_KIND_BYTES, &mangled_data));
amd_comgr_(set_data(mangled_data, symbol.size(), symbol.data()));
amd_comgr_data_t demangled_data;
amd_comgr_(demangle_symbol_name(mangled_data, &demangled_data));
size_t demangled_size = 0;
amd_comgr_(get_data(demangled_data, &demangled_size, nullptr));
std::string demangled_str;
demangled_str.resize(demangled_size);
amd_comgr_(get_data(demangled_data, &demangled_size, demangled_str.data()));
amd_comgr_(release_data(mangled_data));
amd_comgr_(release_data(demangled_data));
return demangled_str;
}
// Plugin implementation that renders roctracer callback and activity records as text lines.
// Each trace kind has its own sink. A sink writes to "<pid>_<name>" inside the directory named
// by the ROCP_OUTPUT_DIR environment variable, or to std::cout when that variable is not set.
class file_plugin_t {
 private:
  // Text sink that is opened on first use.
  class output_file_t {
   public:
    explicit output_file_t(std::string name) : name_(std::move(name)) {}

    // Base file name (without the PID prefix) this sink was created with.
    const std::string& name() const { return name_; }

    // Streams `value`, calling open() first when no file is open. The underlying std::ostream
    // is returned, so the remaining insertions of a chained expression bypass this wrapper.
    template <typename T> std::ostream& operator<<(T&& value) {
      if (!is_open()) open();
      return stream_ << std::forward<T>(value);
    }

    // Same as above, for std::ostream manipulators passed as function pointers.
    std::ostream& operator<<(std::ostream& (*func)(std::ostream&)) {
      if (!is_open()) open();
      return stream_ << func;
    }

    // Selects the destination of the sink. Does nothing once the stream is in the failed
    // state; failing to find the output directory puts it in that state.
    void open() {
      // If the stream is already in the failed state, there's no need to try to open the file.
      if (fail()) return;

      const char* output_dir = getenv("ROCP_OUTPUT_DIR");
      if (output_dir == nullptr) {
        // No output directory: share std::cout's formatting, state and stream buffer.
        // is_open() only reports on the file, so this path is taken again on later writes.
        stream_.copyfmt(std::cout);
        stream_.clear(std::cout.rdstate());
        stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
        return;
      }

      fs::path output_prefix(output_dir);
      if (!fs::is_directory(fs::status(output_prefix))) {
        // The stream cannot be in the failed state here (checked on entry), so the warning is
        // always issued; setting failbit prevents further attempts.
        warning("Cannot open output directory '%s'", output_dir);
        stream_.setstate(std::ios_base::failbit);
        return;
      }

      std::stringstream ss;
      ss << GetPid() << "_" << name_;
      stream_.open(output_prefix / ss.str());
    }

    bool is_open() const { return stream_.is_open(); }
    bool fail() const { return stream_.fail(); }

   private:
    const std::string name_;
    std::ofstream stream_;
  };

  // Returns the sink for the given domain (and, for HSA ops, the given op). Unsupported
  // combinations trigger the assertion and yield nullptr when assertions are compiled out.
  output_file_t* get_output_file(uint32_t domain, uint32_t op = 0) {
    switch (domain) {
      case ACTIVITY_DOMAIN_ROCTX:
        return &roctx_file_;
      case ACTIVITY_DOMAIN_HSA_API:
        return &hsa_api_file_;
      case ACTIVITY_DOMAIN_HIP_API:
        return &hip_api_file_;
      case ACTIVITY_DOMAIN_HIP_OPS:
        return &hip_activity_file_;
      case ACTIVITY_DOMAIN_HSA_OPS:
        if (op == HSA_OP_ID_COPY) return &hsa_async_copy_file_;
        if (op == HSA_OP_ID_RESERVED1) return &pc_sample_file_;
        // Any other HSA op is unsupported and is handled like an unknown domain.
        [[fallthrough]];
      default:
        assert(!"domain/op not supported!");
        break;
    }
    return nullptr;
  }

 public:
  // Writes the HSA agent handles and the application begin timestamp. The plugin is marked
  // valid only when both writes succeed.
  file_plugin_t() {
    // Dumping HSA handles for agents
    output_file_t hsa_handles("hsa_handles.txt");

    [[maybe_unused]] hsa_status_t status = hsa_iterate_agents(
        [](hsa_agent_t agent, void* user_data) {
          auto* file = static_cast<output_file_t*>(user_data);
          hsa_device_type_t type;

          if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS)
            return HSA_STATUS_ERROR;

          *file << std::hex << std::showbase << agent.handle << " agent "
                << ((type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu") << "\n";
          return HSA_STATUS_SUCCESS;
        },
        &hsa_handles);
    assert(status == HSA_STATUS_SUCCESS && "failed to iterate HSA agents");

    if (hsa_handles.fail()) {
      warning("Cannot write to '%s'", hsa_handles.name().c_str());
      return;
    }

    // App begin timestamp begin_ts_file.txt
    output_file_t begin_ts("begin_ts_file.txt");

    roctracer_timestamp_t app_begin_timestamp;
    CHECK_ROCTRACER(roctracer_get_timestamp(&app_begin_timestamp));
    begin_ts << std::dec << app_begin_timestamp << "\n";

    if (begin_ts.fail()) {
      warning("Cannot write to '%s'", begin_ts.name().c_str());
      return;
    }

    valid_ = true;
  }

  // Formats one API callback record (ROCTX, HSA API or HIP API) and writes it to the sink of
  // its domain. Records of other domains are ignored with a warning.
  // Returns -1 when the sink that was written to is in the failed state, 0 otherwise.
  int write_callback_record(const roctracer_record_t* record, const void* callback_data) {
    std::stringstream ss;
    output_file_t* output_file{nullptr};

    switch (record->domain) {
      case ACTIVITY_DOMAIN_ROCTX: {
        const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
        output_file = get_output_file(ACTIVITY_DOMAIN_ROCTX);
        ss << std::dec << record->begin_ns << " " << record->process_id << ":" << record->thread_id
           << " " << record->op << ":" << data->args.id << ":\""
           << (data->args.message ? data->args.message : "") << "\""
           << "\n";
        *output_file << ss.str();
        break;
      }
      case ACTIVITY_DOMAIN_HSA_API: {
        const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
        output_file = get_output_file(ACTIVITY_DOMAIN_HSA_API);
        // For hsa_shut_down the begin timestamp is written as the end timestamp as well.
        ss << std::dec << record->begin_ns << ":"
           << ((record->op == HSA_API_ID_hsa_shut_down) ? record->begin_ns : record->end_ns) << " "
           << record->process_id << ":" << record->thread_id << " "
           << hsa_api_data_pair_t(record->op, *data) << " :" << std::dec << data->correlation_id
           << "\n";
        *output_file << ss.str();
        break;
      }
      case ACTIVITY_DOMAIN_HIP_API: {
        const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);

        std::string kernel_name;
        if (record->kernel_name) {
          // ROCP_TRUNCATE_NAMES is read once; a non-zero integer enables name truncation.
          static bool truncate = []() {
            const char* env_var = getenv("ROCP_TRUNCATE_NAMES");
            return env_var && std::atoi(env_var) != 0;
          }();
          kernel_name = cxx_demangle(record->kernel_name);
          if (truncate) kernel_name = truncate_name(kernel_name);
          kernel_name = " kernel=" + kernel_name;
        }

        output_file = get_output_file(ACTIVITY_DOMAIN_HIP_API);
        ss << std::dec << record->begin_ns << ":" << record->end_ns << " " << record->process_id
           << ":" << record->thread_id << " "
           << hipApiString(static_cast<hip_api_id_t>(record->op), data) << kernel_name << " :"
           << std::dec << data->correlation_id << "\n";
        *output_file << ss.str();
        break;
      }
      default:
        warning("write_callback_record: ignored record for domain %d", record->domain);
        break;
    }

    return (output_file && output_file->fail()) ? -1 : 0;
  }

  // Formats every activity record in [begin, end) and writes it to the matching sink.
  // Records are advanced with roctracer_next_record(). Unsupported records are ignored with a
  // warning. Returns -1 as soon as a sink that was written to is in the failed state, 0
  // otherwise.
  int write_activity_records(const roctracer_record_t* begin, const roctracer_record_t* end) {
    while (begin != end) {
      std::stringstream ss;
      output_file_t* output_file{nullptr};
      const char* name = roctracer_op_string(begin->domain, begin->op, begin->kind);

      switch (begin->domain) {
        case ACTIVITY_DOMAIN_HIP_OPS: {
          // The post-processing script cannot handle HIP ops without a correlation ID. The
          // correlation ID is needed to connect the record to a HIP stream and originating thread.
          // The script could be modified to handle ops without correlation IDs, but for backward
          // compatibility, we are simply dropping the records here.
          if (begin->correlation_id == 0) break;

          output_file = get_output_file(ACTIVITY_DOMAIN_HIP_OPS);
          ss << std::dec << begin->begin_ns << ":" << begin->end_ns << " " << begin->device_id
             << ":" << begin->queue_id << " "
             << ((begin->op == HIP_OP_ID_DISPATCH && begin->kernel_name != nullptr)
                     ? truncate_name(cxx_demangle(begin->kernel_name))
                     : name)
             << ":" << begin->correlation_id << ":" << GetPid() << "\n";
          *output_file << ss.str();
          break;
        }
        case ACTIVITY_DOMAIN_HSA_OPS:
          // The sink is looked up only for the ops that have one, so that an unsupported op
          // reaches the warning below instead of the assertion in get_output_file().
          if (begin->op == HSA_OP_ID_COPY) {
            output_file = get_output_file(ACTIVITY_DOMAIN_HSA_OPS, begin->op);
            ss << std::dec << begin->begin_ns << ":" << begin->end_ns
               << " async-copy:" << begin->correlation_id << ":" << GetPid() << "\n";
            *output_file << ss.str();
            break;
          } else if (begin->op == HSA_OP_ID_RESERVED1) {
            output_file = get_output_file(ACTIVITY_DOMAIN_HSA_OPS, begin->op);
            ss << std::dec << begin->pc_sample.se << " " << begin->pc_sample.cycle << " "
               << std::hex << std::showbase << begin->pc_sample.pc << " " << name << "\n";
            *output_file << ss.str();
            break;
          }
          [[fallthrough]];
        default: {
          warning("write_activity_records: ignored activity for domain %d", begin->domain);
          break;
        }
      }

      if (output_file && output_file->fail()) return -1;
      CHECK_ROCTRACER(roctracer_next_record(begin, &begin));
    }
    return 0;
  }

  // True when the constructor managed to write both start-up files.
  bool is_valid() const { return valid_; }

 private:
  bool valid_{false};

  output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"},
      hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"},
      hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"};
};
// The single plugin instance: created by roctracer_plugin_initialize(), destroyed by
// roctracer_plugin_finalize(), and null whenever the plugin is not initialized.
file_plugin_t* file_plugin = nullptr;
} // namespace
// Plugin entry point: creates the file plugin instance.
// Returns 0 on success. Returns -1 when the major version differs from ROCTRACER_VERSION_MAJOR,
// when the minor version is lower than ROCTRACER_VERSION_MINOR, when the plugin is already
// initialized, or when the newly created plugin reports that it is not valid.
ROCTRACER_EXPORT int roctracer_plugin_initialize(uint32_t roctracer_major_version,
                                                 uint32_t roctracer_minor_version) {
  const bool compatible = roctracer_major_version == ROCTRACER_VERSION_MAJOR &&
                          roctracer_minor_version >= ROCTRACER_VERSION_MINOR;
  if (!compatible) return -1;

  const bool already_initialized = file_plugin != nullptr;
  if (already_initialized) return -1;

  file_plugin = new file_plugin_t();
  if (!file_plugin->is_valid()) {
    // The plugin failed to initialize: destroy it and report the error.
    delete file_plugin;
    file_plugin = nullptr;
    return -1;
  }
  return 0;
}
// Plugin entry point: destroys the file plugin instance, if there is one, and resets the
// global pointer so that the plugin can be initialized again.
ROCTRACER_EXPORT void roctracer_plugin_finalize() {
  if (file_plugin != nullptr) {
    delete file_plugin;
    file_plugin = nullptr;
  }
}
// Plugin entry point: forwards one callback record to the file plugin.
// Returns -1 when the plugin is not initialized or not valid; otherwise returns the result of
// file_plugin_t::write_callback_record().
ROCTRACER_EXPORT int roctracer_plugin_write_callback_record(const roctracer_record_t* record,
                                                            const void* callback_data) {
  const bool usable = file_plugin != nullptr && file_plugin->is_valid();
  return usable ? file_plugin->write_callback_record(record, callback_data) : -1;
}
// Plugin entry point: forwards the activity records in [begin, end) to the file plugin.
// Returns -1 when the plugin is not initialized or not valid; otherwise returns the result of
// file_plugin_t::write_activity_records().
ROCTRACER_EXPORT int roctracer_plugin_write_activity_records(const roctracer_record_t* begin,
                                                             const roctracer_record_t* end) {
  const bool usable = file_plugin != nullptr && file_plugin->is_valid();
  return usable ? file_plugin->write_activity_records(begin, end) : -1;
}
+37
Féach ar an gComhad
@@ -0,0 +1,37 @@
#!/bin/bash
################################################################################
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
# Use /opt/rocm as the ROCm location unless ROCM_PATH is already set and non-empty.
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"

# Print the given message and terminate the script with a failure status.
fatal() {
  echo "$1"
  exit 1
}

# Run from BUILD_DIR; default to (and export) the current directory when it is not set.
if [ -z "$BUILD_DIR" ] ; then export BUILD_DIR=$PWD; fi

# Quote the path (it may contain spaces) and stop if the directory cannot be entered,
# so that run.sh is never started from the wrong place.
cd "$BUILD_DIR" || fatal "Cannot change directory to '$BUILD_DIR'"

./run.sh

# NOTE(review): the exit status of run.sh is discarded and the script always reports
# success -- confirm that this is intended.
exit 0
+320
Féach ar an gComhad
@@ -0,0 +1,320 @@
#!/usr/bin/env python3
################################################################################
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
import sys, os, re
import filecmp
import argparse
# Module-level state shared between the functions below.
# events_order / events_order_r: per-tid event lists of the most recently parsed trace and of
# the trace it is compared against; both are declared `global` in the functions that set them.
# events_count and trace2info are re-created as local variables inside the functions that use
# those names.
events_count = {}
events_order = {}
events_order_r = {}
trace2info = {}
# Path of the trace comparison config file, relative to the current working directory.
trace2info_filename = 'test/golden_traces/tests_trace_cmp_levels.txt'
# Parses trace comparison config file and stores the info in a dictionary
def parse_trace_levels(trace_config_filename, check_trace_flag):
    """Read the trace comparison config file.

    Each config line has the form ``<trace_name> <comp_level> [<option> <value>]...``
    with single spaces as separators. Recognized options are ``--ignore-count``,
    ``--ignore-event``, ``--check-count``, ``--check-order`` and ``--check-events``;
    the token following an option is stored as its value (``--check-events`` also
    sets the ``--check-count`` and ``--ignore-count`` values). Values are stored
    as found, so the last token of a line keeps its trailing newline.

    Args:
        trace_config_filename: path of the config file.
        check_trace_flag: 0 disables parsing (an empty dictionary is returned);
            None stops parsing at the first line starting with ``# dummy``.

    Returns:
        A ``(trace2info, status)`` tuple. ``trace2info`` maps a trace name to
        ``(comp_level, no_events_cnt, events2ignore, events2chkcnt,
        events2chkord, events2ch)``. ``status`` is 1 once at least one line has
        been accepted for parsing, 0 otherwise.
    """
    status = 0
    trace2info = {}
    # The context manager closes the file on every exit path, including early returns.
    with open(trace_config_filename) as f:
        for line in f:
            if check_trace_flag == 0:
                return (trace2info, status)
            if check_trace_flag is None and re.match('^# dummy', line):
                return (trace2info, status)
            status = 1
            lis = line.split(' ')
            # A line without both a trace name and a comparison level (for example a
            # blank line) cannot be interpreted; skip it.
            if len(lis) < 2:
                continue
            trace_name = lis[0]
            comp_level = lis[1]
            no_events_cnt = ''
            events2ignore = ''
            events2chkcnt = ''
            events2chkord = ''
            events2ch = ''
            # A field set to ' ' is waiting for its value: it receives the next token.
            for l in lis:
                if no_events_cnt == ' ':
                    no_events_cnt = l
                if events2ignore == ' ':
                    events2ignore = l
                if events2chkcnt == ' ':
                    events2chkcnt = l
                if events2chkord == ' ':
                    events2chkord = l
                if events2ch == ' ':
                    events2ch = l
                    events2chkcnt = l
                    no_events_cnt = l
                if l == '--ignore-count':
                    no_events_cnt = ' '
                if l == '--ignore-event':
                    events2ignore = ' '
                if l == '--check-count':
                    events2chkcnt = ' '
                if l == '--check-order':
                    events2chkord = ' '
                if l == '--check-events':
                    events2ch = ' '
            trace2info[trace_name] = (comp_level, no_events_cnt, events2ignore,
                                      events2chkcnt, events2chkord, events2ch)
    return (trace2info, status)
# diff multi lines strings to show events differences
# Prints a human-readable report of the differences between two results; nothing is returned.
#   cnt_r, cnt: result strings to compare (split on '\n' for the 'cnt' metric)
#   metric:     'cnt' compares the event lines of cnt_r and cnt,
#               'or'  compares the global events_order_r and events_order dictionaries
# NOTE(review): event names and whole lines are passed to re.search() as patterns without
# escaping, so regex metacharacters in them are interpreted -- confirm this is acceptable.
def diff_strings(cnt_r, cnt, metric):
global events_order_r
global events_order
print ("\nDiffs (if any):\n")
if metric == 'cnt':
# The first word of a line is taken as the event name.
evt_ptrn = re.compile(r'(\w+).*$')
#cnt_ptrn = re.compile(r'(\w+): count (\d+)$')
for evt in cnt_r.split('\n'):
mevt_ptrn = evt_ptrn.match(evt)
#mcnt_ptrn = cnt_ptrn.match(evt)
if mevt_ptrn:
# '+': the event name of a cnt_r line is not found in cnt at all.
if not re.search(mevt_ptrn.group(1), cnt):
print ('+ ' + evt)
# '>D<': the event name is found in cnt but the whole line is not.
elif not re.search(evt, cnt):
print ('>D< ' + evt)
for evt in cnt.split('\n'):
mevt_ptrn = evt_ptrn.match(evt)
#mcnt_ptrn = cnt_ptrn.match(evt)
if mevt_ptrn:
# '-': the event name of a cnt line is not found in cnt_r.
if not re.search(mevt_ptrn.group(1), cnt_r):
print ('- ' + evt)
if metric == 'or':
# Thread ids are paired by their rank in sorted order (cnt_tid_r / cnt_tid), not by value.
cnt_tid_r = 0
for tid_r in sorted (events_order_r.keys()):
if len(events_order) == 0:
print ("+ " + str(events_order_r[tid_r]) + "\n\n")
continue
cnt_tid = 0
for tid in sorted (events_order.keys()):
if cnt_tid == cnt_tid_r:
if events_order_r[tid_r] != events_order[tid]:
#print (">D< " + str(events_order_r[tid_r]) + "\n")
#print (">D< " + str(events_order[tid]) + "\n\n")
# Look for the first index at which the two event lists differ.
diff_cnt_r = 0
found_diff_evt = 0
for evt in events_order_r[tid_r]:
diff_cnt = 0
for evt2 in events_order[tid]:
if diff_cnt == diff_cnt_r:
if evt != evt2:
print (">I< Difference starts at tid rank: " + str(cnt_tid) + " event index: " + str(diff_cnt_r) + ", tid_r " + str(tid_r) + ", tid " + str(tid) + ", with evts " + evt + " and " + evt2 + "\n")
found_diff_evt = 1
break
diff_cnt += 1
diff_cnt_r += 1
if found_diff_evt: break
# No differing pair was found but the lengths differ: one list is a prefix of the other.
if len(events_order_r[tid_r]) != len(events_order[tid]) and found_diff_evt == 0:
print (">I< Difference starts at tid rank: " + str(cnt_tid) + " event index: " + str(min(len(events_order_r[tid_r]), len(events_order[tid]))) + ", with missing evts\n")
break
cnt_tid += 1
cnt_tid_r += 1
if len(events_order_r) == 0:
for tid in sorted (events_order.keys()):
print ("- " + str(events_order[tid]) + "\n")
# check trace against golden reference and returns 0 for pass, 1 for fail
#   tracename:        trace name; the golden trace is 'test/golden_traces/<tracename>.txt' and the
#                     trace under test is '/tmp/test/out/<tracename>.out'
#   verbose:          when true, the computed results of both traces and their diff are printed
#   check_trace_flag: forwarded to parse_trace_levels()
# Prints 'PASSED!' or 'FAILED!' before returning.
def check_trace_status(tracename, verbose, check_trace_flag):
global events_order_r
global events_order
(trace2info, status) = parse_trace_levels(trace2info_filename, check_trace_flag)
# An empty config is a failure when lines were parsed (status 1) and a pass otherwise.
if len(trace2info) == 0:
if status == 1:
print ("Error: no trace comparison info found in config file " + trace2info_filename + "\n")
print('FAILED!')
return 1
if status == 0:
print('PASSED!')
return 0
trace = 'test/golden_traces/' + tracename + '.txt'
rtrace = '/tmp/test/out/' + tracename + '.out'
# The config is keyed by the base name of the trace.
if os.path.basename(tracename) in trace2info.keys():
(trace_level, no_events_cnt, events2ignore, events2chkcnt, events2chkord, events2ch) = trace2info[os.path.basename(tracename)]
# Config values may still carry the newline of the config line they came from.
trace_level = trace_level.rstrip('\n')
no_events_cnt = no_events_cnt.rstrip('\n')
events2ignore = events2ignore.rstrip('\n')
events2chkcnt = events2chkcnt.rstrip('\n')
events2chkord = events2chkord.rstrip('\n')
events2ch = events2ch.rstrip('\n')
else:
print('Trace ' + os.path.basename(tracename) + ' not found in ' + trace2info_filename)
print('FAILED!')
return 1
# An unset exclusion pattern is replaced by the literal text 'empty-regex' (used later as a
# regex); the two check patterns are left empty, which matches every event.
if no_events_cnt == '':
no_events_cnt = 'empty-regex'
if events2ignore == '':
events2ignore = 'empty-regex'
if events2chkcnt == '':
events2chkcnt = ''
if events2chkord == '':
events2chkord = ''
if trace_level == '--check-none':
print('PASSED!')
return 0
if trace_level == '--check-diff':
if filecmp.cmp(trace,rtrace):
print('PASSED!')
return 0
else:
print('FAILED!')
# NOTE(review): the command line is built by string concatenation and run through the shell.
os.system('/usr/bin/diff --brief ' + trace + ' ' + rtrace)
return 1
metric = ''
if trace_level == '--check-count' or trace_level == '--check-events':
metric = 'cnt'
if trace_level == '--check-order':
metric = 'or'
cnt_r = gen_events_info(rtrace,trace_level,no_events_cnt,events2ignore,events2chkcnt,events2chkord,verbose)
# gen_events_info() overwrites the global events_order, so the order information of the trace
# under test is saved in events_order_r before the golden trace is parsed.
events_order_r = {}
for tid in sorted (events_order.keys()) :
events_order_r[tid] = events_order[tid]
cnt = gen_events_info(trace,trace_level,no_events_cnt,events2ignore,events2chkcnt,events2chkord,verbose)
if verbose:
print ('\n' + rtrace + ':\n')
print (cnt_r)
print ('\n' + trace + ':\n')
print (cnt)
diff_strings(cnt_r, cnt, metric)
if cnt_r == cnt:
print('PASSED!')
return 0
else:
print('FAILED!')
return 1
# Parses roctracer trace file for regression purpose
# and generates events count per event (when cnt is on) or events order per tid (when order is on)
#   tracefile:     path of the trace file to parse
#   trace_level:   '--check-count' / '--check-events' select the 'cnt' metric,
#                  '--check-order' selects the 'or' metric
#   no_events_cnt: regex of events whose count is not reported ('cnt') or whose consecutive
#                  repetitions are collapsed ('or')
#   events2ignore: regex of events that are skipped
#   events2chkcnt / events2chkord: regex of events taken into account for each metric
#   verbose:       not used in this function
# Returns the result as a string: for 'cnt' the str() of the sorted list of result lines, for
# 'or' the concatenated per-tid event lists in sorted tid order. The global events_order is
# replaced as a side effect.
def gen_events_info(tracefile, trace_level, no_events_cnt, events2ignore, events2chkcnt, events2chkord, verbose):
global events_order
metric = ''
if trace_level == '--check-count' or trace_level == '--check-events':
metric = 'cnt'
if trace_level == '--check-order':
metric = 'or'
# events_count is local here (there is no `global` declaration for it).
events_count = {}
events_order = {}
res = ''
re_no_events_cnt = r'{}'.format(no_events_cnt)
re_events2ignore = r'{}'.format(events2ignore)
re_events2chkcnt = r'{}'.format(events2chkcnt)
re_events2chkord = r'{}'.format(events2chkord)
test_act_pattern = re.compile(r'\s*(\w+)\s+.*_id\((\d+)\)$')
#' hipSetDevice correlation_id(1) time_ns(1548622357525055:1548622357542015) process_id(126283) thread_id(126283)'
#' hcCommandKernel correlation_id(6) time_ns(1548622661443020:1548622662666935) device_id(0) queue_id(0)'
test_api_cb_pattern = re.compile(r'.*<(\w+)\s+.*tid\((\d+)\)>')
# <hsaKmtGetVersion id(2) correlation_id(0) on-enter pid(26224) tid(26224)>
# below is roctx pattern
# <hipLaunchKernel pid(123) tid(123)>
tool_record = re.compile(r'\d+:\d+\s+\d+:(\d+)\s+(\w+)')
# tool_api_record
# 1822810364769411:1822810364771941 116477:116477 hsa_agent_get_info(<agent 0x8990e0>, 17, 0x7ffeac015fec) = 0
# tool_gpu_act_record
# 3632773658039902:3632773658046462 0:0 hcCommandMarker:273
# NOTE(review): in the pattern below `(\d)+` captures only the last digit of the thread-id
# field and `(\d)` matches a single digit -- confirm whether `(\d+)` was intended.
roctx_record = re.compile(r'\d+\s\d+:(\d)+\s(\d):\d+:\".*\"')
with open(tracefile) as f:
for line in f:
if re.search("before", line) or re.search("after",line):#roctx before/after not real events
continue
line=line.rstrip('\n')
event = ''
# The four patterns are tried in turn; when several match, the last match wins.
test_act_pattern_match = test_act_pattern.match(line)
if test_act_pattern_match:
event = test_act_pattern_match.group(1)
tid = int(test_act_pattern_match.group(2))
test_api_cb_pattern_match = test_api_cb_pattern.match(line)
if test_api_cb_pattern_match:
event = test_api_cb_pattern_match.group(1)
tid = int(test_api_cb_pattern_match.group(2))
tool_record_match = tool_record.match(line)
if tool_record_match:
event = tool_record_match.group(2)
tid = int(tool_record_match.group(1))
roctx_record_match = roctx_record.match(line)
if roctx_record_match:
event = roctx_record_match.group(2)
tid = int(roctx_record_match.group(1))
if event == '' or event == '(null)': #some traces has these null events
continue
if re.search(re_events2ignore,event):
continue
if metric == 'cnt' and re.search(re_events2chkcnt,event):
if event in events_count:
events_count[event] = events_count[event] + 1
else:
if not re.search(re_no_events_cnt,event):
events_count[event] = 1
if metric == 'or' and re.search(re_events2chkord,event):
if tid in events_order.keys():
if re.search(re_no_events_cnt,event):
if event != events_order[tid][-1]: #Add event only if it is not last event in the list
events_order[tid].append(event)
else:
events_order[tid].append(event)
else:
events_order[tid] = [event]
if metric == 'cnt':
for event,count in events_count.items():
if re.search(re_no_events_cnt,event):
res = res + event + '\n'
else:
res = res + event + " : count " + str(count) + '\n'
if metric == 'or':
for tid in sorted (events_order.keys()) :
res = res + str(events_order[tid])
if metric == 'cnt':
# Sorting the lines makes the result independent of the order in which events were first seen.
newres = res.split('\n')
newres = sorted(newres)
res = str(newres)
return res
# Command-line interface. The parser is built at module level; the command line itself is only
# parsed when the file is run as a script, so importing the module does not consume sys.argv.
parser = argparse.ArgumentParser(description='check_trace.py: check a trace against golden ref. Returns 0 for success, 1 for failure')
requiredNamed = parser.add_argument_group('Required arguments')
requiredNamed.add_argument('-in', metavar='file', help='Name of trace to be checked', required=True)
# -v and -ck are optional, so they are listed with the parser's regular options rather than in
# the "Required arguments" group.
parser.add_argument('-v', action='store_true', help='debug info', required=False)
parser.add_argument('-ck', metavar='N', type=int, help='check trace 0|1', required=False)

if __name__ == '__main__':
    args = vars(parser.parse_args())
    # The process exit status is the result of the check: 0 for pass, 1 for fail.
    sys.exit(check_trace_status(args['in'], args['v'], args['ck']))
+296
Féach ar an gComhad
@@ -0,0 +1,296 @@
#!/usr/bin/env python3
################################################################################
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
import os, sys, re
import CppHeaderParser
import argparse
import string
# License text wrapped in a C block comment ('/*' ... '*/'), built from one string per line.
# Presumably written at the top of the generated header -- the use is not visible here.
LICENSE = \
'/*\n' + \
'Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.\n' + \
'\n' + \
'Permission is hereby granted, free of charge, to any person obtaining a copy\n' + \
'of this software and associated documentation files (the "Software"), to deal\n' + \
'in the Software without restriction, including without limitation the rights\n' + \
'to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n' + \
'copies of the Software, and to permit persons to whom the Software is\n' + \
'furnished to do so, subject to the following conditions:\n' + \
'\n' + \
'The above copyright notice and this permission notice shall be included in\n' + \
'all copies or substantial portions of the Software.\n' + \
'\n' + \
'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n' + \
'IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n' + \
'FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n' + \
'AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n' + \
'LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n' + \
'OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n' + \
'THE SOFTWARE.\n' + \
'*/\n'
# C++ source text for the generated header: it opens `namespace detail` (the namespace is not
# closed inside this string) and defines print_escaped_string() plus operator<< overloads for
# generic values, unsigned char, char, char arrays and C strings.
# print_escaped_string() writes a double-quoted string, escaping quotes, backslashes and common
# control characters, and printing other non-printable bytes as \xNN.
# NOTE(review): the generated `operator<<(std::ostream&, const char*)` calls strlen(v) without a
# null check, and the generic template uses a function-local `static bool recursion` flag --
# confirm that null pointers and concurrent use cannot occur.
header_basic = \
'namespace detail {\n' + \
' inline static void print_escaped_string(std::ostream& out, const char *v, size_t len) {\n' + \
' out << \'"\'; \n' + \
' for (size_t i = 0; i < len && v[i]; ++i) {\n' + \
' switch (v[i]) {\n' + \
' case \'\\"\': out << "\\\\\\""; break;\n' + \
' case \'\\\\\': out << "\\\\\\\\"; break;\n' + \
' case \'\\b\': out << "\\\\\\b"; break;\n' + \
' case \'\\f\': out << "\\\\\\f"; break;\n' + \
' case \'\\n\': out << "\\\\\\n"; break;\n' + \
' case \'\\r\': out << "\\\\\\r"; break;\n' + \
' case \'\\t\': out << "\\\\\\t"; break;\n' + \
' default:\n' + \
' if (std::isprint((unsigned char)v[i])) std::operator<<(out, v[i]);\n' + \
' else {\n' + \
' std::ios_base::fmtflags flags(out.flags());\n' + \
' out << "\\\\x" << std::setfill(\'0\') << std::setw(2) << std::hex << (unsigned int)(unsigned char)v[i];\n' + \
' out.flags(flags);\n' + \
' }\n' + \
' break;\n' + \
' }\n' + \
' }\n' + \
' out << \'"\'; \n' + \
' }\n' + \
'\n' + \
' template <typename T>\n' + \
' inline static std::ostream& operator<<(std::ostream& out, const T& v) {\n' + \
' using std::operator<<;\n' + \
' static bool recursion = false;\n' + \
' if (recursion == false) { recursion = true; out << v; recursion = false; }\n' + \
' return out;\n }\n' + \
'\n' + \
' inline static std::ostream &operator<<(std::ostream &out, const unsigned char &v) {\n' + \
' out << (unsigned int)v;\n' + \
' return out;\n }\n' + \
'\n' + \
' inline static std::ostream &operator<<(std::ostream &out, const char &v) {\n' + \
' out << (unsigned char)v;\n' + \
' return out;\n }\n' + \
'\n' + \
' template <size_t N>\n' + \
' inline static std::ostream &operator<<(std::ostream &out, const char (&v)[N]) {\n' + \
' print_escaped_string(out, v, N);\n' + \
' return out;\n }\n' + \
'\n' + \
' inline static std::ostream &operator<<(std::ostream &out, const char *v) {\n' + \
' print_escaped_string(out, v, strlen(v));\n' + \
' return out;\n }\n'
# Module-level state of the generator.
# structs_analyzed: names of the structs already handled by process_struct(), so that each
# struct is processed only once.
structs_analyzed = {}
# global_str is declared `global` in process_struct(); how it and the remaining names below are
# used is not visible in this part of the file.
global_ops = ''
global_str = ''
output_filename_h = None
apiname = ""
# process_struct traverses recursively all structs to extract all fields
def process_struct(file_handle, cppHeader_struct, cppHeader, parent_hier_name, apiname):
    """Append the C++ code that prints every public field of a struct to global_str.

    file_handle: handle for output file {api_name}_ostream_ops.h to be generated
                 (not used here; kept so existing callers keep working)
    cppHeader_struct: cppHeader struct being processed
    cppHeader: cppHeader object created by CppHeaderParser.CppHeader(...)
    parent_hier_name: parent hierarchical name used for nested structs/enums
    apiname: for example hip.

    Each struct is processed at most once (tracked in structs_analyzed). Fields
    whose type contains "union" are not printed directly; the function recurses
    into the matching nested class entries instead. Fields whose type contains
    "void" are skipped.
    """
    global global_str

    if cppHeader_struct == 'max_align_t': #function pointers not working in cppheaderparser
        return
    if cppHeader_struct not in cppHeader.classes:
        return
    if cppHeader_struct in structs_analyzed:
        return
    structs_analyzed[cppHeader_struct] = 1

    api_upper = apiname.upper()
    detail_op = "roctracer::" + apiname.lower() + "_support::detail::operator<<"
    # Indentation prefix for the statements nested inside the generated `if`.
    indent = " "

    # Fields are visited from last to first.
    for field in reversed(cppHeader.classes[cppHeader_struct]["properties"]["public"]):
        if 'name' not in field:
            continue
        name = field['name']
        if parent_hier_name != '':
            name = parent_hier_name + '.' + name
        if name == '':
            continue

        mtype = field['type'] if 'type' in field else ""
        if mtype == '':
            continue
        prop = field['property_of_class'] if 'property_of_class' in field else ""

        if "union" in mtype:
            # Recurse into the candidate names under which the nested union may be registered.
            if prop != '':
                process_struct(file_handle, prop + "::", cppHeader, name, apiname)
                process_struct(file_handle, prop + "::" + mtype + " ", cppHeader, name, apiname)
            process_struct(file_handle, cppHeader_struct + "::", cppHeader, name, apiname)
            continue

        if "void" in mtype:
            continue

        # HIP 'reserved' fields are printed as a literal 0 instead of their contents.
        if name == 'reserved' and api_upper == 'HIP':
            value = "0"
        else:
            value = "v." + name

        # gen_cppheader() strips the trailing separator of the last field by dropping
        # the final lines of global_str, so the five-line shape below must be kept.
        global_str += " if (std::string(\"" + cppHeader_struct + "::" + name + "\").find(" + api_upper + "_structs_regex" + ") != std::string::npos) {\n"
        global_str += indent + " std::operator<<(out, \"" + name + "=\");\n"
        global_str += indent + " " + detail_op + "(out, " + value + ");\n"
        global_str += indent + " std::operator<<(out, \", \");\n"
        global_str += " }\n"
# Parses API header file and generates ostream ops files ostream_ops.h
def gen_cppheader(infilepath, outfilepath, rank):
    """Parse one API header and write ostream operators for its structs.

    infilepath: API Header file to be parsed
    outfilepath: Output file where ostream operators are written
    rank: role of this header in the list of inputs
          0 - first of several headers: opens the output file and writes the prologue
          1 - last of several headers: writes the epilogue and closes the file
          2 - the only header: prologue and epilogue
          any other value - only the per-struct operators are written

    Exits the process with status 1 if CppHeaderParser cannot parse the input.
    """
    global global_ops
    global output_filename_h
    global apiname
    global global_str

    try:
        cppHeader = CppHeaderParser.CppHeader(infilepath)
    except CppHeaderParser.CppParseError as e:
        print(e)
        sys.exit(1)

    if rank == 0 or rank == 2:
        # Derive the API name from the output file name: <api>_ostream_ops.h -> <API>.
        mpath = os.path.dirname(outfilepath)
        if mpath == "":
            mpath = os.getcwd()
        apiname = outfilepath.replace(mpath + "/","")
        output_filename_h = open(outfilepath,"w+")
        apiname = apiname.replace("_ostream_ops.h","")
        apiname = apiname.upper()
        output_filename_h.write("// automatically generated\n")
        output_filename_h.write(LICENSE + '\n')
        header_s = \
            '#ifndef INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
            '#define INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
            '\n'
        if apiname.upper() == 'HIP':
            header_s = \
                header_s + \
                '#include <hip/hip_runtime.h>\n' + \
                '#include <hip/hip_deprecated.h>\n'
        header_s = \
            header_s + \
            '#include "roctracer.h"\n' + \
            '\n' + \
            '#ifdef __cplusplus\n' + \
            '#include <iostream>\n' + \
            '#include <iomanip>\n' + \
            '#include <string>\n' + \
            '#include <cstring>\n'
        output_filename_h.write(header_s)
        output_filename_h.write('\n')
        output_filename_h.write('namespace roctracer {\n')
        output_filename_h.write('namespace ' + apiname.lower() + '_support {\n')
        output_filename_h.write('static int ' + apiname.upper() + '_depth_max = 1;\n')
        output_filename_h.write('static int ' + apiname.upper() + '_depth_max_cnt = 0;\n')
        output_filename_h.write('static std::string ' + apiname.upper() + '_structs_regex = \"\";\n')
        output_filename_h.write('// begin ostream ops for '+ apiname + ' \n')
        output_filename_h.write("// basic ostream ops\n")
        output_filename_h.write(header_basic)
        output_filename_h.write("// End of basic ostream ops\n\n")

    # apiname is module state: for ranks other than 0/2 it still holds the value
    # computed by the earlier call that opened the file.
    api_upper = apiname.upper()
    api_lower = apiname.lower()

    for c in cppHeader.classes.copy():
        # Types defined inside of unions are incorrectly prepended with "union " after parsing by CppHeaderParser
        # Remove "union " from the beginning of the full class name to correct the eventual output
        if "union " in c[0:6] and "::union" not in c[-8:]:
            new_name = c[6:]
            cppHeader.classes[new_name] = cppHeader.classes[c]
            del cppHeader.classes[c]

    for c in cppHeader.classes:
        # ostream operator cannot be overloaded for anonymous struct therefore it is skipped.
        # endswith() is used instead of indexing c[-2]/c[-1], which raised IndexError
        # for class names shorter than two characters.
        if c.endswith("::"):
            continue
        if "::union" in c:
            continue
        if c in structs_analyzed:
            continue
        if c == 'max_align_t' or c == '__fsid_t': # Skipping as it is defined in multiple domains
            continue
        if c.startswith("_") or c.startswith("pthread_") or c.startswith("__pthread_"):
            continue
        if len(cppHeader.classes[c]["properties"]["public"]) != 0:
            output_filename_h.write("inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n")
            output_filename_h.write("{\n")
            output_filename_h.write(" std::operator<<(out, '{');\n")
            output_filename_h.write(" " + api_upper + "_depth_max_cnt++;\n")
            output_filename_h.write(" if (" + api_upper + "_depth_max == -1 || " + api_upper + "_depth_max_cnt <= " + api_upper + "_depth_max" + ") {\n" )
            process_struct(output_filename_h, c, cppHeader, "", apiname)
            # Drop the trailing separator statement and closing brace emitted for the
            # last field, then re-close the block.
            global_str = "\n".join(global_str.split("\n")[0:-3])
            if global_str != '': global_str += "\n }\n"
            output_filename_h.write(global_str)
            output_filename_h.write(" };\n")
            output_filename_h.write(" " + api_upper + "_depth_max_cnt--;\n")
            output_filename_h.write(" std::operator<<(out, '}');\n")
            output_filename_h.write(" return out;\n")
            output_filename_h.write("}\n")
            global_str = ''
            # Global-namespace forwarder, written after the namespaces are closed.
            global_ops += "inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n" + "{\n" + " roctracer::" + api_lower + "_support::detail::operator<<(out, v);\n" + " return out;\n" + "}\n\n"

    if rank == 1 or rank == 2:
        footer = '// end ostream ops for '+ apiname + ' \n'
        footer += '};};};\n\n'
        output_filename_h.write(footer)
        output_filename_h.write(global_ops)
        footer = '#endif //__cplusplus\n' + \
                 '#endif // INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
                 ' \n'
        output_filename_h.write(footer)
        output_filename_h.close()
        print('File ' + outfilepath + ' generated')

    return
# Command line: -in <comma separated headers> -out <generated header>.
parser = argparse.ArgumentParser(description='genOstreamOps.py: generates ostream operators for all typedefs in provided input file.')
requiredNamed = parser.add_argument_group('Required arguments')
requiredNamed.add_argument('-in', metavar='fileList', help='Comma separated list of header files to be parsed', required=True)
requiredNamed.add_argument('-out', metavar='file', help='Output file with ostream operators', required=True)
args = vars(parser.parse_args())

if __name__ == '__main__':
    flist = args['in'].split(',')
    last_index = len(flist) - 1
    for position, header in enumerate(flist):
        # rank tells gen_cppheader() whether to write the prologue and/or the epilogue:
        # 2 = only file, 0 = first, 1 = last, -1 = in between.
        if last_index == 0:
            rank = 2
        elif position == 0:
            rank = 0
        elif position == last_index:
            rank = 1
        else:
            rank = -1
        gen_cppheader(header, args['out'], rank)
+581
Féach ar an gComhad
@@ -0,0 +1,581 @@
#!/usr/bin/env python3
################################################################################
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
################################################################################
from __future__ import print_function
import os, sys, re
# File name that receives API_DescrParser.h_content (see the main section at the end of the script).
H_OUT='hsa_prof_str.h'
# File name that receives API_DescrParser.cpp_content.
CPP_OUT='hsa_prof_str.inline.h'
# Header in which API_TableParser looks for the 'struct <name>Table' declarations.
API_TABLES_H = 'hsa_api_trace.h'
# (table name, header) pairs. For every entry except the last, API_DescrParser parses the
# named table and looks up its declarations in the given header. The last entry is only
# used to search its header for the calls that were not found in the earlier headers.
API_HEADERS_H = (
('CoreApi', 'hsa.h'),
('AmdExt', 'hsa_ext_amd.h'),
('ImageExt', 'hsa_ext_image.h'),
('AmdExt', API_TABLES_H),
)
# License banner (a C block comment) passed to API_DescrParser, which places it at the top of
# both generated files.
LICENSE = \
'/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.\n' + \
'\n' + \
' Permission is hereby granted, free of charge, to any person obtaining a copy\n' + \
' of this software and associated documentation files (the "Software"), to deal\n' + \
' in the Software without restriction, including without limitation the rights\n' + \
' to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n' + \
' copies of the Software, and to permit persons to whom the Software is\n' + \
' furnished to do so, subject to the following conditions:\n' + \
'\n' + \
' The above copyright notice and this permission notice shall be included in\n' + \
' all copies or substantial portions of the Software.\n' + \
'\n' + \
' THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n' + \
' IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n' + \
' FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n' + \
' AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n' + \
' LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n' + \
' OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n' + \
' THE SOFTWARE. */\n'
#############################################################
# Error handler
def fatal(module, msg):
    """Write '<module> Error: "<msg>"' to stderr and terminate with exit status 1."""
    report = module + ' Error: "' + msg + '"'
    sys.stderr.write(report + '\n')
    raise SystemExit(1)
# Get next text block
def NextBlock(pos, record):
    """Return the index just past the text block of `record` that starts at `pos`.

    A block is one of:
      * a run of whitespace,
      * a run of word characters and '*',
      * a balanced parenthesised group, when record[pos] is '('.
    An empty record returns `pos` unchanged. Anything else is reported through fatal().
    """
    if len(record) == 0:
        return pos

    if record[pos] == '(':
        # Walk forward until the parenthesis opened at `pos` is closed again.
        depth = 0
        for index in range(pos, len(record)):
            ch = record[index]
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0:
                    index += 1
                    break
        if depth != 0:
            fatal('NextBlock', "count is not zero (" + str(depth) + ")")
        if record[index - 1] != ')':
            fatal('NextBlock', "last char is not ')' '" + record[index - 1] + "'")
        return index

    # Whitespace is tried first, then an identifier-like token.
    found = re.compile(r'(\s+)').match(record, pos) or re.compile(r'([\w\*]+)').match(record, pos)
    if not found:
        fatal('NextBlock', "bad record '" + record + "' pos(" + str(pos) + ")")
    else:
        return found.end(1)
#############################################################
# API table parser class
class API_TableParser:
    """Collects the API names declared in one 'struct <name>Table { ... };' block of a header.

    After construction, `array` holds the argument of every 'decltype(...)' entry found
    between the opening 'struct <name>Table {' line and the first following '};' line,
    in file order.
    """

    def fatal(self, msg):
        fatal('API_TableParser', msg)

    def __init__(self, header, name):
        """header: path of the header to scan; name: table name without the 'Table' suffix."""
        self.name = name

        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        # Raw strings: '\s' in a normal string literal is an invalid escape sequence.
        self.beg_pattern = re.compile(r'^\s*struct\s+' + name + r'Table\s*{\s*$')
        self.end_pattern = re.compile(r'^\s*};\s*$')
        self.array = []

        # The file is only needed while parsing; close it as soon as parsing is done.
        with open(header, 'r') as inp:
            self.inp = inp
            self.parse()

    # normalizing a line
    def norm_line(self, line):
        # Strip only the line terminator, so a final line without one keeps its last character.
        return re.sub(r'^\s+', r' ', line.rstrip('\n'))

    # check for start record
    def is_start(self, record):
        return self.beg_pattern.match(record)

    # check for end record
    def is_end(self, record):
        return self.end_pattern.match(record)

    # check for declaration entry record
    def is_entry(self, record):
        return re.match(r'^\s*decltype\(([^\)]*)\)', record)

    # parse method
    def parse(self):
        """Fill self.array from self.inp; stops at the first end record after the start record."""
        active = 0
        for line in self.inp.readlines():
            record = self.norm_line(line)
            if self.is_start(record): active = 1
            if active != 0:
                if self.is_end(record): return
                m = self.is_entry(record)
                if m:
                    self.array.append(m.group(1))
#############################################################
# API declaration parser class
class API_DeclParser:
    """Finds the declaration of each listed API in a header and records its signature.

    For every name in `array`, the header is scanned for the declaration and the result
    of get_args() is stored in the caller-supplied `data` dict under that name. Names
    whose declaration is not found are simply left out of `data`.
    """

    def fatal(self, msg):
        fatal('API_DeclParser', msg)

    def __init__(self, header, array, data):
        """header: path of the header; array: API names to look up; data: dict to fill in."""
        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        # Raw string: '\)' and '\s' are invalid escape sequences in a normal string literal.
        self.end_pattern = re.compile(r'\);\s*$')
        self.data = data

        # The file is only needed during construction; close it once all calls are parsed.
        with open(header, 'r') as inp:
            self.inp = inp
            for call in array:
                if call in data:
                    self.fatal(call + ' is already found')
                self.parse(call)

    # api record filter
    def api_filter(self, record):
        record = re.sub(r'\sHSA_API\s', r' ', record)
        record = re.sub(r'\sHSA_DEPRECATED\s', r' ', record)
        return record

    # check for start record
    def is_start(self, call, record):
        return re.search(r'\s' + call + r'\s*\(', record)

    # check for API method record
    def is_api(self, call, record):
        record = self.api_filter(record)
        return re.match(r'\s+\S+\s+' + call + r'\s*\(', record)

    # check for end record
    def is_end(self, record):
        return self.end_pattern.search(record)

    # parse method args
    def get_args(self, record):
        """Split a complete declaration record into its return type and arguments.

        Returns a dict with:
          'ret'  - the first text block of the record (the return type),
          'args' - the argument list as one string, separated by ', ',
          'astr' - argument name -> full argument declaration,
          'alst' - argument names in order,
          'tlst' - full argument declarations in order.
        """
        struct = {'ret': '', 'args': '', 'astr': {}, 'alst': [], 'tlst': []}
        record = re.sub(r'^\s+', r'', record)
        # Attach '*' to the type: 'T *x' / 'T* x' / 'T * x' all become 'T* x'.
        record = re.sub(r'\s*(\*+)\s*', r'\1 ', record)
        rind = NextBlock(0, record)
        struct['ret'] = record[0:rind]
        pos = record.find('(')
        end = NextBlock(pos, record)
        args = record[pos:end]
        args = re.sub(r'^\(\s*', r'', args)
        args = re.sub(r'\s*\)$', r'', args)
        args = re.sub(r'\s*,\s*', r',', args)
        struct['args'] = re.sub(r',', r', ', args)
        if len(args) == 0: return struct

        pos = 0
        # Trailing comma so that every argument, including the last, is comma-terminated.
        args = args + ','
        while pos < len(args):
            ind1 = NextBlock(pos, args) # type
            ind2 = NextBlock(ind1, args) # space
            if args[ind2] != '(':
                # Plain argument: the name is the last block before the comma.
                while ind2 < len(args):
                    end = NextBlock(ind2, args)
                    if args[end] == ',': break
                    else: ind2 = end
                name = args[ind2:end]
            else:
                # Function-pointer argument: '(*name)' followed by its parameter list.
                ind3 = NextBlock(ind2, args) # field
                m = re.match(r'\(\s*\*\s*(\S+)\s*\)', args[ind2:ind3])
                if not m:
                    self.fatal("bad block3 '" + args + "' : '" + args[ind2:ind3] + "'")
                name = m.group(1)
                end = NextBlock(ind3, args) # the rest
            item = args[pos:end]
            struct['astr'][name] = item
            struct['alst'].append(name)
            struct['tlst'].append(item)
            if args[end] != ',':
                self.fatal("no comma '" + args + "'")
            pos = end + 1

        return struct

    # parse given api
    def parse(self, call):
        """Scan the header from the start for `call` and store its signature in self.data."""
        record = ''
        active = 0
        prev_line = ''

        self.inp.seek(0)
        for line in self.inp.readlines():
            # Strip only the line terminator, so a final line without one keeps its last character.
            record += ' ' + line.rstrip('\n')
            record = re.sub(r'^\s*', r' ', record)

            if active == 0:
                if self.is_start(call, record):
                    active = 1
                    m = self.is_api(call, record)
                    if not m:
                        # The return type may sit on the previous line.
                        record = ' ' + prev_line + ' ' + record
                        m = self.is_api(call, record)
                        if not m:
                            self.fatal("bad api '" + line + "'")

            if active == 1:
                if self.is_end(record):
                    self.data[call] = self.get_args(record)
                    active = 0

            # Outside a declaration nothing is accumulated across lines.
            if active == 0: record = ''
            prev_line = line
#############################################################
# API description parser class
class API_DescrParser:
    """Builds the text of the two generated HSA tracing files.

    After construction:
      h_content   - text of the header (API ID enum, argument struct, stream operator),
      cpp_content - text of the inline source (callbacks, wrapper installers,
                    GetApiName / GetApiCode).

    Every gen_* method has the signature (n, name, call, struct) and is driven by
    add_section(): n == -1 requests the section prologue, call == '-' a block
    separator or the epilogue, anything else the code for one API call.
    """

    def fatal(self, msg):
        fatal('API_DescrParser', msg)

    def __init__(self, out_h_file, hsa_dir, api_table_h, api_headers, license):
        """out_h_file: header file name, used to build the include guard macro
        hsa_dir: directory prefix of the HSA headers (concatenated directly with file names)
        api_table_h: header containing the '<name>Table' structs
        api_headers: sequence of (table name, header) pairs; the last pair is only used to
                     search its header for the calls not found in the earlier headers
        license: license text placed at the top of both outputs
        """
        out_macro = re.sub(r'[\/\.]', r'_', out_h_file.upper()) + '_'

        self.h_content = ''
        self.cpp_content = ''
        self.api_names = []
        self.api_calls = {}
        self.api_rettypes = set()
        self.api_id = {}

        api_data = {}
        api_list = []
        ns_calls = []

        for i in range(0, len(api_headers)):
            (name, header) = api_headers[i]

            if i < len(api_headers) - 1:
                api = API_TableParser(hsa_dir + api_table_h, name)
                api_list = api.array
                self.api_names.append(name)
                self.api_calls[name] = api_list
            else:
                # Last entry: retry the calls that no earlier header declared.
                api_list = ns_calls
                ns_calls = []

            for call in api_list:
                if call in api_data:
                    self.fatal("call '" + call + "' is already found")

            API_DeclParser(hsa_dir + header, api_list, api_data)

            for call in api_list:
                if not call in api_data:
                    # Not-supported functions
                    ns_calls.append(call)
                else:
                    # API ID map
                    self.api_id[call] = 'HSA_API_ID_' + call
                    # Return types
                    self.api_rettypes.add(api_data[call]['ret'])

        self.api_rettypes.discard('void')
        self.api_data = api_data
        self.ns_calls = ns_calls

        self.h_content += "/* Generated by " + os.path.basename(__file__) + " */\n" + license + "\n\n"

        self.h_content += "/* HSA API tracing primitives\n"
        for (name, header) in api_headers:
            self.h_content += " '" + name + "', header '" + header + "', " + str(len(self.api_calls[name])) + ' funcs\n'
        for call in self.ns_calls:
            self.h_content += ' ' + call + ' was not parsed\n'
        self.h_content += " */\n"
        self.h_content += '\n'
        self.h_content += '#ifndef ' + out_macro + '\n'
        self.h_content += '#define ' + out_macro + '\n'

        self.h_content += self.add_section('API ID enumeration', ' ', self.gen_id_enum)

        self.h_content += '/* Declarations of APIs intended for use only by tools. */\n'
        self.h_content += 'typedef void (*hsa_amd_queue_intercept_packet_writer)(const void*, uint64_t);\n'
        self.h_content += 'typedef void (*hsa_amd_queue_intercept_handler)(const void*, uint64_t, uint64_t, void*,\n'
        self.h_content += ' hsa_amd_queue_intercept_packet_writer);\n'
        self.h_content += 'typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t*, hsa_agent_t, void*);\n'

        self.h_content += self.add_section('API arg structure', ' ', self.gen_arg_struct)
        self.h_content += self.add_section('API output stream', ' ', self.gen_out_stream)
        self.h_content += '#endif /* ' + out_macro + ' */\n'

        self.cpp_content += "/* Generated by " + os.path.basename(__file__) + " */\n" + license + "\n\n"

        self.cpp_content += '#include <hsa/hsa_api_trace.h>\n'
        self.cpp_content += '#include <atomic>\n'
        self.cpp_content += 'namespace roctracer::hsa_support::detail {\n'

        self.cpp_content += 'static CoreApiTable CoreApi_saved_before_cb;\n'
        self.cpp_content += 'static AmdExtTable AmdExt_saved_before_cb;\n'
        self.cpp_content += 'static ImageExtTable ImageExt_saved_before_cb;\n\n'

        self.cpp_content += self.add_section('API callback functions', '', self.gen_callbacks)
        self.cpp_content += self.add_section('API intercepting code', '', self.gen_intercept)
        self.cpp_content += self.add_section('API get_name function', ' ', self.gen_get_name)
        self.cpp_content += self.add_section('API get_code function', ' ', self.gen_get_code)

        self.cpp_content += '\n};\n'

    # add code section
    def add_section(self, title, gap, fun):
        """Return one generated section: prologue, one block per API table, epilogue.

        `n` counts the calls emitted so far across all tables. When `gap` is empty,
        `fun` is additionally called with call == '-' before every block except the first.
        """
        content = ''
        n = 0
        content += '\n/* section: ' + title + ' */\n\n'
        content += fun(-1, '-', '-', {})
        for name in self.api_names:
            if n != 0:
                if gap == '': content += fun(n, name, '-', {})
                content += '\n'
            content += gap + '/* block: ' + name + ' API */\n'
            for call in self.api_calls[name]:
                content += fun(n, name, call, self.api_data[call])
                n += 1
        content += fun(n, '-', '-', {})
        return content

    # generate API ID enumeration
    def gen_id_enum(self, n, name, call, data):
        content = ''
        if n == -1:
            content += 'enum hsa_api_id_t {\n'
            return content
        if call != '-':
            content += ' ' + self.api_id[call] + ' = ' + str(n) + ',\n'
        else:
            # Epilogue: two extra IDs placed right after the last API ID.
            content += '\n'
            content += ' HSA_API_ID_DISPATCH = ' + str(n) + ',\n'
            content += ' HSA_API_ID_NUMBER = ' + str(n + 1) + ',\n'
            content += '};\n'
        return content

    # generate API args structure
    def gen_arg_struct(self, n, name, call, struct):
        content = ''
        if n == -1:
            content += 'struct hsa_api_data_t {\n'
            content += ' uint64_t correlation_id;\n'
            content += ' uint32_t phase;\n'
            content += ' union {\n'
            # Sorted so the generated header does not depend on set iteration order,
            # which varies between runs.
            for ret_type in sorted(self.api_rettypes):
                content += ' ' + ret_type + ' ' + ret_type + '_retval;\n'
            content += ' };\n'
            content += ' union {\n'
            return content
        if call != '-':
            content += ' struct {\n'
            for (var, item) in struct['astr'].items():
                content += ' ' + item + ';\n'
                # This one call also stores a copy of the value its 'range' pointer refers to.
                if call == "hsa_amd_memory_async_copy_rect" and item == "const hsa_dim3_t* range":
                    content += ' hsa_dim3_t range__val;\n'
            content += ' } ' + call + ';\n'
        else:
            content += ' } args;\n'
            content += ' uint64_t *phase_data;\n'
            content += '};\n'
        return content

    # generate API callbacks
    def gen_callbacks(self, n, name, call, struct):
        content = ''
        if n == -1:
            content += '/* section: Static declarations */\n'
            content += '\n'
        if call != '-':
            call_id = self.api_id[call]
            ret_type = struct['ret']
            content += 'static ' + ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n'
            content += ' hsa_trace_data_t trace_data;\n'
            content += ' bool enabled{false};\n'
            content += '\n'
            content += ' if (auto function = report_activity.load(std::memory_order_relaxed); function &&\n'
            content += ' (enabled =\n'
            content += ' function(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &trace_data) == 0)) {\n'
            content += ' if (trace_data.phase_enter != nullptr) {\n'
            for var in struct['alst']:
                item = struct['astr'][var]
                if re.search(r'char\* ', item):
                    # FIXME: we should not strdup the char* arguments here, as the callback will not outlive the scope of this function. Instead, we
                    # should generate a helper function to capture the content of the arguments similar to hipApiArgsInit for HIP. We also need a
                    # helper to free the memory that is allocated to capture the content.
                    content += ' trace_data.api_data.args.' + call + '.' + var + ' = ' + '(' + var + ' != NULL) ? strdup(' + var + ')' + ' : NULL;\n'
                else:
                    content += ' trace_data.api_data.args.' + call + '.' + var + ' = ' + var + ';\n'
                    if call == 'hsa_amd_memory_async_copy_rect' and var == 'range':
                        content += ' trace_data.api_data.args.' + call + '.' + var + '__val = ' + '*(' + var + ');\n'
            content += ' trace_data.phase_enter(' + call_id + ', &trace_data);\n'
            content += ' }\n'
            content += ' }\n'
            content += '\n'
            # The saved (original) entry point is always called; its result is kept
            # in trace_data only for non-void functions.
            if ret_type != 'void':
                content += ' trace_data.api_data.' + ret_type + '_retval = '
            content += ' ' + name + '_saved_before_cb.' + call + '_fn(' + ', '.join(struct['alst']) + ');\n'
            content += '\n'
            content += ' if (enabled && trace_data.phase_exit != nullptr)\n'
            content += ' trace_data.phase_exit(' + call_id + ', &trace_data);\n'
            if ret_type != 'void':
                content += ' return trace_data.api_data.' + ret_type + '_retval;\n'
            content += '}\n'
        return content

    # generate API intercepting code
    def gen_intercept(self, n, name, call, struct):
        content = ''
        # Close the previous Install<name>Wrappers function, if any.
        if n > 0 and call == '-':
            content += '};\n'
        # Open a new one at the first call and at each later block separator.
        if n == 0 or (call == '-' and name != '-'):
            content += 'static void Install' + name + 'Wrappers(' + name + 'Table* table) {\n'
            content += ' ' + name + '_saved_before_cb = *table;\n'
        if call != '-':
            if call != 'hsa_shut_down':
                content += ' table->' + call + '_fn = ' + call + '_callback;\n'
            else:
                # hsa_shut_down is not intercepted; the callback is only referenced.
                content += ' { void* p = (void*)' + call + '_callback; (void)p; }\n'
        return content

    # generate API name function
    def gen_get_name(self, n, name, call, struct):
        content = ''
        if n == -1:
            content += 'static const char* GetApiName(uint32_t id) {\n'
            content += ' switch (id) {\n'
            return content
        if call != '-':
            content += ' case ' + self.api_id[call] + ': return "' + call + '";\n'
        else:
            content += ' }\n'
            content += ' return "unknown";\n'
            content += '}\n'
        return content

    # generate API code function
    def gen_get_code(self, n, name, call, struct):
        content = ''
        if n == -1:
            content += 'static uint32_t GetApiCode(const char* str) {\n'
            return content
        if call != '-':
            content += ' if (strcmp("' + call + '", str) == 0) return ' + self.api_id[call] + ';\n'
        else:
            content += ' return HSA_API_ID_NUMBER;\n'
            content += '}\n'
        return content

    # generate stream operator
    def gen_out_stream(self, n, name, call, struct):
        content = ''
        if n == -1:
            content += '#ifdef __cplusplus\n'
            content += '#include "hsa_ostream_ops.h"\n'
            content += 'typedef std::pair<uint32_t, hsa_api_data_t> hsa_api_data_pair_t;\n'
            content += 'inline std::ostream& operator<< (std::ostream& out, const hsa_api_data_pair_t& data_pair) {\n'
            content += ' const uint32_t cid = data_pair.first;\n'
            content += ' const hsa_api_data_t& api_data = data_pair.second;\n'
            content += ' switch(cid) {\n'
            return content
        if call != '-':
            content += ' case ' + self.api_id[call] + ': {\n'
            content += ' out << "' + call + '(";\n'
            arg_list = struct['alst']
            for ind, arg_var in enumerate(arg_list):
                arg_val = 'api_data.args.' + call + '.' + arg_var
                if re.search(r'char\* ', struct['astr'][arg_var]):
                    # NOTE(review): std::hex stays set on the stream after this statement,
                    # so later integers are printed in hex too - confirm this is intended.
                    content += ' out << "0x" << std::hex << (uint64_t)' + arg_val
                else:
                    content += ' out << ' + arg_val
                    if call == "hsa_amd_memory_async_copy_rect" and arg_var == "range":
                        content += ' << ", ";\n'
                        content += ' out << ' + arg_val + '__val'
                if ind < len(arg_list) - 1: content += ' << ", ";\n'
                else: content += ';\n'
            if struct['ret'] != 'void':
                content += ' out << ") = " << api_data.' + struct['ret'] + '_retval;\n'
            else:
                content += ' out << ") = void";\n'
            content += ' break;\n'
            content += ' }\n'
        else:
            content += ' default:\n'
            content += ' out << "ERROR: unknown API";\n'
            content += ' abort();\n'
            content += ' }\n'
            content += ' return out;\n'
            content += '}\n'
            content += '#endif\n'
        return content
#############################################################
# main
# Usage: <script> <OUT prefix> <HSA runtime include path>
if len(sys.argv) != 3:
    print ("Usage:", sys.argv[0], " <OUT prefix> <HSA runtime include path>", file=sys.stderr)
    sys.exit(1)

# Both arguments are used as directory prefixes, hence the trailing '/'.
PREFIX = sys.argv[1] + '/'
HSA_DIR = sys.argv[2] + '/'

descr = API_DescrParser(H_OUT, HSA_DIR, API_TABLES_H, API_HEADERS_H, LICENSE)

# Write the header first, then the inline source; the last character of each
# generated text is dropped.
for out_name, generated in ((H_OUT, descr.h_content), (CPP_OUT, descr.cpp_content)):
    out_file = PREFIX + out_name
    print ('Generating "' + out_file + '"')
    with open(out_file, 'w') as f:
        f.write(generated[:-1])
#############################################################
+245
Féach ar an gComhad
@@ -0,0 +1,245 @@
################################################################################
## Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal in the Software without restriction, including without limitation the
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
## sell copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
## IN THE SOFTWARE.
################################################################################
## Place built libraries in the top-level build directory.
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})

## Static builds define STATIC_BUILD=1.
## Both operands are quoted so that an unset or empty LIBRARY_TYPE makes the
## condition false instead of producing a malformed if() expression.
if("${LIBRARY_TYPE}" STREQUAL "STATIC")
  add_compile_definitions(STATIC_BUILD=1)
endif()

## DEBUG_TRACE defines DEBUG_TRACE_ON=1 when enabled.
option(DEBUG_TRACE "Enable debug tracing")
if(DEBUG_TRACE)
  add_compile_definitions(DEBUG_TRACE_ON=1)
endif()
## The code generators below are Python 3 scripts.
find_package(Python3 COMPONENTS Interpreter REQUIRED)
## Probe for the CppHeaderParser module by importing it; a non-zero result
## stops the configuration with installation instructions.
execute_process(COMMAND ${Python3_EXECUTABLE} -c "import CppHeaderParser"
RESULT_VARIABLE CPP_HEADER_PARSER
OUTPUT_QUIET)
if(NOT ${CPP_HEADER_PARSER} EQUAL 0)
message(FATAL_ERROR "\
The \"CppHeaderParser\" Python3 package is not installed. \
Please install it using the following command: \"pip3 install CppHeaderParser\".\
")
endif()
## Locate the directory containing hsa.h, searching only the include
## directories exported by the hsa-runtime64 target (with an 'hsa' suffix).
get_property(HSA_RUNTIME_INCLUDE_DIRECTORIES TARGET hsa-runtime64::hsa-runtime64 PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(HSA_H hsa.h
PATHS ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
PATH_SUFFIXES hsa
NO_DEFAULT_PATH
REQUIRED)
get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY)
## Generate the HSA wrapper functions header
## hsaap.py is given the output directory and the HSA include directory; the
## outputs are regenerated when the script or any of the four HSA headers changes.
add_custom_command(
OUTPUT hsa_prof_str.h hsa_prof_str.inline.h
COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/hsaap.py ${CMAKE_CURRENT_BINARY_DIR} "${HSA_RUNTIME_INC_PATH}" > /dev/null
DEPENDS ${PROJECT_SOURCE_DIR}/script/hsaap.py
"${HSA_RUNTIME_INC_PATH}/hsa.h" "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
"${HSA_RUNTIME_INC_PATH}/hsa_ext_image.h" "${HSA_RUNTIME_INC_PATH}/hsa_api_trace.h"
COMMENT "Generating hsa_prof_str.h,hsa_prof_str.inline.h...")
## Generate the HSA pretty printers
## hsa.h and hsa_ext_amd.h are first run through the C preprocessor (-E); the
## resulting .i files are the input of gen_ostream_ops.py.
add_custom_command(
OUTPUT hsa_ostream_ops.h
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa.h" -o hsa.h.i
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h" -o hsa_ext_amd.h.i
BYPRODUCTS hsa.h.i hsa_ext_amd.h.i
COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
-in hsa.h.i,hsa_ext_amd.h.i -out hsa_ostream_ops.h > /dev/null
DEPENDS ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
"${HSA_RUNTIME_INC_PATH}/hsa.h" "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
COMMENT "Generating hsa_ostream_ops.h...")
## Locate hip_runtime_api.h, searching only the include directories exported
## by the hip::amdhip64 target (with a 'hip' suffix).
get_property(HIP_INCLUDE_DIRECTORIES TARGET hip::amdhip64 PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
find_file(HIP_RUNTIME_API_H hip_runtime_api.h
PATHS ${HIP_INCLUDE_DIRECTORIES}
PATH_SUFFIXES hip
NO_DEFAULT_PATH
REQUIRED)
## Generate the HIP pretty printers
## roctracer/hip_full_api.h is preprocessed with every HIP include directory
## passed as -I (the generator expression joins the list; COMMAND_EXPAND_LISTS
## splits it back into separate arguments), then fed to gen_ostream_ops.py.
add_custom_command(
OUTPUT hip_ostream_ops.h
COMMAND ${CMAKE_C_COMPILER} "$<$<BOOL:${HIP_INCLUDE_DIRECTORIES}>:-I$<JOIN:${HIP_INCLUDE_DIRECTORIES},$<SEMICOLON>-I>>"
-E "${CMAKE_CURRENT_SOURCE_DIR}/roctracer/hip_full_api.h" -D__HIP_PLATFORM_AMD__=1 -D__HIP_ROCclr__=1 -o hip_runtime_api.h.i
BYPRODUCTS hip_runtime_api.h.i
COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
-in hip_runtime_api.h.i -out hip_ostream_ops.h > /dev/null
DEPENDS ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py "${HIP_RUNTIME_API_H}"
COMMENT "Generating hip_ostream_ops.h..."
COMMAND_EXPAND_LISTS)
## Hand-written public headers, relative to ${PROJECT_SOURCE_DIR}/inc.
set(PUBLIC_HEADERS
roctx.h
roctracer.h
roctracer_ext.h
roctracer_hip.h
roctracer_hcc.h
roctracer_hsa.h
roctracer_roctx.h
roctracer_plugin.h
ext/prof_protocol.h)
## Install each public header under <includedir>/<project>/, keeping its
## sub-directory (e.g. ext/), as part of the 'dev' component.
foreach(header ${PUBLIC_HEADERS})
get_filename_component(header_subdir ${header} DIRECTORY)
install(FILES ${PROJECT_SOURCE_DIR}/inc/${header}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/${header_subdir}
COMPONENT dev)
endforeach()
## Generated headers that are installed; they are produced in the current
## binary directory by the custom commands above. hsa_prof_str.inline.h is
## generated as well but is not in this list.
set(GENERATED_HEADERS
hip_ostream_ops.h
hsa_prof_str.h
hsa_ostream_ops.h)
foreach(header ${GENERATED_HEADERS})
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${header}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
COMPONENT dev)
endforeach()
## Build the util library
## A static, position-independent library built from every util/*.cpp file.
file(GLOB UTIL_SOURCES "util/*.cpp")
add_library(util STATIC ${UTIL_SOURCES})
set_target_properties(util PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(util
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/util)
## Optional backtrace support: HAVE_BACKTRACE_H is defined when backtrace.h is
## found, ENABLE_BACKTRACE (plus the link dependency) when the 'backtrace'
## library is found as well.
## NOTE(review): check_include_file() comes from the CheckIncludeFile module,
## which is not included in this file - presumably a parent script does; confirm.
check_include_file(backtrace.h BACKTRACE_H)
if(BACKTRACE_H)
target_compile_definitions(util PRIVATE HAVE_BACKTRACE_H)
find_library(BACKTRACE_LIB "backtrace" ${CMAKE_C_IMPLICIT_LINK_DIRECTORIES})
endif()
if(BACKTRACE_LIB)
target_compile_definitions(util PRIVATE ENABLE_BACKTRACE)
target_link_libraries(util PRIVATE ${BACKTRACE_LIB})
endif()
## Build the ROCtracer library
## The generated headers are listed as sources so that the custom commands
## producing them run before the library is compiled.
file(GLOB ROCTRACER_SOURCES "roctracer/*.cpp")
add_library(roctracer ${LIBRARY_TYPE} ${ROCTRACER_SOURCES} ${GENERATED_HEADERS} hsa_prof_str.inline.h)
## Output file name is based on 'roctracer64'; symbols are hidden by default and
## the library is relinked when the export map changes.
set_target_properties(roctracer PROPERTIES
CXX_VISIBILITY_PRESET hidden
OUTPUT_NAME "roctracer64"
DEFINE_SYMBOL "ROCTRACER_EXPORTS"
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/roctracer/exportmap
VERSION ${PROJECT_VERSION}
SOVERSION ${PROJECT_VERSION_MAJOR})
target_compile_definitions(roctracer
PUBLIC AMD_INTERNAL_BUILD
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
## The binary directory (generated headers) and inc/ are exposed to consumers
## at build time only (BUILD_INTERFACE).
target_include_directories(roctracer
PUBLIC
${HIP_INCLUDE_DIRECTORIES} ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/inc>
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/roctracer ${CMAKE_CURRENT_SOURCE_DIR})
## The version script selects the exported symbols; --no-undefined turns
## unresolved symbols into link errors.
target_link_options(roctracer PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/roctracer/exportmap -Wl,--no-undefined)
target_link_libraries(roctracer PRIVATE util hsa-runtime64::hsa-runtime64 stdc++fs Threads::Threads dl)
## Runtime component: the library without the unversioned name link.
install(TARGETS roctracer LIBRARY
DESTINATION ${CMAKE_INSTALL_LIBDIR}
COMPONENT runtime NAMELINK_SKIP)
## Install name link library in dev component
install(TARGETS roctracer LIBRARY
DESTINATION ${CMAKE_INSTALL_LIBDIR}
COMPONENT dev NAMELINK_ONLY)
## The 'asan' component gets the library together with its name link.
install(TARGETS roctracer LIBRARY
DESTINATION ${CMAKE_INSTALL_LIBDIR}
COMPONENT asan)
## Build the ROCTX library
## Built from roctx/*.cpp with the same library type as roctracer; the output
## file name is based on 'roctx64'.
file(GLOB ROCTX_SOURCES "roctx/*.cpp")
add_library(roctx ${LIBRARY_TYPE} ${ROCTX_SOURCES})
set_target_properties(roctx PROPERTIES
CXX_VISIBILITY_PRESET hidden
OUTPUT_NAME "roctx64"
DEFINE_SYMBOL "ROCTX_EXPORTS"
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/roctx/exportmap
VERSION ${PROJECT_VERSION}
SOVERSION ${PROJECT_VERSION_MAJOR})
target_include_directories(roctx
PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/inc>
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
## The version script selects the exported symbols; --no-undefined turns
## unresolved symbols into link errors.
target_link_options(roctx PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/roctx/exportmap -Wl,--no-undefined)
## Runtime component: the library without the unversioned name link.
install(TARGETS roctx LIBRARY
DESTINATION ${CMAKE_INSTALL_LIBDIR}
COMPONENT runtime NAMELINK_SKIP)
## Install name link library in dev component
install(TARGETS roctx LIBRARY
DESTINATION ${CMAKE_INSTALL_LIBDIR}
COMPONENT dev NAMELINK_ONLY)
## The 'asan' component gets the library together with its name link.
install(TARGETS roctx LIBRARY
DESTINATION ${CMAKE_INSTALL_LIBDIR}
COMPONENT asan)
## Build the tracer_tool library
## The tool and hip_stats are shared libraries and are only built when the
## project itself is built as shared libraries. Both operands are quoted so
## that an unset or empty LIBRARY_TYPE makes the condition false instead of
## producing a malformed if() expression.
if("${LIBRARY_TYPE}" STREQUAL "SHARED")

  file(GLOB TRACER_TOOL_SOURCES "tracer_tool/*.cpp")
  add_library(roctracer_tool SHARED ${TRACER_TOOL_SOURCES})

  set_target_properties(roctracer_tool PROPERTIES
    CXX_VISIBILITY_PRESET hidden
    LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/tracer_tool/exportmap
    INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")

  target_compile_definitions(roctracer_tool
    PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)

  target_include_directories(roctracer_tool
    PRIVATE
      ${PROJECT_SOURCE_DIR}/inc ${CMAKE_CURRENT_SOURCE_DIR}/roctracer
      ${CMAKE_CURRENT_SOURCE_DIR})

  target_link_libraries(roctracer_tool util roctracer hsa-runtime64::hsa-runtime64 stdc++fs Threads::Threads atomic dl)
  ## The version script selects the exported symbols; --no-undefined turns
  ## unresolved symbols into link errors.
  target_link_options(roctracer_tool PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/tracer_tool/exportmap -Wl,--no-undefined)

  ## Installed in the project's private library directory (<libdir>/<project>).
  install(TARGETS roctracer_tool LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} COMPONENT runtime)
  install(TARGETS roctracer_tool LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} COMPONENT asan)

  ## hip_stats: a shared library built from hip_stats/hip_stats.cpp on top of roctracer.
  add_library(hip_stats SHARED hip_stats/hip_stats.cpp)
  set_target_properties(hip_stats PROPERTIES INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")
  target_compile_definitions(hip_stats PRIVATE __HIP_PLATFORM_AMD__)
  target_link_libraries(hip_stats roctracer stdc++fs)
  install(TARGETS hip_stats LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} COMPONENT runtime)
  install(TARGETS hip_stats LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} COMPONENT asan)

endif()
+259
Féach ar an gComhad
@@ -0,0 +1,259 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "roctracer.h"
#include "roctracer_hip.h"
#include <cstdint>
#include <cstdlib>
#include <experimental/filesystem>
#include <iomanip>
#include <iostream>
#include <fstream>
#include <numeric>
#include <set>
#include <string>
#include <sstream>
#include <unordered_map>
#include <utility>
// Evaluate a roctracer API call and, if it does not return ROCTRACER_STATUS_SUCCESS, print
// roctracer_error_string() to stderr and abort() the process.
// The expansion declares a local variable named `status`, so the `call` expression must not
// itself refer to a variable with that name.
#define CHECK_ROCTRACER(call) \
do { \
roctracer_status_t status = call; \
if (status != ROCTRACER_STATUS_SUCCESS) { \
std::cerr << roctracer_error_string() << std::endl; \
abort(); \
} \
} while (false)
namespace {
// Round `v` up to the nearest power of two.
// Zero maps to 1 and exact powers of two map to themselves. Inputs larger than 2^63 have
// no representable result and wrap around to 0.
constexpr uint64_t NextPowerOf2(uint64_t v) {
  if (v == 0) v = 1;
  // Copy the highest set bit of (v - 1) into every lower position, then add one.
  uint64_t smeared = v - 1;
  for (unsigned shift = 1; shift < 64; shift *= 2) smeared |= smeared >> shift;
  return smeared + 1;
}
// Binary size units used when bucketing and printing memory copy sizes.
constexpr size_t KiB = 1024;
constexpr size_t MiB = KiB * KiB;
constexpr size_t GiB = KiB * KiB * KiB;

// Format a byte count with a binary unit suffix.
// Values below 1 KiB are printed as a plain integer with no suffix. Larger values are
// divided by the largest unit that does not exceed them (K, M or G) and printed in fixed
// notation with `precision` digits after the decimal point.
std::string HumanReadableSize(size_t size, int precision) {
  std::stringstream text;
  if (size < KiB) {
    text << size;
    return text.str();
  }

  size_t unit = GiB;
  const char* suffix = "G";
  if (size < MiB) {
    unit = KiB;
    suffix = "K";
  } else if (size < GiB) {
    unit = MiB;
    suffix = "M";
  }
  text << std::fixed << std::setprecision(precision) << static_cast<double>(size) / unit << suffix;
  return text.str();
}
// Running total of elapsed time and call count for one function or kernel.
// Both counters start at zero even when the object is default-initialized (for example as
// a local variable), not only when it is value-initialized by a container.
struct FunctionStats {
  uint64_t total_time_ns{0};  // Sum of all accumulated durations, in nanoseconds.
  uint64_t count{0};          // Number of Accumulate() calls.

  // Add one observation of `time_ns` nanoseconds.
  void Accumulate(uint64_t time_ns) {
    total_time_ns += time_ns;
    ++count;
  }
};
// Running total of elapsed time, bytes transferred and call count for memory copies.
// All counters start at zero even when the object is default-initialized, not only when
// it is value-initialized by a container.
struct MemCopyStats {
  uint64_t total_time_ns{0};    // Sum of all accumulated durations, in nanoseconds.
  uint64_t total_byte_size{0};  // Sum of all accumulated copy sizes, in bytes.
  uint64_t count{0};            // Number of Accumulate() calls.

  // Add one copy that took `time_ns` nanoseconds and moved `byte_size` bytes.
  void Accumulate(uint64_t time_ns, uint64_t byte_size) {
    total_time_ns += time_ns;
    total_byte_size += byte_size;
    ++count;
  }
};
// Hash functor for std::pair, usable as the Hash parameter of unordered containers.
// The two element hashes are mixed with an order-sensitive combiner. A plain XOR would
// give (a, b) and (b, a) the same hash and would map every pair with equal halves to 0.
struct pair_hash {
  template <typename T1, typename T2> std::size_t operator()(const std::pair<T1, T2>& pair) const {
    std::size_t seed = std::hash<T1>()(pair.first);
    // Golden-ratio based mixing step; the constant fits in 32 bits so it is also valid
    // where std::size_t is 32 bits wide.
    seed ^= std::hash<T2>()(pair.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    return seed;
  }
};
// Statistics tables filled by CollectStatistics() and printed by DumpStatistics().
// HIP API timings, keyed by the record's `op` value.
std::unordered_map<decltype(roctracer_record_t::op), FunctionStats> hip_api_stats;
// Kernel dispatch timings, keyed by kernel name.
std::unordered_map<std::string, FunctionStats> kernel_stats;
// Memory copy timings, keyed by (record kind, copy size rounded up with NextPowerOf2).
std::unordered_map<std::pair<decltype(roctracer_record_t::kind), size_t>, MemCopyStats, pair_hash>
memcpy_stats;
void CollectStatistics(const char* begin, const char* end, void* /* user_arg */) {
const auto* record = reinterpret_cast<const roctracer_record_t*>(begin);
while (record < reinterpret_cast<const roctracer_record_t*>(end)) {
auto elapsed_time_ns = record->end_ns - record->begin_ns;
if (record->domain == ACTIVITY_DOMAIN_HIP_OPS && record->op == HIP_OP_ID_DISPATCH) {
const char* kernel_name = record->kernel_name;
if (kernel_name == nullptr) kernel_name = "Unknown kernels";
kernel_stats[kernel_name].Accumulate(elapsed_time_ns);
} else if (record->domain == ACTIVITY_DOMAIN_HIP_OPS && record->op == HIP_OP_ID_COPY)
memcpy_stats[std::make_pair(record->kind, NextPowerOf2(record->bytes))].Accumulate(
elapsed_time_ns, record->bytes);
else if (record->domain == ACTIVITY_DOMAIN_HIP_API)
hip_api_stats[record->op].Accumulate(elapsed_time_ns);
CHECK_ROCTRACER(roctracer_next_record(record, &record));
}
}
namespace fs = std::experimental::filesystem;

// Sum total_time_ns over every entry of a statistics map.
// The accumulator is seeded with a uint64_t: std::accumulate carries its running total in
// the type of the initial value, so seeding it with a plain `0` would truncate the
// nanosecond total to int.
template <typename StatsMap> uint64_t TotalTimeNs(const StatsMap& stats_map) {
  return std::accumulate(stats_map.begin(), stats_map.end(), uint64_t{0},
                         [](uint64_t total_time_ns, const auto& stats) {
                           return total_time_ns + stats.second.total_time_ns;
                         });
}

// Close the roctracer pool and write the collected statistics as CSV.
// If the ROCP_OUTPUT_DIR environment variable is set, it must name an existing directory;
// one file per non-empty table is created there (hip_api_stats.csv, hip_kernel_stats.csv,
// hip_copy_stats.csv). Otherwise all tables are printed to stdout. Each table is sorted by
// descending total time.
void DumpStatistics() {
  CHECK_ROCTRACER(roctracer_close_pool());

  fs::path output_dir = []() {
    const char* env_var = getenv("ROCP_OUTPUT_DIR");
    return env_var != nullptr ? env_var : "";
  }();

  std::ofstream out;
  if (output_dir.empty()) {
    // If an output directory was not specified, then print the statistics to stdout.
    // `out` borrows std::cout's formatting, state and stream buffer.
    out.copyfmt(std::cout);
    out.clear(std::cout.rdstate());
    out.basic_ios<char>::rdbuf(std::cout.rdbuf());
  } else {
    if (auto status = fs::status(output_dir); !fs::exists(status) || !fs::is_directory(status)) {
      std::cerr << "error: ROCP_OUTPUT_DIR=" << output_dir << " is not a directory" << std::endl;
      return;
    }
  }

  // Orders entries by descending total time. It is used with std::multiset rather than
  // std::set so that entries with identical totals are all kept; a std::set would treat
  // them as duplicates and drop all but one.
  auto compare = [](const auto& x, const auto& y) {
    return x.second.total_time_ns > y.second.total_time_ns;
  };

  // Print the HIP API statistics sorted by descending total inclusive time.
  if (!hip_api_stats.empty()) {
    const char* filename = "hip_api_stats.csv";
    if (!output_dir.empty()) out = std::ofstream(output_dir / filename);

    if (out.good()) {
      std::cout << "Dumping HIP API statistics." << std::endl;

      const uint64_t total_hip_api_time_ns = TotalTimeNs(hip_api_stats);

      out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl;
      for (auto&& [op, stats] :
           std::multiset<decltype(hip_api_stats)::value_type, decltype(compare)>(
               hip_api_stats.begin(), hip_api_stats.end(), compare))
        out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op, 0) << "\"," << stats.count
            << "," << stats.total_time_ns << "," << stats.total_time_ns / stats.count << ","
            << std::fixed << std::setprecision(4)
            << (double)stats.total_time_ns / total_hip_api_time_ns * 100 << std::endl;
    } else {
      std::cerr << "warning: could not open " << output_dir / filename << std::endl;
    }
  }

  // Print the HIP kernel dispatch statistics sorted by descending execution time.
  if (!kernel_stats.empty()) {
    const char* filename = "hip_kernel_stats.csv";
    if (!output_dir.empty()) out = std::ofstream(output_dir / filename);

    if (out.good()) {
      std::cout << "Dumping HIP kernel dispatch statistics." << std::endl;

      const uint64_t total_kernel_time_ns = TotalTimeNs(kernel_stats);

      out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl;
      for (auto&& [name, stats] :
           std::multiset<decltype(kernel_stats)::value_type, decltype(compare)>(
               kernel_stats.begin(), kernel_stats.end(), compare))
        out << "\"" << name << "\"," << stats.count << "," << stats.total_time_ns << ","
            << stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4)
            << (double)stats.total_time_ns / total_kernel_time_ns * 100 << std::endl;
    } else {
      std::cerr << "warning: could not open " << output_dir / filename << std::endl;
    }
  }

  // Print the HIP memory copy statistics sorted by descending transfer time.
  if (!memcpy_stats.empty()) {
    const char* filename = "hip_copy_stats.csv";
    if (!output_dir.empty()) out = std::ofstream(output_dir / filename);

    if (out.good()) {
      std::cout << "Dumping HIP memory copy statistics." << std::endl;

      const uint64_t total_memory_copy_time_ns = TotalTimeNs(memcpy_stats);

      out << "\"Name\",\"Calls\",\"TotalBytes\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\""
          << std::endl;
      // kind.second is the power-of-two bucket upper bound, so the printed range is
      // (bucket / 2) - bucket.
      for (auto&& [kind, stats] :
           std::multiset<decltype(memcpy_stats)::value_type, decltype(compare)>(
               memcpy_stats.begin(), memcpy_stats.end(), compare))
        out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY, kind.first)
            << "(" << HumanReadableSize(kind.second >> 1, 0) << "-"
            << HumanReadableSize(kind.second, 0) << ")"
            << "\"," << stats.count << "," << stats.total_byte_size << "," << stats.total_time_ns
            << "," << stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4)
            << (double)stats.total_time_ns / total_memory_copy_time_ns * 100 << std::endl;
    } else {
      std::cerr << "warning: could not open " << output_dir / filename << std::endl;
    }
  }
}
} // namespace
#include <hsa/hsa_api_trace.h>
// Exported load hook. Opens the default roctracer pool with CollectStatistics as its
// buffer callback, enables activity collection for the whole HIP API domain and for the
// HIP dispatch and copy operations, and registers DumpStatistics to run at process exit.
// None of the arguments are used. Always returns true; any roctracer failure aborts via
// CHECK_ROCTRACER.
extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* /* table */, uint64_t /* runtime_version */,
                                        uint64_t /* failed_tool_count */,
                                        const char* const* /* failed_tool_names */) {
  // The pool buffer is sized to hold this many activity records.
  constexpr size_t kRecordsPerBuffer = 10000;

  roctracer_properties_t pool_properties{};
  pool_properties.buffer_callback_arg = nullptr;
  pool_properties.buffer_callback_fun = CollectStatistics;
  pool_properties.buffer_size = kRecordsPerBuffer * sizeof(roctracer_record_t);
  CHECK_ROCTRACER(roctracer_open_pool(&pool_properties));

  CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
  CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_DISPATCH));
  CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY));

  std::atexit([]() { DumpStatistics(); });
  return true;
}
// Exported unload hook. It does nothing here: the statistics are written by the atexit
// handler that OnLoad registers.
extern "C" ROCTRACER_EXPORT void OnUnload() {}
@@ -0,0 +1,73 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "roctracer.h"
extern "C" {

// Deprecated functions:
// The four entry points below are kept as no-op stubs; roctracer_load() always returns 1.
ROCTRACER_API int roctracer_load() { return 1; }
ROCTRACER_API void roctracer_unload() {}
ROCTRACER_API void roctracer_flush_buf() {}
ROCTRACER_API void roctracer_mark(const char*) {}

// Each function below applies the matching per-domain call to every domain in
// [0, ACTIVITY_DOMAIN_NUMBER). It stops at the first domain that fails and returns that
// status; ROCTRACER_STATUS_SUCCESS is returned only if every domain succeeded.

// Enable `callback` (with `user_data`) on every domain.
ROCTRACER_API roctracer_status_t roctracer_enable_callback(roctracer_rtapi_callback_t callback,
                                                           void* user_data) {
  uint32_t domain = 0;
  while (domain < ACTIVITY_DOMAIN_NUMBER) {
    const auto status = roctracer_enable_domain_callback(static_cast<roctracer_domain_t>(domain),
                                                         callback, user_data);
    if (status != ROCTRACER_STATUS_SUCCESS) return status;
    ++domain;
  }
  return ROCTRACER_STATUS_SUCCESS;
}

// Disable callbacks on every domain.
ROCTRACER_API roctracer_status_t roctracer_disable_callback() {
  uint32_t domain = 0;
  while (domain < ACTIVITY_DOMAIN_NUMBER) {
    const auto status =
        roctracer_disable_domain_callback(static_cast<roctracer_domain_t>(domain));
    if (status != ROCTRACER_STATUS_SUCCESS) return status;
    ++domain;
  }
  return ROCTRACER_STATUS_SUCCESS;
}

// Enable activity collection into `pool` on every domain.
ROCTRACER_API roctracer_status_t roctracer_enable_activity_expl(roctracer_pool_t* pool) {
  uint32_t domain = 0;
  while (domain < ACTIVITY_DOMAIN_NUMBER) {
    const auto status =
        roctracer_enable_domain_activity_expl(static_cast<roctracer_domain_t>(domain), pool);
    if (status != ROCTRACER_STATUS_SUCCESS) return status;
    ++domain;
  }
  return ROCTRACER_STATUS_SUCCESS;
}

// Enable activity collection on every domain via roctracer_enable_domain_activity.
ROCTRACER_API roctracer_status_t roctracer_enable_activity() {
  uint32_t domain = 0;
  while (domain < ACTIVITY_DOMAIN_NUMBER) {
    const auto status = roctracer_enable_domain_activity(static_cast<roctracer_domain_t>(domain));
    if (status != ROCTRACER_STATUS_SUCCESS) return status;
    ++domain;
  }
  return ROCTRACER_STATUS_SUCCESS;
}

// Disable activity collection on every domain.
ROCTRACER_API roctracer_status_t roctracer_disable_activity() {
  uint32_t domain = 0;
  while (domain < ACTIVITY_DOMAIN_NUMBER) {
    const auto status =
        roctracer_disable_domain_activity(static_cast<roctracer_domain_t>(domain));
    if (status != ROCTRACER_STATUS_SUCCESS) return status;
    ++domain;
  }
  return ROCTRACER_STATUS_SUCCESS;
}

}  // extern "C"
@@ -0,0 +1,99 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "correlation_id.h"
#include "roctracer.h"
#include <atomic>
#include <stack>
#include <vector>
namespace {
// A stack that can be used for TLS variables. TLS destructors are invoked before global destructors
// which is a problem if operations invoked by global destructors use TLS variables. If the TLS
// stack is destructed, it still has well defined behavior by always returning a dummy element.
//
// The same dummy element is also returned by top() when the stack is valid but empty, and
// pop() on an empty stack is a no-op, so unbalanced pop/top calls cannot reach the
// undefined behavior of popping or reading an empty std::stack.
template <typename T> class Stack : std::stack<T, std::vector<T>> {
  using parent_type = typename std::stack<T, std::vector<T>>;

 public:
  Stack() { valid_.store(true, std::memory_order_relaxed); }
  ~Stack() { valid_.store(false, std::memory_order_relaxed); }

  // Construct a new top element from `args`. On an invalid stack the value is only
  // stored in the dummy element, and a reference to that is returned.
  template <class... Args> auto& emplace(Args&&... args) {
    return is_valid() ? parent_type::emplace(std::forward<Args>(args)...)
                      : dummy_element_ = T(std::forward<Args>(args)...);
  }

  // Push a copy of `v`; ignored on an invalid stack.
  void push(const T& v) {
    if (is_valid()) parent_type::push(v);
  }
  // Push `v` by move; ignored on an invalid stack.
  void push(T&& v) {
    if (is_valid()) parent_type::push(std::move(v));
  }

  // Remove the top element; ignored on an invalid or empty stack.
  void pop() {
    if (has_elements()) parent_type::pop();
  }

  // Return the top element, or the dummy element when there is none.
  const auto& top() const { return has_elements() ? parent_type::top() : dummy_element_; }
  // Same as above; the dummy element is reset to a value-initialized T before being returned.
  auto& top() { return has_elements() ? parent_type::top() : (dummy_element_ = {}); }

  // True between the end of the constructor and the start of the destructor.
  bool is_valid() const { return valid_.load(std::memory_order_relaxed); }

  // Number of stored elements; 0 on an invalid stack.
  size_t size() const { return is_valid() ? parent_type::size() : 0; }
  bool empty() const { return size() == 0; }

 private:
  // True when the stack is valid and holds at least one element.
  bool has_elements() const { return is_valid() && !parent_type::empty(); }

  std::atomic<bool> valid_{false};
  T dummy_element_{};  // Dummy element used when the stack is not valid or is empty.
};
// Per-thread stack of the nested internal correlation IDs.
thread_local Stack<activity_correlation_id_t> correlation_id_stack{};
// Per-thread stack of the nested external correlation IDs.
thread_local Stack<activity_correlation_id_t> external_id_stack{};
} // namespace
namespace roctracer {

// Allocate the next correlation ID from a process-wide counter that starts at 1, push it
// onto the calling thread's stack and return it.
activity_correlation_id_t CorrelationIdPush() {
  static std::atomic<uint64_t> counter{1};
  const auto fresh_id = counter.fetch_add(1, std::memory_order_relaxed);
  return correlation_id_stack.emplace(fresh_id);
}

// Pop the calling thread's innermost correlation ID.
void CorrelationIdPop() { correlation_id_stack.pop(); }

// Return the calling thread's innermost correlation ID, or 0 when its stack is empty.
activity_correlation_id_t CorrelationId() {
  if (correlation_id_stack.empty()) return 0;
  return correlation_id_stack.top();
}

// Push `external_id` onto the calling thread's external correlation ID stack.
void ExternalCorrelationIdPush(activity_correlation_id_t external_id) {
  external_id_stack.push(external_id);
}

// Pop and return the calling thread's innermost external correlation ID, or nullopt when
// its stack is empty.
std::optional<activity_correlation_id_t> ExternalCorrelationIdPop() {
  if (external_id_stack.empty()) return std::nullopt;

  const auto popped_id = external_id_stack.top();
  external_id_stack.pop();
  return std::make_optional(popped_id);
}

// Return the calling thread's innermost external correlation ID without popping it, or
// nullopt when its stack is empty.
std::optional<activity_correlation_id_t> ExternalCorrelationId() {
  if (external_id_stack.empty()) return std::nullopt;
  return std::make_optional(external_id_stack.top());
}

}  // namespace roctracer
@@ -0,0 +1,50 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include "roctracer.h"
#include <optional>
namespace roctracer {
// Start a new correlation ID region and push it onto the thread local stack. Correlation ID
// regions are nested and per-thread.
activity_correlation_id_t CorrelationIdPush();
// Stop the current correlation ID region and pop it from the thread local stack.
void CorrelationIdPop();
// Return the ID of the currently active correlation ID region, or 0 if no region is active.
activity_correlation_id_t CorrelationId();
// Start a new external correlation ID region for the given \p external_id. As for the internal
// correlation ID regions, external correlation ID regions are nested and per-thread.
void ExternalCorrelationIdPush(activity_correlation_id_t external_id);
// Stop the current external correlation ID region and return the external_id used to start the
// region. Return a nullopt if no region was active.
std::optional<activity_correlation_id_t> ExternalCorrelationIdPop();
// Return the current external correlation ID, or nullopt if no region is active.
std::optional<activity_correlation_id_t> ExternalCorrelationId();
} // namespace roctracer
+51
Féach ar an gComhad
@@ -0,0 +1,51 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef EXCEPTION_H_
#define EXCEPTION_H_
#include <sstream>
#include <stdexcept>
#include <string>
#include <sstream>
// Throw a roctracer::ApiError carrying status `error`. The message is the name of the
// enclosing function followed by "(), " and whatever `stream` inserts; `stream` may be a
// chain of `<<` operands. The expansion declares a local named `oss`, so `stream` must not
// refer to a variable with that name.
#define EXC_RAISING(error, stream) \
do { \
std::ostringstream oss; \
oss << __FUNCTION__ << "(), " << stream; \
throw roctracer::ApiError(error, oss.str()); \
} while (false)
namespace roctracer {
class ApiError : public std::runtime_error {
public:
explicit ApiError(roctracer_status_t status, const std::string& what_arg)
: std::runtime_error(what_arg), status_(status) {}
roctracer_status_t status() const noexcept { return status_; }
private:
const roctracer_status_t status_;
};
} // namespace roctracer
#endif // EXCEPTION_H_
+48
Féach ar an gComhad
@@ -0,0 +1,48 @@
/* Symbols exported under the ROCTRACER_4.0 version node. Every symbol that is not listed
   as global here (or in a later node) is made local by the `local: *;` catch-all. */
ROCTRACER_4.0 {
global: OnLoad;
OnUnload;
roctracer_activity_pop_external_correlation_id;
roctracer_activity_push_external_correlation_id;
roctracer_close_pool_expl;
roctracer_default_pool_expl;
roctracer_disable_activity;
roctracer_disable_callback;
roctracer_disable_domain_activity;
roctracer_disable_domain_callback;
roctracer_disable_op_activity;
roctracer_disable_op_callback;
roctracer_enable_activity_expl;
roctracer_enable_callback;
roctracer_enable_domain_activity_expl;
roctracer_enable_domain_callback;
roctracer_enable_op_activity_expl;
roctracer_enable_op_callback;
roctracer_error_string;
roctracer_flush_activity_expl;
roctracer_flush_buf;
roctracer_get_timestamp;
roctracer_load;
roctracer_mark;
roctracer_op_code;
roctracer_open_pool_expl;
roctracer_op_string;
roctracer_set_properties;
roctracer_start;
roctracer_stop;
roctracer_unload;
roctracer_version_major;
roctracer_version_minor;
local: *;
};
/* Additional symbols exported under the ROCTRACER_4.1 version node, which is declared as
   depending on ROCTRACER_4.0. */
ROCTRACER_4.1 {
global: HSA_AMD_TOOL_PRIORITY;
roctracer_close_pool;
roctracer_default_pool;
roctracer_enable_activity;
roctracer_enable_domain_activity;
roctracer_enable_op_activity;
roctracer_flush_activity;
roctracer_next_record;
roctracer_open_pool;
} ROCTRACER_4.0;
+2
Féach ar an gComhad
@@ -0,0 +1,2 @@
#include <hip/hip_runtime_api.h>
#include <hip/hip_deprecated.h>
+679
Féach ar an gComhad
@@ -0,0 +1,679 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "hsa_support.h"
#include "correlation_id.h"
#include "debug.h"
#include "exception.h"
#include "memory_pool.h"
#include "roctracer.h"
#include "roctracer_hsa.h"
#include <atomic>
#include <hsa/hsa.h>
#include <hsa/amd_hsa_signal.h>
#include <hsa/hsa_ven_amd_loader.h>
#include <unordered_map>
#include <optional>
#include <mutex>
namespace {

// Activity reporting hook. While it holds a null pointer nothing is reported and every
// operation counts as disabled.
std::atomic<int (*)(activity_domain_t domain, uint32_t operation_id, void* data)> report_activity;

// Ask the reporting hook whether `operation_id` of `domain` is enabled: the hook is called
// with a null data pointer, and a return value of 0 means enabled. Returns false when no
// hook is installed.
bool IsEnabled(activity_domain_t domain, uint32_t operation_id) {
  const auto hook = report_activity.load(std::memory_order_relaxed);
  if (hook == nullptr) return false;
  return hook(domain, operation_id, nullptr) == 0;
}

// Forward `data` for `operation_id` of `domain` to the reporting hook, if one is installed.
// The hook's return value is ignored.
void ReportActivity(activity_domain_t domain, uint32_t operation_id, void* data) {
  const auto hook = report_activity.load(std::memory_order_relaxed);
  if (hook != nullptr) hook(domain, operation_id, data);
}

}  // namespace
#include "hsa_prof_str.inline.h"
namespace roctracer::hsa_support {
namespace {
// HSA function tables through which the intercept functions in this file call the
// underlying HSA implementation.
// NOTE(review): presumably filled with the original table entries before the intercepts
// are installed -- the code that populates them is not in this part of the file.
CoreApiTable saved_core_api{};
AmdExtTable saved_amd_ext_api{};
// Function table of the AMD loader extension, used to query loaded code objects.
hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api{};
// Device information reported in HSA_EVT_ID_DEVICE events for one agent.
struct AgentInfo {
// Copied into hsa_evt_data_t::device.id.
uint32_t id;
// Copied into hsa_evt_data_t::device.type.
hsa_device_type_t type;
};
// AgentInfo for each known agent, keyed by hsa_agent_t::handle. The intercepts treat a
// missing agent as a fatal error.
std::unordered_map<decltype(hsa_agent_t::handle), AgentInfo> agent_info_map;
// Tracks the completion of asynchronous operations through a proxy signal.
// Enable() creates a proxy signal for an entry and registers Handler() on it. When the
// proxy signal completes, Complete() collects the timestamps, invokes the entry's handler,
// completes the original signal (if any), destroys the proxy signal and deletes the entry.
class Tracker {
public:
// Values stored in entry_t::valid.
enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 };
// Kind of operation an entry tracks. Complete() only handles COPY_ENTRY_TYPE.
enum entry_type_t {
DFLT_ENTRY_TYPE = 0,
API_ENTRY_TYPE = 1,
COPY_ENTRY_TYPE = 2,
KERNEL_ENTRY_TYPE = 3,
NUM_ENTRY_TYPE = 4
};
// State of one tracked operation. Enable() fills type, agent, dev_index, orig, valid and
// signal; the remaining fields (including handler) are not set by this class.
struct entry_t {
// One of ENTRY_INV / ENTRY_INIT / ENTRY_COMPL.
std::atomic<uint32_t> valid;
entry_type_t type;
uint64_t correlation_id;
roctracer_timestamp_t begin; // begin timestamp, ns
roctracer_timestamp_t end; // end timestamp, ns
hsa_agent_t agent;
uint32_t dev_index;
// Signal passed to Enable(); a zero handle means there is no signal to complete.
hsa_signal_t orig;
// Proxy signal created by Enable().
hsa_signal_t signal;
// Called by Complete() once begin/end are filled in; must not be null.
void (*handler)(const entry_t*);
union {
struct {
} copy;
struct {
const char* name;
hsa_agent_t agent;
uint32_t tid;
} kernel;
};
};
// Add tracker entry
// Initializes `entry`, marks it ENTRY_INIT, creates a proxy signal with initial value 1
// and registers Handler() to run once that signal's value drops below 1. Any HSA failure
// is reported through fatal().
inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal,
entry_t* entry) {
hsa_status_t status = HSA_STATUS_ERROR;
// Creating a new tracker entry
entry->type = type;
entry->agent = agent;
entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index;
entry->orig = signal;
entry->valid.store(ENTRY_INIT, std::memory_order_release);
// Creating a proxy signal
status = saved_core_api.hsa_signal_create_fn(1, 0, NULL, &(entry->signal));
if (status != HSA_STATUS_SUCCESS) fatal("hsa_signal_create failed");
status = saved_amd_ext_api.hsa_amd_signal_async_handler_fn(
entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
if (status != HSA_STATUS_SUCCESS) fatal("hsa_amd_signal_async_handler failed");
}
// Delete tracker entry
// Destroys the proxy signal and marks the entry ENTRY_INV. The entry itself is not freed.
inline static void Disable(entry_t* entry) {
saved_core_api.hsa_signal_destroy_fn(entry->signal);
entry->valid.store(ENTRY_INV, std::memory_order_release);
}
private:
// Entry completion
// Runs from Handler(). `signal_value` is the value the handler was invoked with.
inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
// Nanoseconds per system clock tick, queried once on first use.
// NOTE(review): this is an integer division, so it truncates when the frequency does not
// divide 1e9 evenly, and a reported frequency of 0 would divide by zero -- confirm the
// frequencies the runtime can report.
static roctracer_timestamp_t sysclock_period = []() {
uint64_t sysclock_hz = 0;
hsa_status_t status =
saved_core_api.hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
if (status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_info failed");
return (uint64_t)1000000000 / sysclock_hz;
}();
if (entry->type == COPY_ENTRY_TYPE) {
// Convert the async copy start/end ticks of the proxy signal to nanoseconds.
hsa_amd_profiling_async_copy_time_t async_copy_time{};
hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_get_async_copy_time_fn(
entry->signal, &async_copy_time);
if (status != HSA_STATUS_SUCCESS) fatal("hsa_amd_profiling_get_async_copy_time failed");
entry->begin = async_copy_time.start * sysclock_period;
entry->end = async_copy_time.end * sysclock_period;
} else {
// Only copy entries are expected here; with assertions compiled out, begin/end are left
// as they were.
assert(false && "should not reach here");
}
// Copy the signal handles out of the entry before the handler runs.
hsa_signal_t orig = entry->orig;
hsa_signal_t signal = entry->signal;
// Releasing completed entry
entry->valid.store(ENTRY_COMPL, std::memory_order_release);
assert(entry->handler != nullptr);
entry->handler(entry);
// Original intercepted signal completion
if (orig.handle) {
// Propagate the proxy signal's timestamps to the original signal, then store
// `signal_value` into it. The assert expects that to be the original's current value
// minus one.
amd_signal_t* orig_signal_ptr = reinterpret_cast<amd_signal_t*>(orig.handle);
amd_signal_t* prof_signal_ptr = reinterpret_cast<amd_signal_t*>(signal.handle);
orig_signal_ptr->start_ts = prof_signal_ptr->start_ts;
orig_signal_ptr->end_ts = prof_signal_ptr->end_ts;
[[maybe_unused]] const hsa_signal_value_t new_value =
saved_core_api.hsa_signal_load_relaxed_fn(orig) - 1;
assert(signal_value == new_value && "Tracker::Complete bad signal value");
saved_core_api.hsa_signal_store_screlease_fn(orig, signal_value);
}
// The proxy signal and the entry are both released here; the entry is freed with
// `delete`, so it must have been allocated with `new`.
saved_core_api.hsa_signal_destroy_fn(signal);
delete entry;
}
// Handler for packet completion
// `arg` is the entry_t* registered in Enable(). Always returns false.
// NOTE(review): presumably a false return tells the runtime not to invoke the handler
// again -- confirm against the hsa_amd_signal_async_handler documentation.
static bool Handler(hsa_signal_value_t signal_value, void* arg) {
// Acquire entry
entry_t* entry = reinterpret_cast<entry_t*>(arg);
// Yield until the entry has been marked ENTRY_INIT.
while (entry->valid.load(std::memory_order_acquire) != ENTRY_INIT) sched_yield();
// Complete entry
Tracker::Complete(signal_value, entry);
return false;
}
};
// Intercept for hsa_memory_allocate. Forwards the call and, on success, reports an
// HSA_EVT_ID_ALLOCATE event carrying the new pointer, its size and the region's segment
// and global flags (if that event is enabled). A failed allocation is returned to the
// caller unchanged and nothing is reported.
hsa_status_t HSA_API MemoryAllocateIntercept(hsa_region_t region, size_t size, void** ptr) {
  const hsa_status_t status = saved_core_api.hsa_memory_allocate_fn(region, size, ptr);
  if (status != HSA_STATUS_SUCCESS) return status;
  if (!IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE)) return HSA_STATUS_SUCCESS;

  hsa_evt_data_t data{};
  data.allocate.ptr = *ptr;
  data.allocate.size = size;

  // The global flags are only queried if the segment query succeeded.
  const bool have_segment =
      saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_SEGMENT,
                                            &data.allocate.segment) == HSA_STATUS_SUCCESS;
  const bool have_flags =
      have_segment &&
      saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_GLOBAL_FLAGS,
                                            &data.allocate.global_flag) == HSA_STATUS_SUCCESS;
  if (!have_flags) fatal("hsa_region_get_info failed");

  ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data);
  return HSA_STATUS_SUCCESS;
}
// Intercept for hsa_memory_assign_agent. Forwards the call and, on success, reports an
// HSA_EVT_ID_DEVICE event carrying `ptr` and the agent's device type (if that event is
// enabled). A failed call is returned to the caller unchanged and nothing is reported.
hsa_status_t MemoryAssignAgentIntercept(void* ptr, hsa_agent_t agent,
                                        hsa_access_permission_t access) {
  const hsa_status_t status = saved_core_api.hsa_memory_assign_agent_fn(ptr, agent, access);
  if (status != HSA_STATUS_SUCCESS) return status;
  if (!IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE)) return HSA_STATUS_SUCCESS;

  hsa_evt_data_t data{};
  data.device.ptr = ptr;

  const hsa_status_t info_status =
      saved_core_api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &data.device.type);
  if (info_status != HSA_STATUS_SUCCESS) fatal("hsa_agent_get_info failed");

  ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data);
  return HSA_STATUS_SUCCESS;
}
// Intercept for hsa_memory_copy. Forwards the call and, on success, reports an
// HSA_EVT_ID_MEMCOPY event carrying destination, source and size (if that event is
// enabled). A failed copy is returned to the caller unchanged and nothing is reported.
hsa_status_t MemoryCopyIntercept(void* dst, const void* src, size_t size) {
  const hsa_status_t status = saved_core_api.hsa_memory_copy_fn(dst, src, size);
  if (status != HSA_STATUS_SUCCESS) return status;
  if (!IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_MEMCOPY)) return HSA_STATUS_SUCCESS;

  hsa_evt_data_t data{};
  data.memcopy.size = size;
  data.memcopy.src = src;
  data.memcopy.dst = dst;
  ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_MEMCOPY, &data);
  return HSA_STATUS_SUCCESS;
}
// Intercept for hsa_amd_memory_pool_allocate. Forwards the call; a failed or zero-sized
// allocation is returned to the caller unchanged and nothing is reported. Otherwise:
//   - if HSA_EVT_ID_ALLOCATE is enabled, one event is reported with the new pointer, its
//     size and the pool's segment and global flags;
//   - if HSA_EVT_ID_DEVICE is enabled, one event is reported for every agent that may
//     access the pool by default, carrying that agent's info and the new pointer.
hsa_status_t MemoryPoolAllocateIntercept(hsa_amd_memory_pool_t pool, size_t size, uint32_t flags,
                                         void** ptr) {
  hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_pool_allocate_fn(pool, size, flags, ptr);
  if (size == 0 || status != HSA_STATUS_SUCCESS) return status;

  if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE)) {
    hsa_evt_data_t data{};
    data.allocate.ptr = *ptr;
    data.allocate.size = size;

    if (saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn(
            pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &data.allocate.segment) != HSA_STATUS_SUCCESS ||
        saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn(
            pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &data.allocate.global_flag) !=
            HSA_STATUS_SUCCESS)
      fatal("hsa_amd_memory_pool_get_info failed");

    ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data);
  }

  if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE)) {
    // Pass the allocated memory (*ptr), not the address of the caller's out-parameter, so
    // the device events describe the same pointer as the allocate event above.
    auto callback_data = std::make_pair(pool, *ptr);

    auto agent_callback = [](hsa_agent_t agent, void* iterate_agent_callback_data) {
      auto [pool, allocated_ptr] =
          *reinterpret_cast<decltype(callback_data)*>(iterate_agent_callback_data);

      // Skip agents for which the access query fails or that are not allowed to access
      // the pool by default.
      if (hsa_amd_memory_pool_access_t value;
          saved_amd_ext_api.hsa_amd_agent_memory_pool_get_info_fn(
              agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &value) != HSA_STATUS_SUCCESS ||
          value != HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT)
        return HSA_STATUS_SUCCESS;

      auto it = agent_info_map.find(agent.handle);
      if (it == agent_info_map.end()) fatal("agent was not found in the agent_info map");

      hsa_evt_data_t data{};
      data.device.type = it->second.type;
      data.device.id = it->second.id;
      data.device.agent = agent;
      data.device.ptr = allocated_ptr;

      ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data);
      return HSA_STATUS_SUCCESS;
    };
    saved_core_api.hsa_iterate_agents_fn(agent_callback, &callback_data);
  }

  return HSA_STATUS_SUCCESS;
}
// Intercept for hsa_amd_memory_pool_free. If HSA_EVT_ID_ALLOCATE is enabled, an allocate
// event with size 0 is reported for `ptr` before the memory is released. The status of
// the underlying free is returned.
hsa_status_t MemoryPoolFreeIntercept(void* ptr) {
  const bool report_free = IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE);
  if (report_free) {
    hsa_evt_data_t data{};
    data.allocate.size = 0;
    data.allocate.ptr = ptr;
    ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data);
  }
  return saved_amd_ext_api.hsa_amd_memory_pool_free_fn(ptr);
}
// Agent allow access callback 'hsa_amd_agents_allow_access'
// Forwards the call and, on success, reports one HSA_EVT_ID_DEVICE event per agent in
// `agents` (if that event is enabled), carrying the agent's info and `ptr`. A failed call
// is returned to the caller unchanged and nothing is reported. An agent missing from
// agent_info_map is a fatal error.
hsa_status_t AgentsAllowAccessIntercept(uint32_t num_agents, const hsa_agent_t* agents,
                                        const uint32_t* flags, const void* ptr) {
  const hsa_status_t status =
      saved_amd_ext_api.hsa_amd_agents_allow_access_fn(num_agents, agents, flags, ptr);
  if (status != HSA_STATUS_SUCCESS) return status;
  if (!IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE)) return HSA_STATUS_SUCCESS;

  for (uint32_t index = 0; index < num_agents; ++index) {
    const hsa_agent_t agent = agents[index];

    const auto found = agent_info_map.find(agent.handle);
    if (found == agent_info_map.end()) fatal("agent was not found in the agent_info map");

    hsa_evt_data_t data{};
    data.device.type = found->second.type;
    data.device.id = found->second.id;
    data.device.agent = agent;
    data.device.ptr = ptr;
    ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data);
  }
  return HSA_STATUS_SUCCESS;
}
// Argument bundle for code object iteration.
// NOTE(review): presumably passed as the `arg` of CodeObjectCallback -- the code that
// reads these fields is not in this part of the file; confirm there.
struct CodeObjectCallbackArg {
// User callback and its opaque argument.
activity_rtapi_callback_t callback_fun;
void* callback_arg;
// NOTE(review): looks like this selects unload rather than load reporting -- confirm.
bool unload;
};
// Callback handed to 'hsa_ven_amd_loader_executable_iterate_loaded_code_objects'. For one loaded
// code object it queries the loader for the storage description, the load base/size/delta and the
// URI, then reports a CODEOBJ event.
//   executable         - not used by this function.
//   loaded_code_object - the code object to describe.
//   arg                - must point to a bool; true marks the event as an unload.
// Returns HSA_STATUS_SUCCESS; any failing loader query is fatal.
hsa_status_t CodeObjectCallback(hsa_executable_t executable,
hsa_loaded_code_object_t loaded_code_object, void* arg) {
hsa_evt_data_t data{};
// The storage type decides which of the storage fields below are meaningful.
if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info(
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE,
&data.codeobj.storage_type) != HSA_STATUS_SUCCESS)
fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");
if (data.codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE) {
// File-backed: a storage_file of -1 is treated as a failure; the memory fields are zeroed.
if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info(
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE,
&data.codeobj.storage_file) != HSA_STATUS_SUCCESS ||
data.codeobj.storage_file == -1)
fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");
data.codeobj.memory_base = data.codeobj.memory_size = 0;
} else if (data.codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY) {
// Memory-backed: record base and size; storage_file is set to -1.
if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info(
loaded_code_object,
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE,
&data.codeobj.memory_base) != HSA_STATUS_SUCCESS ||
hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info(
loaded_code_object,
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE,
&data.codeobj.memory_size) != HSA_STATUS_SUCCESS)
fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");
data.codeobj.storage_file = -1;
} else if (data.codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE) {
// Code objects without storage are skipped: no event is reported for them.
return HSA_STATUS_SUCCESS; // FIXME: do we really not care about these code objects?
} else {
fatal("unknown code object storage type: %d", data.codeobj.storage_type);
}
// Where the code object was loaded.
if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info(
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE,
&data.codeobj.load_base) != HSA_STATUS_SUCCESS ||
hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info(
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE,
&data.codeobj.load_size) != HSA_STATUS_SUCCESS ||
hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info(
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA,
&data.codeobj.load_delta) != HSA_STATUS_SUCCESS)
fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");
// The URI is fetched in two steps: first its length, then the characters into a string of that
// length.
// NOTE(review): presumably URI_LENGTH excludes the terminating NUL — confirm against the loader
// extension documentation.
if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info(
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH,
&data.codeobj.uri_length) != HSA_STATUS_SUCCESS)
fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");
std::string uri_str(data.codeobj.uri_length, '\0');
if (hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info(
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri_str.data()) !=
HSA_STATUS_SUCCESS)
fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");
// data.codeobj.uri points into uri_str, which is destroyed when this function returns.
data.codeobj.uri = uri_str.c_str();
data.codeobj.unload = *static_cast<bool*>(arg) ? 1 : 0;
ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, &data);
return HSA_STATUS_SUCCESS;
}
// Intercept for 'hsa_executable_freeze'. A failed freeze is returned unchanged. After a successful
// freeze, and if the CODEOBJ event is enabled, each loaded code object of the executable is passed
// to CodeObjectCallback with the unload flag set to false.
hsa_status_t ExecutableFreezeIntercept(hsa_executable_t executable, const char* options) {
  const hsa_status_t freeze_status = saved_core_api.hsa_executable_freeze_fn(executable, options);
  if (freeze_status != HSA_STATUS_SUCCESS) return freeze_status;
  if (!IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ)) return HSA_STATUS_SUCCESS;

  bool is_unload = false;
  hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
      executable, CodeObjectCallback, &is_unload);
  return HSA_STATUS_SUCCESS;
}
// Intercept for 'hsa_executable_destroy'. If the CODEOBJ event is enabled, each loaded code object
// is passed to CodeObjectCallback with the unload flag set to true before the executable is
// destroyed by the saved runtime entry.
hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) {
  const bool report_unload = IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ);
  if (report_unload) {
    bool is_unload = true;
    hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
        executable, CodeObjectCallback, &is_unload);
  }
  return saved_core_api.hsa_executable_destroy_fn(executable);
}
// Last value that was successfully forwarded to 'hsa_amd_profiling_async_copy_enable' through the
// intercept below.
std::atomic<bool> profiling_async_copy_enable{false};

// Intercept for 'hsa_amd_profiling_async_copy_enable'. The request is forwarded to the runtime and
// the requested state is remembered only when the runtime accepted it.
hsa_status_t ProfilingAsyncCopyEnableIntercept(bool enable) {
  const hsa_status_t result = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(enable);
  if (result != HSA_STATUS_SUCCESS) return result;
  profiling_async_copy_enable.store(enable, std::memory_order_release);
  return result;
}
// Tracker completion handler for async copies: builds an HSA_OPS/COPY activity record from the
// tracker entry's begin/end times and correlation id, and reports it. The device id is always 0.
void MemoryASyncCopyHandler(const Tracker::entry_t* entry) {
  activity_record_t copy_record{};
  copy_record.domain = ACTIVITY_DOMAIN_HSA_OPS;
  copy_record.op = HSA_OP_ID_COPY;
  copy_record.correlation_id = entry->correlation_id;
  copy_record.device_id = 0;
  copy_record.begin_ns = entry->begin;
  copy_record.end_ns = entry->end;
  ReportActivity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY, &copy_record);
}
// Intercept for 'hsa_amd_memory_async_copy_on_engine'. Async-copy profiling is switched on in the
// runtime when either the application asked for it or COPY tracing is enabled. When tracing is
// enabled, the copy is issued with the tracker entry's signal in place of 'completion_signal'.
hsa_status_t MemoryASyncCopyOnEngineIntercept(
    void* dst, hsa_agent_t dst_agent, const void* src, hsa_agent_t src_agent, size_t size,
    uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal,
    hsa_amd_sdma_engine_id_t engine_id, bool force_copy_on_sdma) {
  const bool tracing_copies = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY);
  const bool app_wants_profiling = profiling_async_copy_enable.load(std::memory_order_relaxed);

  // FIXME: what happens if the state changes before returning?
  [[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(
      app_wants_profiling || tracing_copies);
  assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");

  if (tracing_copies) {
    // The entry is handed to the tracker; on a failed submission it is given back via Disable().
    Tracker::entry_t* tracked = new Tracker::entry_t();
    tracked->handler = MemoryASyncCopyHandler;
    tracked->correlation_id = CorrelationId();
    Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, tracked);

    status = saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn(
        dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, tracked->signal,
        engine_id, force_copy_on_sdma);
    if (status != HSA_STATUS_SUCCESS) Tracker::Disable(tracked);
    return status;
  }

  // Not tracing: plain pass-through with the caller's completion signal.
  return saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn(
      dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal,
      engine_id, force_copy_on_sdma);
}
// Intercept for 'hsa_amd_memory_async_copy'. Async-copy profiling is switched on in the runtime
// when either the application asked for it or COPY tracing is enabled. When tracing is enabled,
// the copy is issued with the tracker entry's signal in place of 'completion_signal'.
hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const void* src,
                                      hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals,
                                      const hsa_signal_t* dep_signals,
                                      hsa_signal_t completion_signal) {
  const bool tracing_copies = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY);
  const bool app_wants_profiling = profiling_async_copy_enable.load(std::memory_order_relaxed);

  // FIXME: what happens if the state changes before returning?
  [[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(
      app_wants_profiling || tracing_copies);
  assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");

  if (tracing_copies) {
    // The entry is handed to the tracker; on a failed submission it is given back via Disable().
    Tracker::entry_t* tracked = new Tracker::entry_t();
    tracked->handler = MemoryASyncCopyHandler;
    tracked->correlation_id = CorrelationId();
    Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, tracked);

    status = saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
        dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, tracked->signal);
    if (status != HSA_STATUS_SUCCESS) Tracker::Disable(tracked);
    return status;
  }

  // Not tracing: plain pass-through with the caller's completion signal.
  return saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
      dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal);
}
// Intercept for 'hsa_amd_memory_async_copy_rect'. Async-copy profiling is switched on in the
// runtime when either the application asked for it or COPY tracing is enabled. When tracing is
// enabled, the copy is issued with the tracker entry's signal in place of 'completion_signal'.
hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst,
                                          const hsa_dim3_t* dst_offset,
                                          const hsa_pitched_ptr_t* src,
                                          const hsa_dim3_t* src_offset, const hsa_dim3_t* range,
                                          hsa_agent_t copy_agent, hsa_amd_copy_direction_t dir,
                                          uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                          hsa_signal_t completion_signal) {
  const bool tracing_copies = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY);
  const bool app_wants_profiling = profiling_async_copy_enable.load(std::memory_order_relaxed);

  // FIXME: what happens if the state changes before returning?
  [[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(
      app_wants_profiling || tracing_copies);
  assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");

  if (tracing_copies) {
    // The entry is handed to the tracker; on a failed submission it is given back via Disable().
    Tracker::entry_t* tracked = new Tracker::entry_t();
    tracked->handler = MemoryASyncCopyHandler;
    tracked->correlation_id = CorrelationId();
    Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, tracked);

    status = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
        dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
        tracked->signal);
    if (status != HSA_STATUS_SUCCESS) Tracker::Disable(tracked);
    return status;
  }

  // Not tracing: plain pass-through with the caller's completion signal.
  return saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
      dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
      completion_signal);
}
} // namespace
// Returns the HSA system timestamp scaled to nanoseconds.
// Returns 0 when the HSA runtime reports that it is not initialized; any other failure to query
// the runtime, or a reported timestamp frequency of 0, is fatal.
roctracer_timestamp_t timestamp_ns() {
  // If the HSA intercept is installed, then use the "original" 'hsa_system_get_info' function to
  // avoid reporting calls for internal use of the HSA API by the tracer.
  auto hsa_system_get_info_fn = saved_core_api.hsa_system_get_info_fn;

  // If the HSA intercept is not installed, use the default 'hsa_system_get_info'.
  if (hsa_system_get_info_fn == nullptr) hsa_system_get_info_fn = hsa_system_get_info;

  uint64_t sysclock = 0;
  if (hsa_status_t status = hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
      status == HSA_STATUS_ERROR_NOT_INITIALIZED)
    return 0;
  else if (status != HSA_STATUS_SUCCESS)
    fatal("hsa_system_get_info failed");

  // The tick period (in ns) is queried once, on the first call that gets this far, and cached.
  // NOTE: this is an integer division, so a frequency that does not divide 1e9 evenly yields a
  // truncated period.
  static const uint64_t sysclock_period = [&]() {
    uint64_t sysclock_hz = 0;
    if (hsa_status_t status =
            hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
        status != HSA_STATUS_SUCCESS)
      fatal("hsa_system_get_info failed");
    // Guard the division below: a zero frequency would otherwise be an integer division by zero.
    if (sysclock_hz == 0) fatal("hsa_system_get_info reported a zero timestamp frequency");
    return static_cast<uint64_t>(1000000000) / sysclock_hz;
  }();

  return sysclock * sysclock_period;
}
// Sets up HSA tracing on the given API table:
//   1. copies the current core and AMD extension tables so the tracer can call the real entries,
//   2. enumerates the agents and fills 'agent_info_map' (keyed by agent handle),
//   3. fetches the loader extension table used by the code object intercepts,
//   4. overwrites selected entries of 'table' with the HSA_OPS / HSA_EVT intercepts, and
//   5. installs the HSA_API wrappers.
// Any failing HSA call in steps 2 and 3 is fatal.
void Initialize(HsaApiTable* table) {
// Save the HSA core api and amd_ext api.
// These copies are taken before any entry of 'table' is replaced below.
saved_core_api = *table->core_;
saved_amd_ext_api = *table->amd_ext_;
// Enumerate the agents.
if (hsa_support::saved_core_api.hsa_iterate_agents_fn(
[](hsa_agent_t agent, void* data) {
hsa_support::AgentInfo agent_info;
if (hsa_support::saved_core_api.hsa_agent_get_info_fn(
agent, HSA_AGENT_INFO_DEVICE, &agent_info.type) != HSA_STATUS_SUCCESS)
fatal("hsa_agent_get_info failed");
// The id depends on the device type: CPUs and "other" agents are numbered by running
// counters, GPUs use the driver node id reported by the runtime.
switch (agent_info.type) {
case HSA_DEVICE_TYPE_CPU:
static int cpu_agent_count = 0;
agent_info.id = cpu_agent_count++;
break;
case HSA_DEVICE_TYPE_GPU: {
uint32_t driver_node_id;
if (hsa_support::saved_core_api.hsa_agent_get_info_fn(
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
&driver_node_id) != HSA_STATUS_SUCCESS)
fatal("hsa_agent_get_info failed");
agent_info.id = driver_node_id;
} break;
default:
static int other_agent_count = 0;
agent_info.id = other_agent_count++;
break;
}
hsa_support::agent_info_map.emplace(agent.handle, agent_info);
return HSA_STATUS_SUCCESS;
},
nullptr) != HSA_STATUS_SUCCESS)
fatal("hsa_iterate_agents failed");
// Install the code object intercept.
// The loader extension table is stored in 'hsa_loader_api'.
hsa_status_t status = table->core_->hsa_system_get_major_extension_table_fn(
HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &hsa_loader_api);
if (status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_major_extension_table failed");
// Install the HSA_OPS intercept
table->amd_ext_->hsa_amd_memory_async_copy_fn = MemoryASyncCopyIntercept;
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = MemoryASyncCopyRectIntercept;
table->amd_ext_->hsa_amd_memory_async_copy_on_engine_fn = MemoryASyncCopyOnEngineIntercept;
table->amd_ext_->hsa_amd_profiling_async_copy_enable_fn = ProfilingAsyncCopyEnableIntercept;
// Install the HSA_EVT intercept
table->core_->hsa_memory_allocate_fn = MemoryAllocateIntercept;
table->core_->hsa_memory_assign_agent_fn = MemoryAssignAgentIntercept;
table->core_->hsa_memory_copy_fn = MemoryCopyIntercept;
table->amd_ext_->hsa_amd_memory_pool_allocate_fn = MemoryPoolAllocateIntercept;
table->amd_ext_->hsa_amd_memory_pool_free_fn = MemoryPoolFreeIntercept;
table->amd_ext_->hsa_amd_agents_allow_access_fn = AgentsAllowAccessIntercept;
table->core_->hsa_executable_freeze_fn = ExecutableFreezeIntercept;
table->core_->hsa_executable_destroy_fn = ExecutableDestroyIntercept;
// Install the HSA_API wrappers
// NOTE(review): these run after the intercepts above were written into 'table'; presumably the
// wrappers are layered on top of them — confirm in the detail:: implementation.
detail::InstallCoreApiWrappers(table->core_);
detail::InstallAmdExtWrappers(table->amd_ext_);
detail::InstallImageExtWrappers(table->image_ext_);
}
void Finalize() {
if (hsa_status_t status =
saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(profiling_async_copy_enable.load(std::memory_order_relaxed));
status != HSA_STATUS_SUCCESS)
assert(!"hsa_amd_profiling_async_copy_enable failed");
memset(&saved_core_api, '\0', sizeof(saved_core_api));
memset(&saved_amd_ext_api, '\0', sizeof(saved_amd_ext_api));
memset(&hsa_loader_api, '\0', sizeof(hsa_loader_api));
}
// Thin forwarder to detail::GetApiName for the given id.
const char* GetApiName(uint32_t id) {
  const char* api_name = detail::GetApiName(id);
  return api_name;
}
// Maps an HSA_EVT operation id to its name. Any id without a name (including HSA_EVT_ID_NUMBER)
// raises ApiError with ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT.
const char* GetEvtName(uint32_t id) {
  switch (id) {
    case HSA_EVT_ID_ALLOCATE: return "ALLOCATE";
    case HSA_EVT_ID_DEVICE:   return "DEVICE";
    case HSA_EVT_ID_MEMCOPY:  return "MEMCOPY";
    case HSA_EVT_ID_SUBMIT:   return "SUBMIT";
    case HSA_EVT_ID_KSYMBOL:  return "KSYMBOL";
    case HSA_EVT_ID_CODEOBJ:  return "CODEOBJ";
    default:
      // HSA_EVT_ID_NUMBER and every unknown id fall through to the error below.
      break;
  }
  throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid HSA EVT callback id");
}
// Maps an HSA_OPS operation id to its name. HSA_OP_ID_RESERVED1 is reported as "PCSAMPLE". Any
// other id raises ApiError with ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT.
const char* GetOpsName(uint32_t id) {
  switch (id) {
    case HSA_OP_ID_DISPATCH:  return "DISPATCH";
    case HSA_OP_ID_COPY:      return "COPY";
    case HSA_OP_ID_BARRIER:   return "BARRIER";
    case HSA_OP_ID_RESERVED1: return "PCSAMPLE";
    default:
      break;
  }
  throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid HSA OPS callback id");
}
// Thin forwarder to detail::GetApiCode for the given string.
uint32_t GetApiCode(const char* str) {
  const uint32_t api_code = detail::GetApiCode(str);
  return api_code;
}
// Stores 'function' in the 'report_activity' atomic (relaxed store), replacing whatever was
// stored there before.
void RegisterTracerCallback(int (*function)(activity_domain_t domain, uint32_t operation_id,
                                            void* data)) {
  constexpr auto kOrder = std::memory_order_relaxed;
  report_activity.store(function, kOrder);
}
} // namespace roctracer::hsa_support
+54
Féach ar an gComhad
@@ -0,0 +1,54 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef HSA_SUPPORT_H_
#define HSA_SUPPORT_H_
#include "roctracer.h"
#include "roctracer_hsa.h"
#include <hsa/hsa_api_trace.h>
namespace roctracer::hsa_support {
// Trace state for an HSA API call: the API data record, a timestamp slot, a data slot, and two
// optional function pointers that receive the operation id and this structure.
// NOTE(review): how and when these fields are filled is not visible in this header — presumably
// by the generated API wrappers around the enter/exit phases; confirm there.
struct hsa_trace_data_t {
hsa_api_data_t api_data;
uint64_t phase_enter_timestamp;
uint64_t phase_data;
void (*phase_enter)(hsa_api_id_t operation_id, hsa_trace_data_t* data);
void (*phase_exit)(hsa_api_id_t operation_id, hsa_trace_data_t* data);
};
// Saves the HSA core and AMD extension API tables, enumerates the agents, and installs the
// tracer's intercepts and API wrappers into 'table'.
void Initialize(HsaApiTable* table);
// Re-applies the application's async-copy profiling setting to the runtime and clears the saved
// API tables.
void Finalize();
// Returns the name for an HSA API id (forwards to the detail:: implementation).
const char* GetApiName(uint32_t id);
// Returns the name of an HSA_EVT operation id; throws ApiError for an invalid id.
const char* GetEvtName(uint32_t id);
// Returns the name of an HSA_OPS operation id; throws ApiError for an invalid id.
const char* GetOpsName(uint32_t id);
// Returns the HSA API id for a name string (forwards to the detail:: implementation).
uint32_t GetApiCode(const char* str);
// Registers the function the HSA support code reports activities to.
void RegisterTracerCallback(int (*function)(activity_domain_t domain, uint32_t operation_id,
void* data));
// Returns the HSA system timestamp scaled to nanoseconds, or 0 if the HSA runtime is not
// initialized.
uint64_t timestamp_ns();
} // namespace roctracer::hsa_support
#endif // HSA_SUPPORT_H_
+192
Féach ar an gComhad
@@ -0,0 +1,192 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef ROCTRACER_LOADER_H_
#define ROCTRACER_LOADER_H_
#include "debug.h"
#include <hip/hip_runtime_api.h>
#include <dlfcn.h>
#include <experimental/filesystem>
#include <link.h>
#include <unistd.h>
namespace fs = std::experimental::filesystem;
namespace roctracer {
// Base loader class
template <typename Loader> class BaseLoader {
protected:
BaseLoader(const char* pattern) {
// Iterate through the process' loaded shared objects and try to dlopen the first entry with a
// file name starting with the given 'pattern'. This allows the loader to acquire a handle
// to the target library iff it is already loaded. The handle is used to query symbols
// exported by that library.
auto callback = [this, pattern](dl_phdr_info* info) {
if (handle_ == nullptr &&
fs::path(info->dlpi_name).filename().string().rfind(pattern, 0) == 0)
handle_ = ::dlopen(info->dlpi_name, RTLD_LAZY);
};
dl_iterate_phdr(
[](dl_phdr_info* info, size_t size, void* data) {
(*reinterpret_cast<decltype(callback)*>(data))(info);
return 0;
},
&callback);
}
~BaseLoader() {
if (handle_ != nullptr) ::dlclose(handle_);
}
BaseLoader(const BaseLoader&) = delete;
BaseLoader& operator=(const BaseLoader&) = delete;
public:
bool IsEnabled() const { return handle_ != nullptr; }
template <typename FunctionPtr> FunctionPtr GetFun(const char* symbol) const {
assert(IsEnabled());
auto function_ptr = reinterpret_cast<FunctionPtr>(::dlsym(handle_, symbol));
if (function_ptr == nullptr) fatal("symbol lookup '%s' failed: %s", symbol, ::dlerror());
return function_ptr;
}
static inline Loader& Instance() {
static Loader instance;
return instance;
}
private:
void* handle_;
};
} // namespace roctracer
// HIP runtime library loader class
namespace roctracer {
#if STATIC_BUILD
// Weak fallback definitions of the HIP entry points used by HipLoader in the STATIC_BUILD
// configuration. Each one returns a null pointer, 0, or does nothing.
// NOTE(review): presumably the HIP runtime's strong definitions take precedence when it is linked
// in — confirm against the static build setup.
__attribute__((weak)) const char* hipKernelNameRef(const hipFunction_t f) { return nullptr; }
__attribute__((weak)) const char* hipKernelNameRefByPtr(const void* hostFunction,
hipStream_t stream) {
return nullptr;
}
__attribute__((weak)) int hipGetStreamDeviceId(hipStream_t stream) { return 0; }
__attribute__((weak)) const char* hipGetCmdName(unsigned op) { return nullptr; }
__attribute__((weak)) const char* hipApiName(uint32_t id) { return nullptr; }
__attribute__((weak)) void hipRegisterTracerCallback(int (*function)(activity_domain_t domain,
uint32_t operation_id,
void* data)) {}
// HIP loader used in the STATIC_BUILD configuration: every method calls the corresponding HIP
// function directly, and the loader always reports itself as enabled.
class HipLoader {
 private:
  HipLoader() {}

 public:
  static inline HipLoader& Instance() {
    static HipLoader instance;
    return instance;
  }

  bool IsEnabled() const { return true; }

  const char* ApiName(uint32_t id) const { return hipApiName(id); }

  const char* GetOpName(unsigned op) const { return hipGetCmdName(op); }

  int GetStreamDeviceId(hipStream_t stream) const { return hipGetStreamDeviceId(stream); }

  const char* KernelNameRef(const hipFunction_t f) const { return hipKernelNameRef(f); }

  const char* KernelNameRefByPtr(const void* host_function, hipStream_t stream = nullptr) const {
    return hipKernelNameRefByPtr(host_function, stream);
  }

  void RegisterTracerCallback(int (*callback)(activity_domain_t domain, uint32_t operation_id,
                                              void* data)) const {
    hipRegisterTracerCallback(callback);
  }
};
#else
// HIP loader for the non-static build: each method resolves its HIP entry point from
// "libamdhip64.so" through GetFun on first use, caches the pointer in a function-local static,
// and forwards the call.
class HipLoader : public BaseLoader<HipLoader> {
 private:
  friend HipLoader& BaseLoader::Instance();
  HipLoader() : BaseLoader("libamdhip64.so") {}

 public:
  int GetStreamDeviceId(hipStream_t stream) const {
    using Fn = int (*)(hipStream_t);
    static const Fn get_stream_device_id = GetFun<Fn>("hipGetStreamDeviceId");
    return get_stream_device_id(stream);
  }

  const char* KernelNameRef(const hipFunction_t f) const {
    using Fn = const char* (*)(const hipFunction_t);
    static const Fn kernel_name_ref = GetFun<Fn>("hipKernelNameRef");
    return kernel_name_ref(f);
  }

  const char* KernelNameRefByPtr(const void* host_function, hipStream_t stream = nullptr) const {
    using Fn = const char* (*)(const void*, hipStream_t);
    static const Fn kernel_name_ref_by_ptr = GetFun<Fn>("hipKernelNameRefByPtr");
    return kernel_name_ref_by_ptr(host_function, stream);
  }

  const char* GetOpName(unsigned op) const {
    using Fn = const char* (*)(unsigned);
    static const Fn get_cmd_name = GetFun<Fn>("hipGetCmdName");
    return get_cmd_name(op);
  }

  const char* ApiName(uint32_t id) const {
    using Fn = const char* (*)(uint32_t);
    static const Fn api_name = GetFun<Fn>("hipApiName");
    return api_name(id);
  }

  void RegisterTracerCallback(int (*callback)(activity_domain_t domain, uint32_t operation_id,
                                              void* data)) const {
    using TracerCallback = int (*)(activity_domain_t, uint32_t, void*);
    using Fn = void (*)(TracerCallback);
    static const Fn register_callback = GetFun<Fn>("hipRegisterTracerCallback");
    register_callback(callback);
  }
};
#endif
// ROCTX library loader class
// Resolves 'roctxRegisterTracerCallback' from "libroctx64.so" on first use and forwards to it.
class RocTxLoader : public BaseLoader<RocTxLoader> {
 private:
  friend RocTxLoader& BaseLoader::Instance();
  RocTxLoader() : BaseLoader("libroctx64.so") {}

 public:
  void RegisterTracerCallback(int (*callback)(activity_domain_t domain, uint32_t operation_id,
                                              void* data)) const {
    using TracerCallback = int (*)(activity_domain_t, uint32_t, void*);
    using Fn = void (*)(TracerCallback);
    static const Fn register_callback = GetFun<Fn>("roctxRegisterTracerCallback");
    register_callback(callback);
  }
};
} // namespace roctracer
#endif // ROCTRACER_LOADER_H_
+238
Féach ar an gComhad
@@ -0,0 +1,238 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef MEMORY_POOL_H_
#define MEMORY_POOL_H_
#include "roctracer.h"
#include <algorithm>
#include <cassert>
#include <condition_variable>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <future>
#include <mutex>
#include <thread>
#include <type_traits>
namespace roctracer {
class MemoryPool {
public:
MemoryPool(const roctracer_properties_t& properties) : properties_(properties) {
// Pool definition: The memory pool is split in 2 buffers of equal size. When first initialized,
// the write pointer points to the first element of the first buffer. When a buffer is full, or
// when Flush() is called, the write pointer moves to the other buffer.
// Each buffer should be large enough to hold at least 2 activity records, as record pairs may
// be written when external correlation ids are used.
const size_t allocation_size =
2 * std::max(2 * sizeof(roctracer_record_t), properties_.buffer_size);
pool_begin_ = nullptr;
AllocateMemory(&pool_begin_, allocation_size);
assert(pool_begin_ != nullptr && "pool allocator failed");
pool_end_ = pool_begin_ + allocation_size;
buffer_begin_ = pool_begin_;
buffer_end_ = buffer_begin_ + properties_.buffer_size;
record_ptr_ = buffer_begin_;
data_ptr_ = buffer_end_;
// Create a consumer thread and wait for it to be ready to accept work.
std::promise<void> ready;
std::future<void> future = ready.get_future();
consumer_thread_ = std::thread(&MemoryPool::ConsumerThreadLoop, this, std::move(ready));
future.wait();
}
~MemoryPool() {
Flush();
// Wait for the previous flush to complete, then send the exit signal.
NotifyConsumerThread(nullptr, nullptr);
consumer_thread_.join();
// Free the pool's buffer memory.
AllocateMemory(&pool_begin_, 0);
}
MemoryPool(const MemoryPool&) = delete;
MemoryPool& operator=(const MemoryPool&) = delete;
template <typename Record, typename Functor = std::function<void(Record& record, const void*)>>
void Write(Record&& record, const void* data, size_t data_size, Functor&& store_data = {}) {
assert(data != nullptr || data_size == 0); // If data is null, then data_size must be 0
std::lock_guard producer_lock(producer_mutex_);
// The amount of memory reserved in the buffer to store data. If the data cannot fit because it
// is larger than the buffer size minus one record, then the data won't be copied into the
// buffer.
size_t reserve_data_size =
data_size <= (properties_.buffer_size - sizeof(Record)) ? data_size : 0;
std::byte* next_record = record_ptr_ + sizeof(Record);
if (next_record > (data_ptr_ - reserve_data_size)) {
NotifyConsumerThread(buffer_begin_, record_ptr_);
SwitchBuffers();
next_record = record_ptr_ + sizeof(Record);
assert(next_record <= buffer_end_ && "buffer size is less then the record size");
}
// Store data in the record. Copy the data first if it fits in the buffer
// (reserve_data_size != 0).
if (reserve_data_size) {
data_ptr_ -= data_size;
::memcpy(data_ptr_, data, data_size);
store_data(record, data_ptr_);
} else if (data != nullptr) {
store_data(record, data);
}
// Store the record into the buffer, and increment the write pointer.
::memcpy(record_ptr_, &record, sizeof(Record));
record_ptr_ = next_record;
// If the data does not fit in the buffer, flush the buffer with the record as is. We don't copy
// the data so we make sure that the record and its data are processed by waiting until the
// flush is complete.
if (data != nullptr && reserve_data_size == 0) {
NotifyConsumerThread(buffer_begin_, record_ptr_);
SwitchBuffers();
{
std::unique_lock consumer_lock(consumer_mutex_);
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; });
}
}
}
template <typename Record> void Write(Record&& record) {
using DataPtr = void*;
Write(std::forward<Record>(record), DataPtr(nullptr), 0, {});
}
// Flush the records and block until they are all made visible to the client.
void Flush() {
{
std::lock_guard producer_lock(producer_mutex_);
if (record_ptr_ == buffer_begin_) return;
NotifyConsumerThread(buffer_begin_, record_ptr_);
SwitchBuffers();
}
{
// Wait for the current operation to complete.
std::unique_lock consumer_lock(consumer_mutex_);
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; });
}
}
private:
void SwitchBuffers() {
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
buffer_end_ = buffer_begin_ + properties_.buffer_size;
record_ptr_ = buffer_begin_;
data_ptr_ = buffer_end_;
}
void ConsumerThreadLoop(std::promise<void> ready) {
std::unique_lock consumer_lock(consumer_mutex_);
// This consumer is now ready to accept work.
ready.set_value();
while (true) {
consumer_cond_.wait(consumer_lock, [this]() { return consumer_arg_.valid; });
// begin == end == nullptr means the thread needs to exit.
if (consumer_arg_.begin == nullptr && consumer_arg_.end == nullptr) break;
properties_.buffer_callback_fun(reinterpret_cast<const char*>(consumer_arg_.begin),
reinterpret_cast<const char*>(consumer_arg_.end),
properties_.buffer_callback_arg);
// Mark this operation as complete (valid=false) and notify all producers that may be
// waiting for this operation to finish, or to start a new operation. See comment below in
// NotifyConsumerThread().
consumer_arg_.valid = false;
consumer_cond_.notify_all();
}
}
void NotifyConsumerThread(const std::byte* data_begin, const std::byte* data_end) {
std::unique_lock consumer_lock(consumer_mutex_);
// If consumer_arg_ is still in use (valid=true), then wait for the consumer thread to finish
// processing the current operation. Multiple producers may wait here, one will be allowed to
// continue once the consumer thread is idle and valid=false. This prevents a race condition
// where operations would be lost if multiple producers could enter this critical section
// (sequentially) before the consumer thread could re-acquire the consumer_mutex_ lock.
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; });
consumer_arg_.begin = data_begin;
consumer_arg_.end = data_end;
consumer_arg_.valid = true;
consumer_cond_.notify_all();
}
void AllocateMemory(std::byte** ptr, size_t size) const {
if (properties_.alloc_fun != nullptr) {
// Use the custom allocator provided in the properties.
properties_.alloc_fun(reinterpret_cast<char**>(ptr), size, properties_.alloc_arg);
return;
}
// No custom allocator was provided so use the default malloc/realloc/free allocator.
if (*ptr == nullptr) {
*ptr = static_cast<std::byte*>(malloc(size));
} else if (size != 0) {
*ptr = static_cast<std::byte*>(realloc(*ptr, size));
} else {
free(*ptr);
*ptr = nullptr;
}
}
// Properties used to create the memory pool.
const roctracer_properties_t properties_;
// Pool definition
std::byte* pool_begin_;
std::byte* pool_end_;
std::byte* buffer_begin_;
std::byte* buffer_end_;
std::byte* record_ptr_;
std::byte* data_ptr_;
std::mutex producer_mutex_;
// Consumer thread
std::thread consumer_thread_;
struct {
const std::byte* begin;
const std::byte* end;
bool valid = false;
} consumer_arg_;
std::mutex consumer_mutex_;
std::condition_variable consumer_cond_;
};
} // namespace roctracer
#endif // MEMORY_POOL_H_
@@ -0,0 +1,102 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef UTIL_CALLBACK_TABLE_H_
#define UTIL_CALLBACK_TABLE_H_
#include "ext/prof_protocol.h"
#include <array>
#include <atomic>
#include <cassert>
#include <optional>
#include <shared_mutex>
#include <utility>
namespace roctracer::util {
#if __GNUC__ == 11 || __GNUC__ == 12
// Starting with gcc-11 (verified with gcc-12 as well), an array out-of-bounds subscript error is
// reported for accessing the registration table element at the operation ID index. Validating the
// index in the function calling Register/Unregister does not quiet the warning/error in release
// builds, so, for gcc-11 and gcc-12, we disable that warning just for this class.
#define IGNORE_GCC_ARRAY_BOUNDS_ERROR 1
#endif  // __GNUC__ == 11 || __GNUC__ == 12
#if IGNORE_GCC_ARRAY_BOUNDS_ERROR
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif  // IGNORE_GCC_ARRAY_BOUNDS_ERROR
namespace detail {
// Default 'IsStopped' policy: a stateless functor whose call operator always returns false.
struct False {
  constexpr bool operator()() { return false; }
};
}  // namespace detail

// Generic callbacks table
// A fixed-size table of N slots indexed by operation id. Each slot holds a value of type T, an
// 'enabled' flag and its own shared mutex; the table also counts how many slots are enabled.
template <typename T, uint32_t N, typename IsStopped = detail::False> class RegistrationTable {
 public:
  struct table_element_t {
    std::atomic<bool> enabled{false};
    mutable std::shared_mutex mutex;
    T data;
  };

  // Enables the slot for 'operation_id' and stores a T built from 'args' in it. Registering an
  // already-enabled slot only replaces its data.
  template <typename... Args> void Register(uint32_t operation_id, Args... args) {
    assert(operation_id < N && "operation_id is out of range");
    table_element_t& slot = table_.at(operation_id);
    std::unique_lock exclusive(slot.mutex);
    const bool was_enabled = slot.enabled.exchange(true, std::memory_order_relaxed);
    if (!was_enabled) registered_count_.fetch_add(1, std::memory_order_relaxed);
    slot.data = T{std::forward<Args>(args)...};
  }

  // Disables the slot for 'operation_id'; a slot that is not enabled is left untouched.
  void Unregister(uint32_t operation_id) {
    assert(operation_id < N && "id is out of range");
    table_element_t& slot = table_.at(operation_id);
    std::unique_lock exclusive(slot.mutex);
    const bool was_enabled = slot.enabled.exchange(false, std::memory_order_relaxed);
    if (was_enabled) registered_count_.fetch_sub(1, std::memory_order_relaxed);
  }

  // Returns a copy of the slot's data, or nullopt if the slot is disabled or IsStopped{}() is
  // true.
  std::optional<T> Get(uint32_t operation_id) const {
    assert(operation_id < N && "id is out of range");
    const table_element_t& slot = table_.at(operation_id);

    // Checks made before taking the lock; IsStopped is only consulted for an enabled slot.
    if (!slot.enabled.load(std::memory_order_relaxed)) return std::nullopt;
    if (IsStopped{}()) return std::nullopt;

    // Re-check under the shared lock: the slot may have been unregistered in the meantime.
    std::shared_lock shared(slot.mutex);
    if (!slot.enabled.load(std::memory_order_relaxed)) return std::nullopt;
    return std::make_optional(slot.data);
  }

  // True when no slot is currently enabled.
  bool IsEmpty() const { return registered_count_.load(std::memory_order_relaxed) == 0; }

 private:
  std::atomic<size_t> registered_count_{0};
  std::array<table_element_t, N> table_{};
};
#if IGNORE_GCC_ARRAY_BOUNDS_ERROR
#pragma GCC diagnostic pop
#endif // IGNORE_GCC_ARRAY_BOUNDS_ERROR
} // namespace roctracer::util
#endif // UTIL_CALLBACK_TABLE_H_
+894
Féach ar an gComhad
@@ -0,0 +1,894 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "roctracer.h"
#include "roctracer_hip.h"
#include "roctracer_ext.h"
#include "roctracer_roctx.h"
#include "roctracer_hsa.h"
#include <assert.h>
#include <dirent.h>
#include <hsa/hsa_api_trace.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <atomic>
#include <mutex>
#include <new>
#include <stack>
#include <type_traits>
#include <unordered_map>
#include <vector>
#include "correlation_id.h"
#include "debug.h"
#include "exception.h"
#include "hsa_support.h"
#include "loader.h"
#include "logger.h"
#include "memory_pool.h"
#include "registration_table.h"
// Opens the body of a public API function: declares the status variable 'err' and starts a try
// block. Must be closed with API_METHOD_SUFFIX or API_METHOD_CATCH.
#define API_METHOD_PREFIX                                                                          \
  roctracer_status_t err = ROCTRACER_STATUS_SUCCESS;                                               \
  try {
// Closes the try block opened by API_METHOD_PREFIX for functions returning roctracer_status_t:
// any std::exception is logged and converted to a status code with GetExcStatus().
// Exceptions are caught by const reference; the handlers never modify them.
#define API_METHOD_SUFFIX                                                                          \
  }                                                                                                \
  catch (const std::exception& e) {                                                                \
    ERR_LOGGING(__FUNCTION__ << "(), " << e.what());                                               \
    err = GetExcStatus(e);                                                                         \
  }                                                                                                \
  return err;
// Closes the try block opened by API_METHOD_PREFIX for functions that do not return a status:
// any std::exception is logged and the function returns X.
#define API_METHOD_CATCH(X)                                                                        \
  }                                                                                                \
  catch (const std::exception& e) {                                                                \
    ERR_LOGGING(__FUNCTION__ << "(), " << e.what());                                               \
  }                                                                                                \
  (void)err;                                                                                       \
  return X;
// Returns the process ID. The getpid system call is issued once, on first use, and the result
// is cached for all later calls.
static inline uint32_t GetPid() {
  static const auto cached_pid = syscall(__NR_getpid);
  return static_cast<uint32_t>(cached_pid);
}
// Returns the kernel thread ID of the calling thread. The gettid system call is issued once per
// thread, on first use, and the result is cached in thread-local storage.
static inline uint32_t GetTid() {
  static thread_local const auto cached_tid = syscall(__NR_gettid);
  return static_cast<uint32_t>(cached_tid);
}
using namespace roctracer;
namespace {

///////////////////////////////////////////////////////////////////////////////////////////////////
// Internal library methods
//

// Hooks invoked by roctracer_start() / roctracer_stop(); null until a client installs them.
roctracer_start_cb_t roctracer_start_cb = nullptr;
roctracer_stop_cb_t roctracer_stop_cb = nullptr;

// Maps an exception to a status code: an ApiError carries its own status, anything else is
// reported as the generic ROCTRACER_STATUS_ERROR.
roctracer_status_t GetExcStatus(const std::exception& e) {
  if (const auto* api_error = dynamic_cast<const ApiError*>(&e)) return api_error->status();
  return ROCTRACER_STATUS_ERROR;
}

// Serializes enabling/disabling of callbacks and activities.
std::mutex registration_mutex;

// Memory pool routines and primitives
std::recursive_mutex memory_pool_mutex;
MemoryPool* default_memory_pool = nullptr;

}  // namespace
///////////////////////////////////////////////////////////////////////////////////////////////////
// Public library methods
//
// Returns library version
// Major component of the library version (ROCTRACER_VERSION_MAJOR).
ROCTRACER_API uint32_t roctracer_version_major() { return ROCTRACER_VERSION_MAJOR; }
// Minor component of the library version (ROCTRACER_VERSION_MINOR).
ROCTRACER_API uint32_t roctracer_version_minor() { return ROCTRACER_VERSION_MINOR; }
// Returns the last error message recorded by the logger, as a NUL-terminated C string.
// NOTE(review): every call returns a fresh strdup() allocation that this library never frees;
// presumably the caller owns the buffer and must free() it - confirm against the public header.
ROCTRACER_API const char* roctracer_error_string() {
  const auto& last_message = util::Logger::Instance().LastMessage();
  return strdup(last_message.c_str());
}
// Returns the name of an operation for the given domain and activity/API codes.
// If the domain is not handled (or a lookup throws), the error is logged and nullptr is returned.
ROCTRACER_API const char* roctracer_op_string(uint32_t domain, uint32_t op, uint32_t kind) {
  API_METHOD_PREFIX
  switch (domain) {
    case ACTIVITY_DOMAIN_HIP_API:
      return HipLoader::Instance().ApiName(op);
    case ACTIVITY_DOMAIN_HIP_OPS:
      // HIP activity names are looked up by 'kind'; 'op' is not used for this domain.
      return HipLoader::Instance().GetOpName(kind);
    case ACTIVITY_DOMAIN_HSA_API:
      return hsa_support::GetApiName(op);
    case ACTIVITY_DOMAIN_HSA_OPS:
      return hsa_support::GetOpsName(op);
    case ACTIVITY_DOMAIN_HSA_EVT:
      return hsa_support::GetEvtName(op);
    case ACTIVITY_DOMAIN_EXT_API:
      return "EXT_API";
    default:
      throw roctracer::ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID");
  }
  API_METHOD_CATCH(nullptr)
}
// Returns the operation code (and kind) for the given API name.
//
//   domain - ACTIVITY_DOMAIN_HSA_API or ACTIVITY_DOMAIN_HIP_API; any other domain yields
//            ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID.
//   str    - API name to look up; must not be null.
//   op     - receives the operation code; must not be null.
//   kind   - optional; set to 0 when not null.
//
// Returns ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT when 'str' or 'op' is null or when the name
// is unknown in the given domain.
ROCTRACER_API roctracer_status_t roctracer_op_code(uint32_t domain, const char* str, uint32_t* op,
                                                   uint32_t* kind) {
  API_METHOD_PREFIX
  // Reject null arguments up front: 'op' is written unconditionally below and 'str' is streamed
  // into the error message, neither of which is valid for a null pointer.
  if (str == nullptr || op == nullptr) {
    EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                "null argument, domain ID(" << domain << ")");
  }
  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_API: {
      *op = hsa_support::GetApiCode(str);
      // HSA_API_ID_NUMBER is the "not found" result of the lookup.
      if (*op == HSA_API_ID_NUMBER) {
        EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                    "Invalid API name \"" << str << "\", domain ID(" << domain << ")");
      }
      if (kind != nullptr) *kind = 0;
      break;
    }
    case ACTIVITY_DOMAIN_HIP_API: {
      *op = hipApiIdByName(str);
      // HIP_API_ID_NONE is the "not found" result of the lookup.
      if (*op == HIP_API_ID_NONE) {
        EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                    "Invalid API name \"" << str << "\", domain ID(" << domain << ")");
      }
      if (kind != nullptr) *kind = 0;
      break;
    }
    default:
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "limited domain ID(" << domain << ")");
  }
  API_METHOD_SUFFIX
}
namespace {
// Compile-time description of a tracing domain. Each specialization provides:
//   ApiData     - payload type passed to user callbacks (only for domains that have one),
//   OperationId - enumeration type of the domain's operation IDs,
//   kOpIdBegin  - first operation ID of the domain,
//   kOpIdEnd    - one past the last operation ID of the domain.
template <activity_domain_t> struct DomainTraits;
// HIP runtime API calls.
template <> struct DomainTraits<ACTIVITY_DOMAIN_HIP_API> {
using ApiData = hip_api_data_t;
using OperationId = hip_api_id_t;
static constexpr size_t kOpIdBegin = HIP_API_ID_FIRST;
static constexpr size_t kOpIdEnd = HIP_API_ID_LAST + 1;
};
// HSA runtime API calls.
template <> struct DomainTraits<ACTIVITY_DOMAIN_HSA_API> {
using ApiData = hsa_api_data_t;
using OperationId = hsa_api_id_t;
static constexpr size_t kOpIdBegin = 0;
static constexpr size_t kOpIdEnd = HSA_API_ID_NUMBER;
};
// rocTX API calls.
template <> struct DomainTraits<ACTIVITY_DOMAIN_ROCTX> {
using ApiData = roctx_api_data_t;
using OperationId = roctx_api_id_t;
static constexpr size_t kOpIdBegin = 0;
static constexpr size_t kOpIdEnd = ROCTX_API_ID_NUMBER;
};
// HIP operations; no ApiData type is defined for this domain.
template <> struct DomainTraits<ACTIVITY_DOMAIN_HIP_OPS> {
using OperationId = hip_op_id_t;
static constexpr size_t kOpIdBegin = 0;
static constexpr size_t kOpIdEnd = HIP_OP_ID_NUMBER;
};
// HSA operations; no ApiData type is defined for this domain.
template <> struct DomainTraits<ACTIVITY_DOMAIN_HSA_OPS> {
using OperationId = hsa_op_id_t;
static constexpr size_t kOpIdBegin = 0;
static constexpr size_t kOpIdEnd = HSA_OP_ID_NUMBER;
};
// HSA events.
template <> struct DomainTraits<ACTIVITY_DOMAIN_HSA_EVT> {
using ApiData = hsa_evt_data_t;
using OperationId = hsa_evt_id_t;
static constexpr size_t kOpIdBegin = 0;
static constexpr size_t kOpIdEnd = HSA_EVT_ID_NUMBER;
};
// Returns the first operation ID of 'domain'.
// Throws ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID) for a domain not listed here.
constexpr uint32_t get_op_begin(activity_domain_t domain) {
  size_t first_op = 0;
  switch (domain) {
    case ACTIVITY_DOMAIN_HIP_API:
      first_op = DomainTraits<ACTIVITY_DOMAIN_HIP_API>::kOpIdBegin;
      break;
    case ACTIVITY_DOMAIN_HIP_OPS:
      first_op = DomainTraits<ACTIVITY_DOMAIN_HIP_OPS>::kOpIdBegin;
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      first_op = DomainTraits<ACTIVITY_DOMAIN_HSA_API>::kOpIdBegin;
      break;
    case ACTIVITY_DOMAIN_HSA_OPS:
      first_op = DomainTraits<ACTIVITY_DOMAIN_HSA_OPS>::kOpIdBegin;
      break;
    case ACTIVITY_DOMAIN_HSA_EVT:
      first_op = DomainTraits<ACTIVITY_DOMAIN_HSA_EVT>::kOpIdBegin;
      break;
    case ACTIVITY_DOMAIN_ROCTX:
      first_op = DomainTraits<ACTIVITY_DOMAIN_ROCTX>::kOpIdBegin;
      break;
    case ACTIVITY_DOMAIN_EXT_API:
      // The external API domain has no DomainTraits specialization; its range starts at 0.
      first_op = 0;
      break;
    default:
      throw roctracer::ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID");
  }
  return static_cast<uint32_t>(first_op);
}
// Returns one past the last operation ID of 'domain'.
// Throws ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID) for a domain not listed here.
constexpr uint32_t get_op_end(activity_domain_t domain) {
  size_t end_op = 0;
  switch (domain) {
    case ACTIVITY_DOMAIN_HIP_API:
      end_op = DomainTraits<ACTIVITY_DOMAIN_HIP_API>::kOpIdEnd;
      break;
    case ACTIVITY_DOMAIN_HIP_OPS:
      end_op = DomainTraits<ACTIVITY_DOMAIN_HIP_OPS>::kOpIdEnd;
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      end_op = DomainTraits<ACTIVITY_DOMAIN_HSA_API>::kOpIdEnd;
      break;
    case ACTIVITY_DOMAIN_HSA_OPS:
      end_op = DomainTraits<ACTIVITY_DOMAIN_HSA_OPS>::kOpIdEnd;
      break;
    case ACTIVITY_DOMAIN_HSA_EVT:
      end_op = DomainTraits<ACTIVITY_DOMAIN_HSA_EVT>::kOpIdEnd;
      break;
    case ACTIVITY_DOMAIN_ROCTX:
      end_op = DomainTraits<ACTIVITY_DOMAIN_ROCTX>::kOpIdEnd;
      break;
    case ACTIVITY_DOMAIN_EXT_API:
      // The end equals the beginning, so the operation range of this domain is empty.
      end_op = get_op_begin(ACTIVITY_DOMAIN_EXT_API);
      break;
    default:
      throw roctracer::ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID");
  }
  return static_cast<uint32_t>(end_op);
}
// Global "tracing is stopped" flag; toggled by roctracer_start() / roctracer_stop().
std::atomic<bool> stopped_status{false};

// Predicate reporting the current value of the global stopped flag.
struct IsStopped {
  bool operator()() const { return stopped_status.load(std::memory_order_relaxed); }
};

// Predicate for tables that ignore the global stopped flag: always reports "not stopped".
// const-qualified like IsStopped so both predicates can be invoked through const objects.
struct NeverStopped {
  constexpr bool operator()() const noexcept { return false; }
};
// A registered user callback: the callback function and its opaque user argument.
using UserCallback = std::pair<activity_rtapi_callback_t, void*>;
// Per-domain table of user callbacks, with one slot per operation ID below the domain's kOpIdEnd.
template <activity_domain_t domain, typename IsStopped>
using CallbackRegistrationTable =
util::RegistrationTable<UserCallback, DomainTraits<domain>::kOpIdEnd, IsStopped>;
// Per-domain table of memory pools receiving activity records, with one slot per operation ID
// below the domain's kOpIdEnd.
template <activity_domain_t domain, typename IsStopped>
using ActivityRegistrationTable =
util::RegistrationTable<MemoryPool*, DomainTraits<domain>::kOpIdEnd, IsStopped>;
// Tracing logic for an API domain (instantiated below for the HIP and HSA API domains).
// Enter() decides whether an API call is traced and installs the phase_enter/phase_exit hooks in
// the call's TraceData; the hooks invoke the user callback and/or write an activity record.
template <activity_domain_t domain> struct ApiTracer {
using ApiData = typename DomainTraits<domain>::ApiData;
using OperationId = typename DomainTraits<domain>::OperationId;
// Per-call state shared between the enter and exit phases of a traced API call.
struct TraceData {
ApiData api_data; // API specific data (for example, function arguments).
uint64_t phase_enter_timestamp; // timestamp when phase_enter was executed.
uint64_t phase_data; // data that can be shared between phase_enter and phase_exit.
void (*phase_enter)(OperationId operation_id, TraceData* data);
void (*phase_exit)(OperationId operation_id, TraceData* data);
};
// Exit hook: if a memory pool is registered for the operation, writes an activity record for the
// call, then pops the correlation ID (the pop happens whether or not a record was written).
static void Exit(OperationId operation_id, TraceData* trace_data) {
if (auto pool = activity_table.Get(operation_id)) {
assert(trace_data != nullptr);
activity_record_t record{};
record.domain = domain;
record.op = operation_id;
record.correlation_id = trace_data->api_data.correlation_id;
record.begin_ns = trace_data->phase_enter_timestamp;
record.end_ns = hsa_support::timestamp_ns();
record.process_id = GetPid();
record.thread_id = GetTid();
if (auto external_id = ExternalCorrelationId()) {
roctracer_record_t ext_record{};
ext_record.domain = ACTIVITY_DOMAIN_EXT_API;
ext_record.op = ACTIVITY_EXT_OP_EXTERN_ID;
ext_record.correlation_id = record.correlation_id;
ext_record.external_id = *external_id;
// Write the external correlation id record directly followed by the activity record.
(*pool)->Write(std::array<roctracer_record_t, 2>{ext_record, record});
} else {
// Write record to the buffer.
(*pool)->Write(record);
}
}
CorrelationIdPop();
}
// Exit hook used when a user callback ran in the enter phase: invokes the user callback (if one
// is still registered) with phase ACTIVITY_API_PHASE_EXIT, then runs Exit().
static void Exit_UserCallback(OperationId operation_id, TraceData* trace_data) {
if (auto user_callback = callback_table.Get(operation_id)) {
assert(trace_data != nullptr);
trace_data->api_data.phase = ACTIVITY_API_PHASE_EXIT;
user_callback->first(domain, operation_id, &trace_data->api_data, user_callback->second);
}
Exit(operation_id, trace_data);
}
// Enter hook: invokes the user callback with phase ACTIVITY_API_PHASE_ENTER and selects the exit
// hook. If the callback is no longer registered, the exit hook falls back to Exit().
static void Enter_UserCallback(OperationId operation_id, TraceData* trace_data) {
if (auto user_callback = callback_table.Get(operation_id)) {
assert(trace_data != nullptr);
trace_data->api_data.phase = ACTIVITY_API_PHASE_ENTER;
trace_data->api_data.phase_data = &trace_data->phase_data;
user_callback->first(domain, operation_id, &trace_data->api_data, user_callback->second);
trace_data->phase_exit = Exit_UserCallback;
} else {
trace_data->phase_exit = Exit;
}
}
// Entry point for an API call. Returns -1 when neither a callback nor an activity is registered
// for the operation; otherwise returns 0 and, if trace_data is not null, pushes a new
// correlation ID and installs the phase hooks.
static int Enter(OperationId operation_id, TraceData* trace_data) {
bool callback_enabled = callback_table.Get(operation_id).has_value(),
activity_enabled = activity_table.Get(operation_id).has_value();
if (!callback_enabled && !activity_enabled) return -1;
if (trace_data != nullptr) {
// Generate a new correlation ID.
trace_data->api_data.correlation_id = CorrelationIdPush();
if (activity_enabled) {
trace_data->phase_enter_timestamp = hsa_support::timestamp_ns();
trace_data->phase_enter = nullptr;
trace_data->phase_exit = Exit;
}
// The callback setup comes last, so it overrides the hooks installed for activities above.
// The placeholder phase_exit is replaced by Enter_UserCallback when it runs.
if (callback_enabled) {
trace_data->phase_enter = Enter_UserCallback;
trace_data->phase_exit = [](OperationId, TraceData*) { fatal("should not reach here"); };
}
}
return 0;
}
// Registered user callbacks and activity pools for this domain.
static CallbackRegistrationTable<domain, IsStopped> callback_table;
static ActivityRegistrationTable<domain, IsStopped> activity_table;
};
// Out-of-class definitions of ApiTracer's static registration tables.
template <activity_domain_t domain>
CallbackRegistrationTable<domain, IsStopped> ApiTracer<domain>::callback_table;
template <activity_domain_t domain>
ActivityRegistrationTable<domain, IsStopped> ApiTracer<domain>::activity_table;
// API tracers for the HIP and HSA API domains.
using HIP_ApiTracer = ApiTracer<ACTIVITY_DOMAIN_HIP_API>;
using HSA_ApiTracer = ApiTracer<ACTIVITY_DOMAIN_HSA_API>;
// Registration tables for the domains that have no ApiTracer. The rocTX callback table uses
// NeverStopped, so it is the only table not affected by the global stopped flag.
CallbackRegistrationTable<ACTIVITY_DOMAIN_ROCTX, NeverStopped> roctx_api_callback_table;
ActivityRegistrationTable<ACTIVITY_DOMAIN_HIP_OPS, IsStopped> hip_ops_activity_table;
ActivityRegistrationTable<ACTIVITY_DOMAIN_HSA_OPS, IsStopped> hsa_ops_activity_table;
CallbackRegistrationTable<ACTIVITY_DOMAIN_HSA_EVT, IsStopped> hsa_evt_callback_table;
// Single callback handed to the HSA, HIP and rocTX layers (see the registration groups below).
// Dispatches on 'domain'; 'data' is interpreted according to the domain:
//   HSA_API / HIP_API - the tracer's TraceData; forwarded to ApiTracer::Enter.
//   HIP_OPS / HSA_OPS - an activity_record_t; written to the registered memory pool.
//   ROCTX / HSA_EVT   - the domain's ApiData; passed to the registered user callback.
// Returns 0 when something is registered for the operation, and -1 otherwise (including unknown
// domains). For the API domains the result of ApiTracer::Enter is returned.
int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) {
switch (domain) {
case ACTIVITY_DOMAIN_HSA_API:
return HSA_ApiTracer::Enter(static_cast<HSA_ApiTracer::OperationId>(operation_id),
static_cast<HSA_ApiTracer::TraceData*>(data));
case ACTIVITY_DOMAIN_HIP_API:
return HIP_ApiTracer::Enter(static_cast<HIP_ApiTracer::OperationId>(operation_id),
static_cast<HIP_ApiTracer::TraceData*>(data));
case ACTIVITY_DOMAIN_HIP_OPS:
if (auto pool = hip_ops_activity_table.Get(operation_id)) {
if (auto record = static_cast<activity_record_t*>(data)) {
// If the record is for a kernel dispatch, write the kernel name in the pool's data,
// and make the record point to it. Older HIP runtimes do not provide a kernel
// name, so record.kernel_name might be null.
if (operation_id == HIP_OP_ID_DISPATCH && record->kernel_name != nullptr)
(*pool)->Write(*record, record->kernel_name, strlen(record->kernel_name) + 1,
[](auto& record, const void* data) {
record.kernel_name = static_cast<const char*>(data);
});
else
(*pool)->Write(*record);
}
// A null record is not written, but the operation still counts as handled.
return 0;
}
break;
case ACTIVITY_DOMAIN_ROCTX:
if (auto user_callback = roctx_api_callback_table.Get(operation_id)) {
if (auto api_data = static_cast<DomainTraits<ACTIVITY_DOMAIN_ROCTX>::ApiData*>(data))
user_callback->first(ACTIVITY_DOMAIN_ROCTX, operation_id, api_data,
user_callback->second);
return 0;
}
break;
case ACTIVITY_DOMAIN_HSA_OPS:
if (auto pool = hsa_ops_activity_table.Get(operation_id)) {
if (auto record = static_cast<activity_record_t*>(data)) (*pool)->Write(*record);
return 0;
}
break;
case ACTIVITY_DOMAIN_HSA_EVT:
if (auto user_callback = hsa_evt_callback_table.Get(operation_id)) {
if (auto api_data = static_cast<DomainTraits<ACTIVITY_DOMAIN_HSA_EVT>::ApiData*>(data))
user_callback->first(ACTIVITY_DOMAIN_HSA_EVT, operation_id, api_data,
user_callback->second);
return 0;
}
break;
default:
break;
}
// Nothing is registered for this domain/operation.
return -1;
}
// Groups the registration tables that share one tracer hook.
//
// The hook is engaged right before the first registration in an otherwise empty group, and
// disengaged right after an unregistration leaves every table of the group empty.
//
// The group keeps references to the tables, so the tables must outlive the group.
template <typename... Tables> struct RegistrationTableGroup {
 public:
  // engage_tracer / disengage_tracer: callables taking no arguments that install / remove the
  // tracer hook. tables: every table whose emptiness decides when the hook is needed.
  template <typename Functor1, typename Functor2>
  RegistrationTableGroup(Functor1&& engage_tracer, Functor2&& disengage_tracer, Tables&... tables)
      : engage_tracer_(std::forward<Functor1>(engage_tracer)),
        disengage_tracer_(std::forward<Functor2>(disengage_tracer)),
        tables_(tables...) {}

  // Registers 'operation_id' in 'table', engaging the tracer first if the group was empty.
  // The arguments are perfectly forwarded to the table so no extra copy is made on the way.
  template <typename T, typename... Args>
  void Register(T& table, uint32_t operation_id, Args&&... args) const {
    if (AllEmpty()) engage_tracer_();
    table.Register(operation_id, std::forward<Args>(args)...);
  }

  // Unregisters 'operation_id' from 'table', disengaging the tracer if the group is now empty.
  template <typename T> void Unregister(T& table, uint32_t operation_id) const {
    table.Unregister(operation_id);
    if (AllEmpty()) disengage_tracer_();
  }

 private:
  // True when every table of the group reports IsEmpty().
  bool AllEmpty() const {
    return std::apply([](auto&&... tables) { return (tables.IsEmpty() && ...); }, tables_);
  }

  const std::function<void()> engage_tracer_;
  const std::function<void()> disengage_tracer_;
  const std::tuple<const Tables&...> tables_;
};
// Tables served by the HSA layer: TracerCallback is installed through hsa_support while any of
// them has a registration, and removed (nullptr) once all of them are empty.
RegistrationTableGroup HSA_registration_group(
[]() { hsa_support::RegisterTracerCallback(TracerCallback); },
[]() { hsa_support::RegisterTracerCallback(nullptr); }, HSA_ApiTracer::callback_table,
HSA_ApiTracer::activity_table, hsa_ops_activity_table, hsa_evt_callback_table);
// Tables served by the HIP layer; the callback is installed/removed through HipLoader.
RegistrationTableGroup HIP_registration_group(
[]() { HipLoader::Instance().RegisterTracerCallback(TracerCallback); },
[]() { HipLoader::Instance().RegisterTracerCallback(nullptr); }, HIP_ApiTracer::callback_table,
HIP_ApiTracer::activity_table, hip_ops_activity_table);
// Table served by rocTX; the callback is installed/removed through RocTxLoader.
RegistrationTableGroup ROCTX_registration_group(
[]() { RocTxLoader::Instance().RegisterTracerCallback(TracerCallback); },
[]() { RocTxLoader::Instance().RegisterTracerCallback(nullptr); }, roctx_api_callback_table);
} // namespace
// Enable runtime API callbacks.
// Registers 'callback' (with 'user_data') for one operation of 'domain'. Throws ApiError when
// the domain is invalid, the operation ID is out of range, or the callback is null. The HIP and
// rocTX domains are silently skipped when their loader reports the library as not enabled.
static void roctracer_enable_callback_impl(roctracer_domain_t domain, uint32_t operation_id,
                                           roctracer_rtapi_callback_t callback, void* user_data) {
  std::lock_guard lock(registration_mutex);

  // get_op_end() also validates the domain (it throws for an invalid domain ID).
  const bool op_in_range = operation_id < get_op_end(domain);
  if (!op_in_range || callback == nullptr)
    throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument");

  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_OPS:
    case ACTIVITY_DOMAIN_HIP_OPS:
      // Nothing is registered for the ops domains here.
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      HSA_registration_group.Register(HSA_ApiTracer::callback_table, operation_id, callback,
                                      user_data);
      break;
    case ACTIVITY_DOMAIN_HSA_EVT:
      HSA_registration_group.Register(hsa_evt_callback_table, operation_id, callback, user_data);
      break;
    case ACTIVITY_DOMAIN_HIP_API: {
      if (HipLoader::Instance().IsEnabled()) {
        HIP_registration_group.Register(HIP_ApiTracer::callback_table, operation_id, callback,
                                        user_data);
      }
      break;
    }
    case ACTIVITY_DOMAIN_ROCTX: {
      if (RocTxLoader::Instance().IsEnabled()) {
        ROCTX_registration_group.Register(roctx_api_callback_table, operation_id, callback,
                                          user_data);
      }
      break;
    }
    default:
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
  }
}
// Public API: enables 'callback' for a single operation 'op' of 'domain'.
// Exceptions thrown by the implementation are logged and converted to a status code.
ROCTRACER_API roctracer_status_t roctracer_enable_op_callback(roctracer_domain_t domain,
uint32_t op,
roctracer_rtapi_callback_t callback,
void* user_data) {
API_METHOD_PREFIX
roctracer_enable_callback_impl(domain, op, callback, user_data);
API_METHOD_SUFFIX
}
// Public API: enables 'callback' for every operation of 'domain', i.e. for each ID in
// [get_op_begin(domain), get_op_end(domain)). Stops at the first operation that fails and
// returns its status; operations enabled before the failure stay enabled.
ROCTRACER_API roctracer_status_t roctracer_enable_domain_callback(
roctracer_domain_t domain, roctracer_rtapi_callback_t callback, void* user_data) {
API_METHOD_PREFIX
const uint32_t op_end = get_op_end(domain);
for (uint32_t op = get_op_begin(domain); op < op_end; ++op)
roctracer_enable_callback_impl(domain, op, callback, user_data);
API_METHOD_SUFFIX
}
// Disable runtime API callbacks.
// Removes the callback registered for one operation of 'domain'. Throws ApiError when the domain
// is invalid or the operation ID is out of range. The HIP and rocTX domains are silently skipped
// when their loader reports the library as not enabled.
static void roctracer_disable_callback_impl(roctracer_domain_t domain, uint32_t operation_id) {
  std::lock_guard lock(registration_mutex);

  // get_op_end() also validates the domain (it throws for an invalid domain ID).
  const bool op_in_range = operation_id < get_op_end(domain);
  if (!op_in_range) throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument");

  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_OPS:
    case ACTIVITY_DOMAIN_HIP_OPS:
      // Nothing is registered for the ops domains here.
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      HSA_registration_group.Unregister(HSA_ApiTracer::callback_table, operation_id);
      break;
    case ACTIVITY_DOMAIN_HSA_EVT:
      HSA_registration_group.Unregister(hsa_evt_callback_table, operation_id);
      break;
    case ACTIVITY_DOMAIN_HIP_API: {
      if (HipLoader::Instance().IsEnabled()) {
        HIP_registration_group.Unregister(HIP_ApiTracer::callback_table, operation_id);
      }
      break;
    }
    case ACTIVITY_DOMAIN_ROCTX: {
      if (RocTxLoader::Instance().IsEnabled()) {
        ROCTX_registration_group.Unregister(roctx_api_callback_table, operation_id);
      }
      break;
    }
    default:
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
  }
}
// Public API: disables the callback for a single operation 'op' of 'domain'.
// Exceptions thrown by the implementation are logged and converted to a status code.
ROCTRACER_API roctracer_status_t roctracer_disable_op_callback(roctracer_domain_t domain,
uint32_t op) {
API_METHOD_PREFIX
roctracer_disable_callback_impl(domain, op);
API_METHOD_SUFFIX
}
// Public API: disables the callbacks of every operation of 'domain', i.e. for each ID in
// [get_op_begin(domain), get_op_end(domain)). Stops at the first operation that fails.
ROCTRACER_API roctracer_status_t roctracer_disable_domain_callback(roctracer_domain_t domain) {
API_METHOD_PREFIX
const uint32_t op_end = get_op_end(domain);
for (uint32_t op = get_op_begin(domain); op < op_end; ++op)
roctracer_disable_callback_impl(domain, op);
API_METHOD_SUFFIX
}
// Returns the current default pool and, when 'pool' is not null, installs 'pool' as the new
// default. Passing null only queries the current default.
ROCTRACER_API roctracer_pool_t* roctracer_default_pool_expl(roctracer_pool_t* pool) {
  std::lock_guard lock(memory_pool_mutex);
  MemoryPool* const previous_pool = default_memory_pool;
  if (pool != nullptr) {
    default_memory_pool = reinterpret_cast<MemoryPool*>(pool);
  }
  return reinterpret_cast<roctracer_pool_t*>(previous_pool);
}
// Returns the current default memory pool, or null if none is set. The pool pointer is read
// while holding memory_pool_mutex.
ROCTRACER_API roctracer_pool_t* roctracer_default_pool() {
std::lock_guard lock(memory_pool_mutex);
return reinterpret_cast<roctracer_pool_t*>(default_memory_pool);
}
// Open memory pool
static void roctracer_open_pool_impl(const roctracer_properties_t* properties,
roctracer_pool_t** pool) {
std::lock_guard lock(memory_pool_mutex);
if ((pool == nullptr) && (default_memory_pool != nullptr)) {
EXC_RAISING(ROCTRACER_STATUS_ERROR_DEFAULT_POOL_ALREADY_DEFINED, "default pool already set");
}
MemoryPool* p = new MemoryPool(*properties);
if (p == nullptr) EXC_RAISING(ROCTRACER_STATUS_ERROR_MEMORY_ALLOCATION, "MemoryPool() error");
if (pool != nullptr)
*pool = p;
else
default_memory_pool = p;
}
// Public API: opens a memory pool with the given properties. The new pool is returned through
// 'pool'; when 'pool' is null the new pool becomes the default pool instead.
ROCTRACER_API roctracer_status_t roctracer_open_pool_expl(const roctracer_properties_t* properties,
roctracer_pool_t** pool) {
API_METHOD_PREFIX
roctracer_open_pool_impl(properties, pool);
API_METHOD_SUFFIX
}
// Public API: opens a memory pool with the given properties and makes it the default pool.
// Fails if a default pool already exists.
ROCTRACER_API roctracer_status_t roctracer_open_pool(const roctracer_properties_t* properties) {
API_METHOD_PREFIX
roctracer_open_pool_impl(properties, nullptr);
API_METHOD_SUFFIX
}
// Public API: stores in '*next' the address of the record that follows 'record', computed as
// 'record + 1' (one activity_record_t further).
// NOTE(review): 'next' is dereferenced without a null check, and no bounds check is done here;
// presumably the caller guarantees both - confirm.
ROCTRACER_API roctracer_status_t roctracer_next_record(const activity_record_t* record,
const activity_record_t** next) {
API_METHOD_PREFIX
*next = record + 1;
API_METHOD_SUFFIX
}
// Enable activity records logging.
// Registers a memory pool as the destination of the activity records of one operation of
// 'domain'. A null 'pool' selects the default pool. Throws ApiError when no pool is available,
// the domain is invalid, or the operation ID is out of range. The HIP domains are silently
// skipped when the HIP loader reports the library as not enabled.
static void roctracer_enable_activity_impl(roctracer_domain_t domain, uint32_t op,
                                           roctracer_pool_t* pool) {
  std::lock_guard lock(registration_mutex);

  auto* memory_pool = reinterpret_cast<MemoryPool*>(pool);
  if (memory_pool == nullptr) {
    // No explicit pool: fall back to the default one, which must exist.
    memory_pool = default_memory_pool;
    if (memory_pool == nullptr) {
      EXC_RAISING(ROCTRACER_STATUS_ERROR_DEFAULT_POOL_UNDEFINED, "no default pool");
    }
  }

  // get_op_end() also validates the domain (it throws for an invalid domain ID).
  const bool op_in_range = op < get_op_end(domain);
  if (!op_in_range) throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument");

  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_EVT:
    case ACTIVITY_DOMAIN_ROCTX:
      // Nothing is registered for these domains here.
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      HSA_registration_group.Register(HSA_ApiTracer::activity_table, op, memory_pool);
      break;
    case ACTIVITY_DOMAIN_HSA_OPS:
      HSA_registration_group.Register(hsa_ops_activity_table, op, memory_pool);
      break;
    case ACTIVITY_DOMAIN_HIP_API: {
      if (HipLoader::Instance().IsEnabled()) {
        HIP_registration_group.Register(HIP_ApiTracer::activity_table, op, memory_pool);
      }
      break;
    }
    case ACTIVITY_DOMAIN_HIP_OPS: {
      if (HipLoader::Instance().IsEnabled()) {
        HIP_registration_group.Register(hip_ops_activity_table, op, memory_pool);
      }
      break;
    }
    default:
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
  }
}
// Public API: enables activity records for operation 'op' of 'domain', written to 'pool'
// (the default pool is used when 'pool' is null).
ROCTRACER_API roctracer_status_t roctracer_enable_op_activity_expl(roctracer_domain_t domain,
uint32_t op,
roctracer_pool_t* pool) {
API_METHOD_PREFIX
roctracer_enable_activity_impl(domain, op, pool);
API_METHOD_SUFFIX
}
// Public API: enables activity records for operation 'op' of 'domain', written to the default
// pool.
ROCTRACER_API roctracer_status_t roctracer_enable_op_activity(activity_domain_t domain,
uint32_t op) {
API_METHOD_PREFIX
roctracer_enable_activity_impl(domain, op, nullptr);
API_METHOD_SUFFIX
}
// Enables activity records for every operation of 'domain'. Operations that report
// ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED are skipped; any other ApiError is propagated.
static void roctracer_enable_domain_activity_impl(roctracer_domain_t domain,
                                                  roctracer_pool_t* pool) {
  const uint32_t op_end = get_op_end(domain);
  for (uint32_t op = get_op_begin(domain); op < op_end; ++op) {
    try {
      roctracer_enable_activity_impl(domain, op, pool);
    } catch (const ApiError& err) {
      const bool not_implemented = (err.status() == ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED);
      if (!not_implemented) throw;
    }
  }
}
// Public API: enables activity records for all operations of 'domain', written to 'pool'
// (the default pool is used when 'pool' is null).
ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity_expl(roctracer_domain_t domain,
roctracer_pool_t* pool) {
API_METHOD_PREFIX
roctracer_enable_domain_activity_impl(domain, pool);
API_METHOD_SUFFIX
}
// Public API: enables activity records for all operations of 'domain', written to the default
// pool.
ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity(activity_domain_t domain) {
API_METHOD_PREFIX
roctracer_enable_domain_activity_impl(domain, nullptr);
API_METHOD_SUFFIX
}
// Disable activity records logging.
// Removes the memory pool registered for one operation of 'domain'. Throws ApiError when the
// domain is invalid or the operation ID is out of range. The HIP domains are silently skipped
// when the HIP loader reports the library as not enabled.
static void roctracer_disable_activity_impl(roctracer_domain_t domain, uint32_t op) {
  std::lock_guard lock(registration_mutex);

  // get_op_end() also validates the domain (it throws for an invalid domain ID).
  const bool op_in_range = op < get_op_end(domain);
  if (!op_in_range) throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument");

  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_EVT:
    case ACTIVITY_DOMAIN_ROCTX:
      // Nothing is registered for these domains here.
      break;
    case ACTIVITY_DOMAIN_HSA_API:
      HSA_registration_group.Unregister(HSA_ApiTracer::activity_table, op);
      break;
    case ACTIVITY_DOMAIN_HSA_OPS:
      HSA_registration_group.Unregister(hsa_ops_activity_table, op);
      break;
    case ACTIVITY_DOMAIN_HIP_API: {
      if (HipLoader::Instance().IsEnabled()) {
        HIP_registration_group.Unregister(HIP_ApiTracer::activity_table, op);
      }
      break;
    }
    case ACTIVITY_DOMAIN_HIP_OPS: {
      if (HipLoader::Instance().IsEnabled()) {
        HIP_registration_group.Unregister(hip_ops_activity_table, op);
      }
      break;
    }
    default:
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
  }
}
// Public API: disables activity records for operation 'op' of 'domain'.
ROCTRACER_API roctracer_status_t roctracer_disable_op_activity(roctracer_domain_t domain,
uint32_t op) {
API_METHOD_PREFIX
roctracer_disable_activity_impl(domain, op);
API_METHOD_SUFFIX
}
// Disables activity records for every operation of 'domain'. Operations that report
// ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED are skipped; any other ApiError is propagated.
static void roctracer_disable_domain_activity_impl(roctracer_domain_t domain) {
  const uint32_t op_end = get_op_end(domain);
  for (uint32_t op = get_op_begin(domain); op < op_end; ++op) {
    try {
      roctracer_disable_activity_impl(domain, op);
    } catch (const ApiError& err) {
      const bool not_implemented = (err.status() == ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED);
      if (!not_implemented) throw;
    }
  }
}
// Public API: disables activity records for all operations of 'domain'.
ROCTRACER_API roctracer_status_t roctracer_disable_domain_activity(roctracer_domain_t domain) {
API_METHOD_PREFIX
roctracer_disable_domain_activity_impl(domain);
API_METHOD_SUFFIX
}
// Close memory pool
// Deletes 'pool', or the default pool when 'pool' is null. Does nothing when there is no pool
// to close. If the deleted pool is the default pool, the default is reset to null.
// NOTE(review): enabling an activity stores the raw pool pointer in the activity tables, and
// nothing in this function removes those registrations (the cleanup below is compiled out), so
// activities that still reference the pool may be left with a dangling pointer - confirm that
// callers disable them first.
static void roctracer_close_pool_impl(roctracer_pool_t* pool) {
std::lock_guard lock(memory_pool_mutex);
if (pool == nullptr) pool = reinterpret_cast<roctracer_pool_t*>(default_memory_pool);
if (pool == nullptr) return;
MemoryPool* p = reinterpret_cast<MemoryPool*>(pool);
if (p == default_memory_pool) default_memory_pool = nullptr;
#if 0
// Disable any activities that specify the pool being deleted.
std::vector<std::pair<roctracer_domain_t, uint32_t>> ops;
act_journal.ForEach(
[&ops, pool](roctracer_domain_t domain, uint32_t op, const ActivityJournalData& data) {
if (pool == data.pool) ops.emplace_back(domain, op);
return true;
});
for (auto&& [domain, op] : ops) roctracer_disable_activity_impl(domain, op);
#endif
delete (p);
}
// Public API: closes 'pool' (the default pool when 'pool' is null).
ROCTRACER_API roctracer_status_t roctracer_close_pool_expl(roctracer_pool_t* pool) {
API_METHOD_PREFIX
roctracer_close_pool_impl(pool);
API_METHOD_SUFFIX
}
// Public API: closes the default memory pool, if any.
ROCTRACER_API roctracer_status_t roctracer_close_pool() {
  API_METHOD_PREFIX
  // nullptr (rather than NULL) selects the default pool, as in the other *_pool wrappers.
  roctracer_close_pool_impl(nullptr);
  API_METHOD_SUFFIX
}
// Flush available activity records.
// Flushes 'pool', or the default pool when 'pool' is null. Does nothing when no pool exists.
static void roctracer_flush_activity_impl(roctracer_pool_t* pool) {
  roctracer_pool_t* const target = (pool != nullptr) ? pool : roctracer_default_pool();
  if (auto* memory_pool = reinterpret_cast<MemoryPool*>(target)) {
    memory_pool->Flush();
  }
}
// Public API: flushes the activity records of 'pool' (the default pool when 'pool' is null).
ROCTRACER_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* pool) {
API_METHOD_PREFIX
roctracer_flush_activity_impl(pool);
API_METHOD_SUFFIX
}
// Public API: flushes the activity records of the default pool.
ROCTRACER_API roctracer_status_t roctracer_flush_activity() {
API_METHOD_PREFIX
roctracer_flush_activity_impl(nullptr);
API_METHOD_SUFFIX
}
// Notifies that the calling thread is entering an external API region.
// Push an external correlation id for the calling thread.
// Exceptions thrown by the push are logged and converted to a status code.
ROCTRACER_API roctracer_status_t
roctracer_activity_push_external_correlation_id(activity_correlation_id_t id) {
API_METHOD_PREFIX
ExternalCorrelationIdPush(id);
API_METHOD_SUFFIX
}
// Notifies that the calling thread is leaving an external API region.
// Pops an external correlation id for the calling thread and, when 'last_id' is not null,
// returns it there. If there is nothing to pop, '*last_id' is set to 0 and
// ROCTRACER_STATUS_ERROR_MISMATCHED_EXTERNAL_CORRELATION_ID is reported.
ROCTRACER_API roctracer_status_t
roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id) {
  API_METHOD_PREFIX
  const auto popped_id = ExternalCorrelationIdPop();
  if (popped_id) {
    if (last_id != nullptr) *last_id = *popped_id;
  } else {
    if (last_id != nullptr) *last_id = 0;
    EXC_RAISING(ROCTRACER_STATUS_ERROR_MISMATCHED_EXTERNAL_CORRELATION_ID,
                "unbalanced external correlation id pop");
  }
  API_METHOD_SUFFIX
}
// Start API.
// Clears the global stopped flag. The start hook, if one is installed, is invoked only when the
// flag actually changes from stopped to started.
ROCTRACER_API void roctracer_start() {
  const bool was_stopped = stopped_status.exchange(false, std::memory_order_relaxed);
  if (!was_stopped) return;
  if (roctracer_start_cb) roctracer_start_cb();
}
// Stop API.
// Sets the global stopped flag. The stop hook, if one is installed, is invoked only when the
// flag actually changes from started to stopped.
ROCTRACER_API void roctracer_stop() {
  const bool was_stopped = stopped_status.exchange(true, std::memory_order_relaxed);
  if (was_stopped) return;
  if (roctracer_stop_cb) roctracer_stop_cb();
}
// Public API: stores the current timestamp, as returned by hsa_support::timestamp_ns(), in
// '*timestamp'.
// NOTE(review): 'timestamp' is dereferenced without a null check; presumably callers always pass
// a valid pointer - confirm.
ROCTRACER_API roctracer_status_t roctracer_get_timestamp(roctracer_timestamp_t* timestamp) {
API_METHOD_PREFIX
*timestamp = hsa_support::timestamp_ns();
API_METHOD_SUFFIX
}
// Set properties.
// Only ACTIVITY_DOMAIN_EXT_API has properties: 'properties' must then point to a
// roctracer_ext_properties_t whose start/stop callbacks are installed as the hooks invoked by
// roctracer_start() / roctracer_stop(). The HSA and HIP domains accept the call and ignore
// 'properties'. Any other domain yields ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID.
// Returns ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT when 'properties' is null for EXT_API.
ROCTRACER_API roctracer_status_t roctracer_set_properties(roctracer_domain_t domain,
                                                          void* properties) {
  API_METHOD_PREFIX
  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_OPS:
    case ACTIVITY_DOMAIN_HSA_EVT:
    case ACTIVITY_DOMAIN_HSA_API:
    case ACTIVITY_DOMAIN_HIP_OPS:
    case ACTIVITY_DOMAIN_HIP_API: {
      break;
    }
    case ACTIVITY_DOMAIN_EXT_API: {
      // The properties are dereferenced below, so a null pointer must be rejected first.
      if (properties == nullptr) {
        EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
                    "null properties, domain ID(" << domain << ")");
      }
      const auto* ext_properties = static_cast<const roctracer_ext_properties_t*>(properties);
      roctracer_start_cb = ext_properties->start_cb;
      roctracer_stop_cb = ext_properties->stop_cb;
      break;
    }
    default:
      EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
  }
  API_METHOD_SUFFIX
}
extern "C" {

// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
// itself, not by code during _init. 'extern const' seems to do that although that is not a
// guarantee.
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 50;

// HSA-runtime tool on-load method.
// Initializes the HSA support layer with the runtime's API table and always returns true. The
// remaining parameters are not used.
ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
                             uint64_t failed_tool_count, const char* const* failed_tool_names) {
  (void)runtime_version;
  (void)failed_tool_count;
  (void)failed_tool_names;
  hsa_support::Initialize(table);
  return true;
}

// HSA-runtime tool on-unload method: finalizes the HSA support layer.
ROCTRACER_EXPORT void OnUnload() {
  hsa_support::Finalize();
}

}  // extern "C"
+11
Féach ar an gComhad
@@ -0,0 +1,11 @@
ROCTX_4.1 {
global: roctxMarkA;
roctxRangePop;
roctxRangePushA;
roctxRangeStartA;
roctxRangeStop;
roctxRegisterTracerCallback;
roctx_version_major;
roctx_version_minor;
local: *;
};
+94
Féach ar an gComhad
@@ -0,0 +1,94 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "roctx.h"
#include "roctracer_roctx.h"
#include "ext/prof_protocol.h"
#include <atomic>
#include <cassert>
namespace {
// Tracer callback installed through roctxRegisterTracerCallback(). It has static storage
// duration, so it is null until a callback is registered; ReportActivity() does nothing then.
std::atomic<int (*)(activity_domain_t domain, uint32_t operation_id, void* data)> report_activity;
// Per-thread nesting depth of push/pop ranges: incremented by roctxRangePushA() and decremented
// by roctxRangePop().
thread_local int nested_range_level{0};
// Reports one rocTX API call to the registered tracer callback, if there is one.
//
// operation_id selects which member of the argument union is filled in:
//   - roctxMarkA / roctxRangePushA: 'message'
//   - roctxRangeStartA:             'message' and 'id'
//   - roctxRangeStop:               'id'
//   - roctxRangePop:                no arguments
// Any other operation id trips an assertion in debug builds.
void ReportActivity(roctx_api_id_t operation_id, const char* message = nullptr,
                    roctx_range_id_t id = {}) {
  const auto callback = report_activity.load(std::memory_order_relaxed);
  if (callback == nullptr) return;

  roctx_api_data_t payload{};
  if (operation_id == ROCTX_API_ID_roctxMarkA) {
    payload.args.roctxMarkA.message = message;
  } else if (operation_id == ROCTX_API_ID_roctxRangePushA) {
    payload.args.roctxRangePushA.message = message;
  } else if (operation_id == ROCTX_API_ID_roctxRangePop) {
    // Nothing to record besides the operation id itself.
  } else if (operation_id == ROCTX_API_ID_roctxRangeStartA) {
    payload.args.roctxRangeStartA.message = message;
    payload.args.roctxRangeStartA.id = id;
  } else if (operation_id == ROCTX_API_ID_roctxRangeStop) {
    payload.args.roctxRangeStop.id = id;
  } else {
    assert(!"should not reach here");
  }

  callback(ACTIVITY_DOMAIN_ROCTX, operation_id, &payload);
}
} // namespace
// Returns the major version this library was built with (ROCTX_VERSION_MAJOR).
ROCTX_API uint32_t roctx_version_major() {
  return ROCTX_VERSION_MAJOR;
}
// Returns the minor version this library was built with (ROCTX_VERSION_MINOR).
ROCTX_API uint32_t roctx_version_minor() {
  return ROCTX_VERSION_MINOR;
}
// Reports a marker event carrying 'message' to the registered tracer callback.
ROCTX_API void roctxMarkA(const char* message) {
  ReportActivity(ROCTX_API_ID_roctxMarkA, message);
}
// Reports the start of a nested range and returns the calling thread's nesting level as it
// was before this push (the first push on a thread returns 0).
ROCTX_API int roctxRangePushA(const char* message) {
  ReportActivity(ROCTX_API_ID_roctxRangePushA, message);
  const int level_before_push = nested_range_level;
  nested_range_level = level_before_push + 1;
  return level_before_push;
}
// Reports the end of the innermost nested range. Returns the calling thread's nesting level
// after the pop, or -1 when there is no open range. The pop is reported either way.
ROCTX_API int roctxRangePop() {
  ReportActivity(ROCTX_API_ID_roctxRangePop);
  if (nested_range_level != 0) {
    nested_range_level -= 1;
    return nested_range_level;
  }
  return -1;
}
// Starts a range identified by a fresh id, reports it with 'message', and returns the id.
// Ids come from a process-wide atomic counter that starts at 1.
ROCTX_API roctx_range_id_t roctxRangeStartA(const char* message) {
  static std::atomic<roctx_range_id_t> next_range_id(1);
  const roctx_range_id_t assigned_id = next_range_id.fetch_add(1);
  ReportActivity(ROCTX_API_ID_roctxRangeStartA, message, assigned_id);
  return assigned_id;
}
// Reports the end of the range identified by 'range_id'; no message is attached.
ROCTX_API void roctxRangeStop(roctx_range_id_t range_id) {
  const char* const no_message = nullptr;
  ReportActivity(ROCTX_API_ID_roctxRangeStop, no_message, range_id);
}
// Installs (or, when 'function' is null, removes) the callback that receives every rocTX API
// event. The pointer is published with a relaxed atomic store.
extern "C" ROCTX_EXPORT void roctxRegisterTracerCallback(int (*function)(activity_domain_t domain,
                                                                         uint32_t operation_id,
                                                                         void* data)) {
  constexpr auto order = std::memory_order_relaxed;
  report_activity.store(function, order);
}
+1
Féach ar an gComhad
@@ -0,0 +1 @@
{ global: HSA_AMD_TOOL_PRIORITY; OnLoad; OnUnload; local: *; };
+288
Féach ar an gComhad
@@ -0,0 +1,288 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef TOOL_TRACE_BUFFER_H_
#define TOOL_TRACE_BUFFER_H_
#include <atomic>
#include <cassert>
#include <condition_variable>
#include <functional>
#include <future>
#include <iostream>
#include <list>
#include <mutex>
#include <optional>
#include <sstream>
#include <string>
#include <thread>
namespace roctracer {
// Common base of all trace buffers. Every registered instance is a node of a process-wide,
// singly linked list ordered by ascending priority, so that FlushAll() can flush all buffers
// in that order. The list (head_ and every next_ link) is protected by mutex_.
class TraceBufferBase {
 public:
  // Calls Flush() on each registered trace buffer, in list order, while holding the list mutex.
  static void FlushAll() {
    std::lock_guard lock(mutex_);
    auto* node = head_;
    while (node != nullptr) {
      node->Flush();
      node = node->next_;
    }
  }

  // Links 'elem' into the list in front of the first node whose priority is not lower than
  // elem's priority, or at the tail when there is no such node.
  static void Register(TraceBufferBase* elem) {
    std::lock_guard lock(mutex_);
    TraceBufferBase** link = &head_;
    while (true) {
      TraceBufferBase* node = *link;
      if (node == nullptr || elem->priority_ <= node->priority_) break;
      link = &node->next_;
    }
    elem->next_ = *link;
    *link = elem;
  }

  // Unlinks 'elem' from the list. 'elem' is expected to be registered (asserted).
  static void Unregister(TraceBufferBase* elem) {
    std::lock_guard lock(mutex_);
    TraceBufferBase** link = &head_;
    for (; *link != nullptr && *link != elem; link = &(*link)->next_) {
    }
    assert(*link != nullptr && "elem is not in the list");
    *link = elem->next_;
  }

  TraceBufferBase(std::string name, int priority)
      : name_(std::move(name)), priority_(priority), next_(nullptr) {}

  TraceBufferBase(const TraceBufferBase&) = delete;
  TraceBufferBase& operator=(const TraceBufferBase&) = delete;

  // The destructor removes this instance from the global list.
  virtual ~TraceBufferBase() { Unregister(this); }

  // Flushes the records held by the concrete trace buffer.
  virtual void Flush() = 0;

  // Name given at construction time.
  std::string name() && { return std::move(name_); }
  const std::string& name() const& { return name_; }

 private:
  const std::string name_;
  const int priority_;
  TraceBufferBase* next_;  // Next node in the global list.

  static TraceBufferBase* head_;  // First node of the global list.
  static std::mutex mutex_;       // Protects the global list.
};
// State stored in a trace entry's 'valid' member. TraceBuffer marks the slots of a freshly
// allocated buffer TRACE_ENTRY_INVALID, and TraceBuffer::Flush() only consumes entries whose
// state is TRACE_ENTRY_COMPLETE.
enum TraceEntryState { TRACE_ENTRY_INVALID = 0, TRACE_ENTRY_INIT = 1, TRACE_ENTRY_COMPLETE = 2 };
// Trace buffer storing 'Entry' records in a chain of fixed-size buffers of 'size' entries.
//
// Writers claim a slot with Emplace(), which advances an atomic write index, and construct the
// entry in place. Flush() hands every completed entry, in index order, to the flush callback and
// then destroys it. When the current buffer is full the writer switches to a pre-allocated
// "free" buffer, and a lazily started worker thread allocates the next free buffer.
//
// Requirements on Entry: it must expose an atomic member named 'valid' that holds a
// TraceEntryState value. The producer sets it to TRACE_ENTRY_COMPLETE (with release semantics)
// once the record may be flushed.
template <typename Entry, typename Allocator = std::allocator<Entry>>
class TraceBuffer : protected TraceBufferBase {
 public:
  using callback_t = std::function<void(Entry*)>;

  // name:           name of the buffer (see TraceBufferBase::name()).
  // size:           number of entries per buffer, must not be 0.
  // flush_callback: invoked by Flush() for every completed entry.
  // priority:       position in the global flush order (lower values are flushed first).
  TraceBuffer(std::string name, uint64_t size, callback_t flush_callback, int priority = 0)
      : TraceBufferBase(std::move(name), priority),
        flush_callback_(std::move(flush_callback)),
        size_(size) {
    assert(size_ != 0 && "cannot create an empty trace buffer");

    // The slots of the first write buffer must be marked invalid exactly like the slots of every
    // later buffer. Otherwise Flush() could read an indeterminate 'valid' state from a slot that
    // was claimed by a writer but whose entry has not been constructed yet.
    Entry* write_buffer = AllocateBuffer();
    buffer_list_.push_back(write_buffer);

    read_index_ = 0;
    write_index_ = {0, write_buffer};

    AllocateFreeBuffer();

    // Add this instance to the link list of all trace buffers in the process.
    Register(this);
  }

  ~TraceBuffer() override {
    // Flush the remaining records. After flushing, there should not be any records left in the
    // trace buffer.
    Flush();
    assert(read_index_ == write_index_.load().index);

    // Acquire both the writer and worker lock as we are accessing shared variables they protect.
    std::unique_lock writer_lock(write_mutex_, std::defer_lock);
    std::unique_lock worker_lock(worker_mutex_, std::defer_lock);
    std::lock(writer_lock, worker_lock);

    // Deallocate the buffers.
    allocator_.deallocate(write_index_.load().buffer, size_);
    allocator_.deallocate(free_buffer_, size_);

    // Stop the worker thread. The worker thread loop checks the 'worker_thread_' std::optional
    // after waking up, and exits if it does not have a value.
    if (worker_thread_) {
      std::thread worker_thread = std::move(worker_thread_.value());
      {
        // Tell the worker thread loop to exit.
        worker_thread_.reset();
        free_buffer_ = nullptr;
        worker_cond_.notify_one();
      }
      // Release the worker lock to allow the worker thread to exit.
      worker_lock.unlock();
      worker_thread.join();
    }
  }

  // Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are
  // monotonically increasing indices, with read_pointer % size always indexing inside the first
  // buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with
  // the next invocation after changing its state to 'complete'.
  void Flush() override {
    std::lock_guard lock(write_mutex_);
    auto write_index = write_index_.load(std::memory_order_relaxed);

    for (auto it = buffer_list_.begin(); it != buffer_list_.end();) {
      auto end_of_buffer = read_index_ - read_index_ % size_ + size_;

      while (read_index_ < std::min(write_index.index, end_of_buffer)) {
        Entry* entry = &(*it)[read_index_ % size_];
        // The entry is not yet complete, stop flushing here.
        if (entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return;
        flush_callback_(entry);
        entry->~Entry();
        ++read_index_;
      }

      // The buffer is still in use or the read pointer did not reach the end of the buffer.
      if (*it == write_index.buffer || read_index_ != end_of_buffer) return;

      // All entries in the current buffer are now processed. Destroy the buffer and move onto the
      // next buffer in the list.
      allocator_.deallocate(*it, size_);
      it = buffer_list_.erase(it);
    }
  }

  // Claims the next slot and constructs an Entry in it from 'args'. The caller is responsible
  // for marking the returned entry complete once it is ready to be flushed.
  template <typename... Args> Entry& Emplace(Args... args) {
    return *new (GetEntry()) Entry(std::forward<Args>(args)...);
  }

 private:
  // Returns the storage of the next free slot, switching to a new buffer when the current one
  // is full.
  Entry* GetEntry() {
    auto current = write_index_.load(std::memory_order_relaxed);

    while (true) {
      // If the pointer is at the end of the current buffer, switch to the available free buffer and
      // notify the worker thread to allocate a new buffer.
      if (current.index != 0 && current.index % size_ == 0) {
        std::lock_guard lock(write_mutex_);

        // If the worker thread wasn't already started, start it now. This avoids starting a new
        // thread when the trace buffer is created.
        if (!worker_thread_) {
          std::promise<void> ready;
          auto future = ready.get_future();
          {
            std::lock_guard worker_lock(worker_mutex_);
            worker_thread_.emplace(&TraceBuffer::WorkerThreadLoop, this, std::move(ready));
          }
          future.wait();
        }

        // Re-check the pointer overflow under the writer lock, another thread could have beaten us
        // to it and already bumped the write_index_.
        current = write_index_.load(std::memory_order_relaxed);
        if (current.index % size_ == 0) {
          std::unique_lock worker_lock(worker_mutex_);

          // Wait for the free buffer to become available.
          worker_cond_.wait(worker_lock, [this]() { return free_buffer_ != nullptr; });

          current.buffer = free_buffer_;
          buffer_list_.push_back(current.buffer);
          write_index_.store({current.index + 1, current.buffer}, std::memory_order_relaxed);

          // Tell the worker thread to allocate a new free buffer.
          free_buffer_ = nullptr;
          worker_cond_.notify_one();

          // We successfully allocated a new buffer, return the first element.
          return &current.buffer[0];
        }
      }

      if (write_index_.compare_exchange_weak(current, {current.index + 1, current.buffer},
                                             std::memory_order_relaxed))
        return &current.buffer[current.index % size_];
    }
  }

  // Allocates storage for size_ entries and marks every slot TRACE_ENTRY_INVALID so that Flush()
  // never mistakes a not-yet-constructed slot for a completed entry.
  Entry* AllocateBuffer() {
    Entry* buffer = allocator_.allocate(size_);
    assert(buffer != nullptr);
    for (size_t i = 0; i < size_; ++i)
      buffer[i].valid.store(TRACE_ENTRY_INVALID, std::memory_order_relaxed);
    return buffer;
  }

  // Allocates the next free buffer. There must not be a free buffer already.
  void AllocateFreeBuffer() {
    assert(free_buffer_ == nullptr);
    free_buffer_ = AllocateBuffer();
  }

  // Body of the worker thread: whenever the free buffer has been consumed, allocate a new one.
  // The loop exits when it wakes up and 'worker_thread_' no longer holds a value.
  void WorkerThreadLoop(std::promise<void> ready) {
    std::unique_lock lock(worker_mutex_);

    // This worker thread is now ready to accept work.
    ready.set_value();

    while (true) {
      worker_cond_.wait(lock, [this]() { return free_buffer_ == nullptr; });
      if (!worker_thread_) break;
      AllocateFreeBuffer();
      worker_cond_.notify_one();
    }
  }

  // The WriteIndex is used to store both the index and the buffer associated with that index (the
  // buffer contains the trace buffer records at [index - index % size_, index - index % size_ +
  // size_ - 1]) in a single atomic variable.
  struct WriteIndex {
    uint64_t index;
    Entry* buffer;
  };

  const callback_t flush_callback_;
  const uint64_t size_;

  uint64_t read_index_;                  // The index of the next record to flush.
  std::atomic<WriteIndex> write_index_;  // The index of the next record that could be written.
  Entry* free_buffer_{nullptr};          // The next available free buffer.

  std::optional<std::thread> worker_thread_;
  std::mutex worker_mutex_;
  std::condition_variable worker_cond_;

  std::mutex write_mutex_;
  std::list<Entry*> buffer_list_;
  Allocator allocator_;
};
} // namespace roctracer
// Expands to the out-of-class definitions of TraceBufferBase's static members (the list head
// and the list mutex). The expansion defines objects, so it should be used in exactly one
// translation unit of the program.
#define TRACE_BUFFER_INSTANTIATE() \
roctracer::TraceBufferBase* roctracer::TraceBufferBase::head_ = nullptr; \
std::mutex roctracer::TraceBufferBase::mutex_;
#endif // TOOL_TRACE_BUFFER_H_
+794
Féach ar an gComhad
@@ -0,0 +1,794 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <roctracer_ext.h>
#include <roctracer_hip.h>
#include <roctracer_hsa.h>
#include <roctracer_plugin.h>
#include <roctracer_roctx.h>
#include <atomic>
#include <cassert>
#include <chrono>
#include <exception>
#include <experimental/filesystem>
#include <iostream>
#include <sstream>
#include <string>
#include <thread>
#include <stack>
#include <utility>
#include <vector>
#include <variant>
#include <cxxabi.h> /* kernel name demangling */
#include <dirent.h>
#include <dlfcn.h>
#include <hsa/hsa_api_trace.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h> /* SYS_xxx definitions */
#include <sys/types.h>
#include <unistd.h> /* usleep */
#include "debug.h"
#include "loader.h"
#include "trace_buffer.h"
#include "xml.h"
void initialize() __attribute__((constructor(101)));
namespace fs = std::experimental::filesystem;
// Macro to check ROC-tracer calls status.
// Evaluates 'call' once; if the result is not ROCTRACER_STATUS_SUCCESS, reports the stringified
// call together with roctracer_error_string() through fatal(). The do/while(false) wrapper makes
// the macro usable as a single statement.
#define CHECK_ROCTRACER(call) \
do { \
if ((call) != ROCTRACER_STATUS_SUCCESS) { \
fatal(#call " failed: %s", roctracer_error_string()); \
} \
} while (false)
TRACE_BUFFER_INSTANTIATE();
namespace {
// Returns the current timestamp as reported by roctracer_get_timestamp(). A failing call is
// reported through CHECK_ROCTRACER.
inline roctracer_timestamp_t timestamp_ns() {
  roctracer_timestamp_t now;
  CHECK_ROCTRACER(roctracer_get_timestamp(&now));
  return now;
}
// API names listed in the XML input for the "HSA" and "HIP" trace nodes (filled by tool_load()).
std::vector<std::string> hsa_api_vec;
std::vector<std::string> hip_api_vec;
// Tracing switches, set by tool_load() from the ROCTRACER_DOMAIN environment variable and the
// XML input, and read by tool_unload() to disable what was enabled.
bool trace_roctx = false;
bool trace_hsa_api = false;
bool trace_hsa_activity = false;
bool trace_hip_api = false;
bool trace_hip_activity = false;
bool trace_pcs = false;
// Returns the id of the current process. The getpid system call is issued once; later calls
// return the cached value.
uint32_t GetPid() {
  static const uint32_t cached_pid = static_cast<uint32_t>(syscall(__NR_getpid));
  return cached_pid;
}
// Returns the kernel thread id of the calling thread. The gettid system call is issued once
// per thread; later calls on that thread return the cached value.
uint32_t GetTid() {
  static thread_local const uint32_t cached_tid = static_cast<uint32_t>(syscall(__NR_gettid));
  return cached_tid;
}
// Returns the trace buffer size configured through the ROCTRACER_BUFFER_SIZE environment
// variable, or the default of 0x200000 when the variable is not set.
//
// This function runs during static initialization (it sizes the global trace buffers), where an
// exception escaping from std::stoll would terminate the process. A value that is not a number,
// does not fit in a long long, or is not strictly positive is therefore rejected with a
// diagnostic and the default size is used instead.
size_t GetBufferSize() {
  constexpr size_t kDefaultBufferSize = 0x200000;

  const char* bufSize = getenv("ROCTRACER_BUFFER_SIZE");
  // Default size if not set
  if (bufSize == nullptr) return kDefaultBufferSize;

  try {
    const long long requested = std::stoll(bufSize);
    if (requested > 0) return static_cast<size_t>(requested);
  } catch (const std::exception&) {
    // std::invalid_argument or std::out_of_range: handled by the fallback below.
  }

  std::cerr << "ROCtracer: invalid ROCTRACER_BUFFER_SIZE value '" << bufSize
            << "', using the default buffer size" << std::endl;
  return kDefaultBufferSize;
}
// Tracing control thread
// Timing parameters (in microseconds) of the periodic start/stop cycle run by
// trace_period_fun(). tool_load() derives them from the ROCP_CTRL_RATE environment variable:
// initial delay, length of a traced period, and distance between two traced periods.
uint32_t control_delay_us = 0;
uint32_t control_len_us = 0;
uint32_t control_dist_us = 0;
// Thread running trace_period_fun(); created by tool_load() and joined by tool_unload().
std::thread* trace_period_thread = nullptr;
// Set by tool_unload() to ask trace_period_fun() to return.
std::atomic_bool trace_period_stop = false;
// Body of the tracing control thread. After an initial delay it repeatedly starts tracing,
// keeps it running for control_len_us, stops it, and idles for control_dist_us. The loop ends
// as soon as trace_period_stop is observed set; tracing is always stopped before returning.
void trace_period_fun() {
  using std::chrono::microseconds;

  std::this_thread::sleep_for(microseconds(control_delay_us));

  for (;;) {
    roctracer_start();
    if (trace_period_stop) {
      // Asked to stop right after starting: undo the start and leave.
      roctracer_stop();
      return;
    }

    std::this_thread::sleep_for(microseconds(control_len_us));
    roctracer_stop();
    if (trace_period_stop) return;

    std::this_thread::sleep_for(microseconds(control_dist_us));
    if (trace_period_stop) return;
  }
}
// Flushing control thread
// Interval (in microseconds) between two periodic flushes; tool_load() reads it from the
// ROCP_FLUSH_RATE environment variable.
uint32_t control_flush_us = 0;
// Thread running flush_thr_fun(); created by tool_load() and joined by tool_unload().
std::thread* flush_thread = nullptr;
// Set by tool_unload() to ask flush_thr_fun() to return.
std::atomic_bool stop_flush_thread = false;
// Body of the flushing control thread: until stop_flush_thread is set, flush the roctracer
// activity records and all trace buffers, then sleep for control_flush_us microseconds.
void flush_thr_fun() {
  while (!stop_flush_thread) {
    CHECK_ROCTRACER(roctracer_flush_activity());
    roctracer::TraceBufferBase::FlushAll();

    const auto wake_up_time =
        std::chrono::steady_clock::now() + std::chrono::microseconds(control_flush_us);
    std::this_thread::sleep_until(wake_up_time);
  }
}
class roctracer_plugin_t {
public:
roctracer_plugin_t(const std::string& plugin_path) {
plugin_handle_ = dlopen(plugin_path.c_str(), RTLD_LAZY);
if (plugin_handle_ == nullptr) {
warning("dlopen(\"%s\") failed: %s", plugin_path.c_str(), dlerror());
return;
}
roctracer_plugin_write_callback_record_ =
reinterpret_cast<decltype(roctracer_plugin_write_callback_record)*>(
dlsym(plugin_handle_, "roctracer_plugin_write_callback_record"));
if (!roctracer_plugin_write_callback_record_) return;
roctracer_plugin_write_activity_records_ =
reinterpret_cast<decltype(roctracer_plugin_write_activity_records)*>(
dlsym(plugin_handle_, "roctracer_plugin_write_activity_records"));
if (!roctracer_plugin_write_activity_records_) return;
roctracer_plugin_finalize_ = reinterpret_cast<decltype(roctracer_plugin_finalize)*>(
dlsym(plugin_handle_, "roctracer_plugin_finalize"));
if (!roctracer_plugin_finalize_) return;
if (auto* initialize = reinterpret_cast<decltype(roctracer_plugin_initialize)*>(
dlsym(plugin_handle_, "roctracer_plugin_initialize"));
initialize != nullptr)
valid_ = initialize(ROCTRACER_VERSION_MAJOR, ROCTRACER_VERSION_MINOR) == 0;
}
~roctracer_plugin_t() {
if (is_valid()) roctracer_plugin_finalize_();
if (plugin_handle_ != nullptr) dlclose(plugin_handle_);
}
bool is_valid() const { return valid_; }
template <typename... Args> auto write_callback_record(Args... args) const {
assert(is_valid());
return roctracer_plugin_write_callback_record_(std::forward<Args>(args)...);
}
template <typename... Args> auto write_activity_records(Args... args) const {
assert(is_valid());
return roctracer_plugin_write_activity_records_(std::forward<Args>(args)...);
}
private:
bool valid_{false};
void* plugin_handle_;
decltype(roctracer_plugin_finalize)* roctracer_plugin_finalize_;
decltype(roctracer_plugin_write_callback_record)* roctracer_plugin_write_callback_record_;
decltype(roctracer_plugin_write_activity_records)* roctracer_plugin_write_activity_records_;
};
// The output plugin. Empty until OnLoad() has loaded a valid plugin; the flush callbacks assert
// that it holds a value before using it.
std::optional<roctracer_plugin_t> plugin;
} // namespace
///////////////////////////////////////////////////////////////////////////////////////////////////////
// rocTX annotation tracing
// Trace buffer entry for one rocTX API call. The entry owns a private copy of the message
// string, which is released by the destructor.
struct roctx_trace_entry_t {
  std::atomic<roctracer::TraceEntryState> valid;
  roctracer_record_t record;
  union {
    roctx_api_data_t data;
  };

  roctx_trace_entry_t(uint32_t cid, roctracer_timestamp_t time, uint32_t pid, uint32_t tid,
                      roctx_range_id_t rid, const char* message)
      : valid(roctracer::TRACE_ENTRY_INIT) {
    // Keep our own copy of the message: the caller's string may not outlive the call.
    if (message != nullptr) {
      data.args.message = strdup(message);
    } else {
      data.args.message = nullptr;
    }
    data.args.id = rid;

    record.domain = ACTIVITY_DOMAIN_ROCTX;
    record.op = cid;
    record.kind = 0;
    record.begin_ns = time;
    record.end_ns = 0;
    record.process_id = pid;
    record.thread_id = tid;
  }

  ~roctx_trace_entry_t() {
    // free() accepts a null pointer, so no separate check is needed.
    free(const_cast<char*>(data.args.message));
  }
};
// Trace buffer collecting rocTX records. Its flush callback passes each completed entry's record
// and API data to the output plugin, which must have been loaded by then.
roctracer::TraceBuffer<roctx_trace_entry_t> roctx_trace_buffer(
"rocTX API", GetBufferSize(), [](roctx_trace_entry_t* entry) {
assert(plugin && "plugin is not initialized");
plugin->write_callback_record(&entry->record, &entry->data);
});
// rocTX callback function
void roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
void* /* user_arg */) {
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
roctx_trace_entry_t& entry = roctx_trace_buffer.Emplace(cid, timestamp_ns(), GetPid(), GetTid(),
data->args.id, data->args.message);
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// HSA API tracing
// Trace buffer entry for one HSA API call: the activity record plus a copy of the API data.
struct hsa_api_trace_entry_t {
  std::atomic<uint32_t> valid;
  roctracer_record_t record;
  union {
    hsa_api_data_t data;
  };

  hsa_api_trace_entry_t(uint32_t cid, roctracer_timestamp_t begin, roctracer_timestamp_t end,
                        uint32_t pid, uint32_t tid, const hsa_api_data_t& hsa_api_data)
      : valid(roctracer::TRACE_ENTRY_INIT) {
    data = hsa_api_data;

    record.domain = ACTIVITY_DOMAIN_HSA_API;
    record.op = cid;
    record.kind = 0;
    record.begin_ns = begin;
    record.end_ns = end;
    record.process_id = pid;
    record.thread_id = tid;
  }

  // Nothing to release: the entry does not own any resource.
  ~hsa_api_trace_entry_t() {}
};
// Trace buffer collecting HSA API records. Its flush callback passes each completed entry's
// record and API data to the output plugin, which must have been loaded by then.
roctracer::TraceBuffer<hsa_api_trace_entry_t> hsa_api_trace_buffer(
"HSA API", GetBufferSize(), [](hsa_api_trace_entry_t* entry) {
assert(plugin && "plugin is not initialized");
plugin->write_callback_record(&entry->record, &entry->data);
});
// HSA API callback function
void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
(void)arg;
const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
*data->phase_data = timestamp_ns();
} else {
const roctracer_timestamp_t begin_timestamp = *data->phase_data;
const roctracer_timestamp_t end_timestamp =
(cid == HSA_API_ID_hsa_shut_down) ? begin_timestamp : timestamp_ns();
hsa_api_trace_entry_t& entry = hsa_api_trace_buffer.Emplace(cid, begin_timestamp, end_timestamp,
GetPid(), GetTid(), *data);
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// HIP API tracing
// Trace buffer entry for one HIP API call: the activity record plus a copy of the API data.
// The entry owns a private copy of the kernel name (if any), released by the destructor.
struct hip_api_trace_entry_t {
  std::atomic<uint32_t> valid;
  roctracer_record_t record;
  union {
    hip_api_data_t data;
  };

  hip_api_trace_entry_t(uint32_t cid, roctracer_timestamp_t begin, roctracer_timestamp_t end,
                        uint32_t pid, uint32_t tid, const hip_api_data_t& hip_api_data,
                        const char* name)
      : valid(roctracer::TRACE_ENTRY_INIT) {
    data = hip_api_data;

    record.domain = ACTIVITY_DOMAIN_HIP_API;
    record.op = cid;
    record.kind = 0;
    record.begin_ns = begin;
    record.end_ns = end;
    record.process_id = pid;
    record.thread_id = tid;

    // Keep our own copy of the name: the caller's string may not outlive the call.
    if (name) {
      record.kernel_name = strdup(name);
    } else {
      record.kernel_name = nullptr;
    }
  }

  ~hip_api_trace_entry_t() {
    // free() accepts a null pointer, so no separate check is needed.
    free(const_cast<char*>(record.kernel_name));
  }
};
// Builds a "<kernel name>:<device id>;" list for a multi-device launch, with one item per
// launch parameter set that has a non-null kernel function.
static std::string getKernelNameMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
                                                       int numDevices) {
  std::stringstream joined_names;
  for (int device = 0; device < numDevices; ++device) {
    const auto& params = launchParamsList[device];
    if (params.func == nullptr) continue;

    joined_names << roctracer::HipLoader::Instance().KernelNameRefByPtr(params.func);
    joined_names << ":";
    joined_names << roctracer::HipLoader::Instance().GetStreamDeviceId(params.stream);
    joined_names << ";";
  }
  return joined_names.str();
}
// Helper that merges several callables (typically lambdas) into a single object exposing all of
// their call operators. getKernelName() uses it as the visitor passed to std::visit.
template <typename... Ts> struct Overloaded : Ts... {
using Ts::operator()...;
};
// Deduction guide so that Overloaded{lambda1, lambda2} deduces the template arguments.
template <class... Ts> Overloaded(Ts...) -> Overloaded<Ts...>;
static std::optional<std::string> getKernelName(uint32_t cid, const hip_api_data_t* data) {
std::variant<const void*, hipFunction_t> function;
switch (cid) {
case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: {
return getKernelNameMultiKernelMultiDevice(
data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList,
data->args.hipExtLaunchMultiKernelMultiDevice.numDevices);
}
case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: {
return getKernelNameMultiKernelMultiDevice(
data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList,
data->args.hipLaunchCooperativeKernelMultiDevice.numDevices);
}
case HIP_API_ID_hipLaunchKernel: {
function = data->args.hipLaunchKernel.function_address;
break;
}
case HIP_API_ID_hipExtLaunchKernel: {
function = data->args.hipExtLaunchKernel.function_address;
break;
}
case HIP_API_ID_hipLaunchCooperativeKernel: {
function = data->args.hipLaunchCooperativeKernel.f;
break;
}
case HIP_API_ID_hipLaunchByPtr: {
function = data->args.hipLaunchByPtr.hostFunction;
break;
}
case HIP_API_ID_hipGraphAddKernelNode: {
function = data->args.hipGraphAddKernelNode.pNodeParams->func;
break;
}
case HIP_API_ID_hipGraphExecKernelNodeSetParams: {
function = data->args.hipGraphExecKernelNodeSetParams.pNodeParams->func;
break;
}
case HIP_API_ID_hipGraphKernelNodeSetParams: {
function = data->args.hipGraphKernelNodeSetParams.pNodeParams->func;
break;
}
case HIP_API_ID_hipModuleLaunchKernel: {
function = data->args.hipModuleLaunchKernel.f;
break;
}
case HIP_API_ID_hipExtModuleLaunchKernel: {
function = data->args.hipExtModuleLaunchKernel.f;
break;
}
case HIP_API_ID_hipHccModuleLaunchKernel: {
function = data->args.hipHccModuleLaunchKernel.f;
break;
}
default:
return {};
}
return std::visit(
Overloaded{
[](const void* func) {
return roctracer::HipLoader::Instance().KernelNameRefByPtr(func);
},
[](hipFunction_t func) { return roctracer::HipLoader::Instance().KernelNameRef(func); },
},
function);
}
// Trace buffer collecting HIP API records. Its flush callback passes each completed entry's
// record and API data to the output plugin, which must have been loaded by then.
roctracer::TraceBuffer<hip_api_trace_entry_t> hip_api_trace_buffer(
"HIP API", GetBufferSize(), [](hip_api_trace_entry_t* entry) {
assert(plugin && "plugin is not initialized");
plugin->write_callback_record(&entry->record, &entry->data);
});
void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
(void)arg;
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
const roctracer_timestamp_t timestamp = timestamp_ns();
std::optional<std::string> kernel_name;
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
*data->phase_data = timestamp;
} else {
// Post init of HIP APU args
hipApiArgsInit((hip_api_id_t)cid, const_cast<hip_api_data_t*>(data));
kernel_name = getKernelName(cid, data);
hip_api_trace_entry_t& entry =
hip_api_trace_buffer.Emplace(cid, *data->phase_data, timestamp, GetPid(), GetTid(), *data,
kernel_name ? kernel_name->c_str() : nullptr);
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Input parser
std::string normalize_token(const std::string& token, bool not_empty, const std::string& label) {
const std::string space_chars_set = " \t";
const size_t first_pos = token.find_first_not_of(space_chars_set);
size_t norm_len = 0;
std::string error_str = "none";
if (first_pos != std::string::npos) {
const size_t last_pos = token.find_last_not_of(space_chars_set);
if (last_pos == std::string::npos)
error_str = "token string error: \"" + token + "\"";
else {
const size_t end_pos = last_pos + 1;
if (end_pos <= first_pos)
error_str = "token string error: \"" + token + "\"";
else
norm_len = end_pos - first_pos;
}
}
if (((first_pos != std::string::npos) && (norm_len == 0)) ||
((first_pos == std::string::npos) && not_empty)) {
error("normalize_token error: %s", error_str.c_str());
}
return (norm_len != 0) ? token.substr(first_pos, norm_len) : std::string("");
}
// Splits the value of option 'field' of an XML node into tokens and appends the non-empty,
// blank-trimmed tokens to 'vec'.
//
// Every character of 'delim' acts as a delimiter, and a run of consecutive delimiters is
// treated as one separator. For example "hipLaunchKernel, hipExtModuleLaunchKernel" with the
// delimiters ' ' and ',' yields two tokens. When 'label' is not null, the field and its raw
// value are echoed to std::cout prefixed by the label.
//
// Returns the number of separators consumed (0 when the option is absent).
int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const std::string& delim,
                  std::vector<std::string>* vec, const char* label = nullptr) {
  const auto& options = node->opts;
  const auto option = options.find(field);
  if (option == options.end()) return 0;

  const std::string& list = option->second;
  if (label != nullptr) std::cout << label << field << " = " << list << std::endl;

  int separators_seen = 0;
  const size_t list_length = list.length();
  size_t cursor = 0;
  while (cursor < list_length) {
    // The token runs up to the next delimiter character, or to the end of the string.
    const size_t delim_pos = list.find_first_of(delim, cursor);
    const bool has_delim = delim_pos != std::string::npos;
    const size_t token_end = has_delim ? delim_pos : list_length;

    const std::string raw_token = list.substr(cursor, token_end - cursor);
    const std::string cleaned = normalize_token(raw_token, has_delim, "get_xml_array");
    if (cleaned.length() != 0) vec->push_back(cleaned);

    if (!has_delim) break;

    // Skip the whole run of delimiter characters to reach the start of the next token.
    cursor = list.find_first_not_of(delim, delim_pos);
    ++separators_seen;
  }
  return separators_seen;
}
// Allocating tracing pool.
// Opens the default roctracer pool unless one already exists. The pool uses the configured
// buffer size, and its buffer callback forwards the records in [begin, end) to the output
// plugin.
void open_tracing_pool() {
  if (roctracer_default_pool() != nullptr) return;

  const auto forward_to_plugin = [](const char* begin, const char* end, void* /* arg */) {
    assert(plugin && "plugin is not initialized");
    plugin->write_activity_records(reinterpret_cast<const roctracer_record_t*>(begin),
                                   reinterpret_cast<const roctracer_record_t*>(end));
  };

  roctracer_properties_t properties{};
  properties.buffer_size = GetBufferSize();
  properties.buffer_callback_fun = forward_to_plugin;
  CHECK_ROCTRACER(roctracer_open_pool(&properties));
}
// Flush tracing pool
void close_tracing_pool() {
if (roctracer_pool_t* pool = roctracer_default_pool(); pool != nullptr) {
CHECK_ROCTRACER(roctracer_flush_activity_expl(pool));
CHECK_ROCTRACER(roctracer_close_pool_expl(pool));
}
}
// tool library is loaded
// Set by tool_load() and cleared by tool_unload(); it makes both functions do nothing when
// called again in the same state.
static bool is_loaded = false;
// tool unload method.
// Does nothing unless the tool is loaded. Otherwise it stops and joins the helper threads,
// disables the callbacks and activities selected by the trace switches, then closes the
// tracing pool and flushes all trace buffers.
void tool_unload() {
  if (!is_loaded) return;
  is_loaded = false;

  // Stop the periodic flushing thread.
  if (flush_thread != nullptr) {
    stop_flush_thread = true;
    flush_thread->join();
    delete flush_thread;
    flush_thread = nullptr;
  }

  // Stop the tracing control thread.
  if (trace_period_thread != nullptr) {
    trace_period_stop = true;
    trace_period_thread->join();
    delete trace_period_thread;
    trace_period_thread = nullptr;
  }

  if (trace_roctx) CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));

  if (trace_hsa_api) CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));

  const bool hsa_ops_enabled = trace_hsa_activity || trace_pcs;
  if (hsa_ops_enabled) CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));

  const bool hip_enabled = trace_hip_api || trace_hip_activity;
  if (hip_enabled) {
    CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
    CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
    CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
  }

  // Flush tracing pool
  close_tracing_pool();
  roctracer::TraceBufferBase::FlushAll();
}
// tool load method.
//
// Does nothing if the tool is already loaded. Otherwise it configures the tool from the
// environment:
//   - ROCTRACER_DOMAIN: substrings "roctx", "hsa", "hip", "sys" and "pcs" select the trace
//     switches.
//   - ROCP_INPUT:       XML input file; its "top.trace" nodes named rocTX/HSA/GPU/HIP select
//     trace switches and optional lists of API names.
//   - ROCP_CTRL_RATE:   "delay:length:rate" (microseconds) for the periodic tracing control
//     thread. A delay equal to UINT32_MAX (for example "-1") leaves tracing stopped.
//   - ROCP_FLUSH_RATE:  flush interval (microseconds) for the periodic flushing thread.
// It also enables the rocTX callbacks when rocTX tracing was requested.
void tool_load() {
  if (is_loaded == true) return;
  is_loaded = true;

  // API traces switches
  const char* trace_domain = getenv("ROCTRACER_DOMAIN");
  if (trace_domain != nullptr) {
    // Build the string once instead of once per lookup.
    const std::string domains(trace_domain);
    const auto has_domain = [&domains](const char* domain_name) {
      return domains.find(domain_name) != std::string::npos;
    };

    // ROCTX domain
    if (has_domain("roctx")) {
      trace_roctx = true;
    }

    // HSA/HIP domains enabling
    if (has_domain("hsa")) {
      trace_hsa_api = true;
      trace_hsa_activity = true;
    }
    if (has_domain("hip")) {
      trace_hip_api = true;
      trace_hip_activity = true;
    }
    if (has_domain("sys")) {
      trace_hsa_api = true;
      trace_hip_api = true;
      trace_hip_activity = true;
    }

    // PC sampling enabling
    if (has_domain("pcs")) {
      trace_pcs = true;
    }
  }

  std::cout << "ROCtracer (" << std::dec << GetPid() << "):";

  // XML input
  const char* xml_name = getenv("ROCP_INPUT");
  if (xml_name != nullptr) {
    xml::Xml* xml = xml::Xml::Create(xml_name);
    if (xml == nullptr) error("input file not found '%s'", xml_name);

    bool found = false;
    for (const auto* entry : xml->GetNodes("top.trace")) {
      auto it = entry->opts.find("name");
      if (it == entry->opts.end()) error("trace name is missing");
      const std::string& name = it->second;

      // Only the first child node is examined, and it must be a "parameters" node.
      std::vector<std::string> api_vec;
      for (const auto* node : entry->nodes) {
        if (node->tag != "parameters")
          error("trace node is not supported '%s:%%%s'", name.c_str(), node->tag.c_str());
        get_xml_array(node, "api", ", ",
                      &api_vec);  // delimiter options given as both spaces and commas (' ' and ',')
        break;
      }

      if (name == "rocTX") {
        found = true;
        trace_roctx = true;
      }
      if (name == "HSA") {
        found = true;
        trace_hsa_api = true;
        hsa_api_vec = api_vec;
      }
      if (name == "GPU") {
        found = true;
        trace_hsa_activity = true;
      }
      if (name == "HIP") {
        found = true;
        trace_hip_api = true;
        trace_hip_activity = true;
        hip_api_vec = api_vec;
      }
    }

    if (found) std::cout << " input from \"" << xml_name << "\"";
  }
  std::cout << std::endl;

  // Disable HIP activity if HSA activity was set
  if (trace_hsa_activity == true) trace_hip_activity = false;

  // Enable rocTX callbacks
  if (trace_roctx) {
    // initialize rocTX tracing
    std::cout << " rocTX-trace()" << std::endl;
    CHECK_ROCTRACER(
        roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, nullptr));
  }

  const char* ctrl_str = getenv("ROCP_CTRL_RATE");
  if (ctrl_str != nullptr) {
    uint32_t ctrl_delay = 0;
    uint32_t ctrl_len = 0;
    uint32_t ctrl_rate = 0;

    // The destinations are unsigned, so the matching conversion is %u. It still accepts an
    // optional sign, so "-1" keeps yielding UINT32_MAX (the "trace start disabled" value).
    if (sscanf(ctrl_str, "%u:%u:%u", &ctrl_delay, &ctrl_len, &ctrl_rate) != 3 ||
        ctrl_len > ctrl_rate)
      error("invalid ROCP_CTRL_RATE variable (ctrl_delay:ctrl_len:ctrl_rate)");

    control_dist_us = ctrl_rate - ctrl_len;
    control_len_us = ctrl_len;
    control_delay_us = ctrl_delay;

    roctracer_stop();

    if (ctrl_delay != UINT32_MAX) {
      std::cout << "ROCtracer: trace control: delay(" << ctrl_delay << "us), length(" << ctrl_len
                << "us), rate(" << ctrl_rate << "us)" << std::endl;
      trace_period_thread = new std::thread(trace_period_fun);
    } else {
      std::cout << "ROCtracer: trace start disabled" << std::endl;
    }
  }

  const char* flush_str = getenv("ROCP_FLUSH_RATE");
  if (flush_str != nullptr) {
    // A failed conversion leaves control_flush_us at 0, which is rejected just below.
    sscanf(flush_str, "%u", &control_flush_us);
    if (control_flush_us == 0) error("invalid control flush rate value '%s'", flush_str);

    std::cout << "ROCtracer: trace control flush rate(" << control_flush_us << "us)" << std::endl;
    flush_thread = new std::thread(flush_thr_fun);
  }
}
extern "C" {
// Tool priority symbol exported for the HSA runtime.
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
// itself, not by code during _init. 'extern const' seems to do that although that is not a
// guarantee.
// NOTE(review): the meaning of the value 1050 relative to other tools is not visible here.
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 1050;
// HSA-runtime tool on-load method.
//
// Sequence performed here:
//   1. Verify the ROCtracer library version against the headers this tool was built with.
//   2. Pick the output plugin (ROCTRACER_PLUGIN_LIB, default "libfile_plugin.so") and try to
//      load it from the directory of the shared object that contains tool_load().
//   3. Run tool_load() and register tool_unload() with atexit.
//   4. Enable the HSA API, HSA activity, HIP API/activity and PC-sampling domains according to
//      the trace_* flags and the per-domain API name lists.
//
// The table, runtime_version, failed_tool_count and failed_tool_names parameters are not used
// in this function. It always returns true, including on a version mismatch (in which case
// nothing is enabled).
ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
uint64_t failed_tool_count, const char* const* failed_tool_names) {
// Major version must match exactly; the library's minor version must be at least the one
// this tool was compiled against.
if (roctracer_version_major() != ROCTRACER_VERSION_MAJOR ||
roctracer_version_minor() < ROCTRACER_VERSION_MINOR) {
warning("the ROCtracer API version is not compatible with this tool");
return true;
}
// Load output plugin
const char* plugin_name = getenv("ROCTRACER_PLUGIN_LIB");
if (plugin_name == nullptr) plugin_name = "libfile_plugin.so";
// dladdr() yields the path of the object containing tool_load(); the plugin file name replaces
// the last path component. If dladdr() fails, no plugin is loaded and nothing is reported.
if (Dl_info dl_info; dladdr((void*)tool_load, &dl_info) != 0) {
// Drop the plugin again if it did not initialize into a valid state.
if (!plugin.emplace(fs::path(dl_info.dli_fname).replace_filename(plugin_name)).is_valid())
plugin.reset();
}
tool_load();
// OnUnload may not be called if the ROC runtime is not shutdown by the client
// application before exiting, so register an atexit handler to unload the tool.
std::atexit(tool_unload);
// Enable HSA API callbacks/activity
if (trace_hsa_api) {
std::ostringstream out;
out << " HSA-trace(";
if (hsa_api_vec.size() != 0) {
// An explicit API list was given: enable callbacks one operation at a time.
out << "-*";
for (unsigned i = 0; i < hsa_api_vec.size(); ++i) {
uint32_t cid = HSA_API_ID_NUMBER;
const char* api = hsa_api_vec[i].c_str();
// Translate the API name to its operation id, then enable the callback for that id only.
if (roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, nullptr) ==
ROCTRACER_STATUS_SUCCESS &&
roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, nullptr) ==
ROCTRACER_STATUS_SUCCESS)
out << ' ' << api;
else
warning("Unable to enable HSA_API tracing for invalid operation %s", api);
}
} else {
// No list: enable callbacks for the whole HSA API domain.
CHECK_ROCTRACER(
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, nullptr));
out << "*";
}
std::cout << out.str() << ')' << std::endl;
}
// Enable HSA GPU activity
if (trace_hsa_activity) {
// Allocating tracing pool
open_tracing_pool();
std::cout << " HSA-activity-trace()" << std::endl;
CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));
}
// Enable HIP API callbacks/activity
if (trace_hip_api || trace_hip_activity) {
std::ostringstream out;
out << " HIP-trace(";
// Allocating tracing pool
open_tracing_pool();
// Enable tracing
if (trace_hip_api) {
if (hip_api_vec.size() != 0) {
// An explicit API list was given: enable callbacks one operation at a time.
out << "-*";
for (unsigned i = 0; i < hip_api_vec.size(); ++i) {
uint32_t cid = HIP_API_ID_NONE;
const char* api = hip_api_vec[i].c_str();
if (roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, api, &cid, nullptr) ==
ROCTRACER_STATUS_SUCCESS &&
roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, cid, hip_api_callback,
nullptr) == ROCTRACER_STATUS_SUCCESS)
out << ' ' << api;
else
warning("Unable to enable HIP_API tracing for invalid operation %s", api);
}
} else {
// No list: enable callbacks for the whole HIP API domain.
CHECK_ROCTRACER(
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr));
out << "*";
}
}
if (trace_hip_activity) {
CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
}
std::cout << out.str() << ')' << std::endl;
}
// Enable PC sampling
if (trace_pcs) {
std::cout << " PCS-trace()" << std::endl;
open_tracing_pool();
// NOTE(review): PC sampling is enabled through the HSA_OP_ID_RESERVED1 operation id - confirm.
CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_RESERVED1));
}
return true;
}
// HSA-runtime on-unload method: forwards to tool_unload(), the same routine OnLoad() registers
// with atexit.
ROCTRACER_EXPORT void OnUnload() { tool_unload(); }
} // extern "C"
// Runs tool_load() outside of the HSA OnLoad() path.
// NOTE(review): how this function gets invoked (e.g. a constructor attribute) is not visible
// here - confirm against the build.
void initialize() {
tool_load();
}
+125
Féach ar an gComhad
@@ -0,0 +1,125 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "debug.h"
#include "util.h"
#include <cstdarg>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <string>
#if defined(ENABLE_BACKTRACE)
#include <cxxabi.h>
#include <backtrace.h>
namespace {
// State shared between fatal() and the libbacktrace callbacks; passed around as the opaque
// 'data' pointer.
struct BackTraceInfo {
// libbacktrace handle created by backtrace_create_state().
struct ::backtrace_state* state = nullptr;
// Accumulates the formatted backtrace text (and any libbacktrace error messages).
std::stringstream sstream{};
// Index of the next frame to print; incremented once per frame by fullCallback().
int depth = 0;
// Set to 1 by errorCallback(); returned by fullCallback() as its result.
int error = 0;
};
// libbacktrace error callback: flags the failure in the BackTraceInfo passed as 'data' and
// appends the error text and number to its stream.
void errorCallback(void* data, const char* message, int errnum) {
  auto& trace = *static_cast<BackTraceInfo*>(data);
  trace.error = 1;
  trace.sstream << "ROCtracer error: " << message << '(' << errnum << ')';
}
void syminfoCallback(void* data, uintptr_t /* pc */, const char* symname, uintptr_t /* symval */,
uintptr_t /* symsize */) {
BackTraceInfo* info = static_cast<BackTraceInfo*>(data);
if (symname == nullptr) return;
int status;
char* demangled = abi::__cxa_demangle(symname, nullptr, nullptr, &status);
info->sstream << ' ' << (status == 0 ? demangled : symname);
free(demangled);
}
// libbacktrace per-frame callback: writes one line with the frame index and zero-padded hex PC,
// followed by the function name (demangled when possible) and, if known, the source file and
// line. When no function name is supplied the symbol table is consulted via backtrace_syminfo().
// Returns the error flag stored in the BackTraceInfo.
int fullCallback(void* data, uintptr_t pc, const char* filename, int lineno, const char* function) {
  auto& trace = *static_cast<BackTraceInfo*>(data);
  std::stringstream& out = trace.sstream;

  out << std::endl << " #" << std::dec << trace.depth++ << ' ' << "0x" << std::noshowbase;
  out << std::hex << std::setfill('0') << std::setw(sizeof(pc) * 2) << pc;

  if (function == nullptr) {
    backtrace_syminfo(trace.state, pc, syminfoCallback, errorCallback, data);
    return trace.error;
  }

  int demangle_status;
  char* pretty = abi::__cxa_demangle(function, nullptr, nullptr, &demangle_status);
  out << ' ' << (demangle_status == 0 ? pretty : function);
  free(pretty);

  if (filename != nullptr) {
    out << " in " << filename;
    // The stream is still in hex mode from the PC above; switch back for the line number.
    if (lineno) out << ':' << std::dec << lineno;
  }
  return trace.error;
}
} // namespace
#endif // defined (ENABLE_BACKTRACE)
namespace roctracer {
// Formats a printf-style message and prints it to stderr prefixed with "ROCtracer warning: ".
void warning(const char* format, ...) {
  va_list args;
  va_start(args, format);
  const std::string text = string_vprintf(format, args);
  va_end(args);
  std::cerr << "ROCtracer warning: " << text << std::endl;
}
// Formats a printf-style message, prints it to stderr prefixed with "ROCtracer error: " and
// terminates the process with EXIT_FAILURE.
void error(const char* format, ...) {
  va_list args;
  va_start(args, format);
  const std::string text = string_vprintf(format, args);
  va_end(args);
  std::cerr << "ROCtracer error: " << text << std::endl;
  exit(EXIT_FAILURE);
}
// Formats a printf-style message, prints it to stderr prefixed with "ROCtracer fatal error: "
// and calls abort(). When built with ENABLE_BACKTRACE the message is followed by a backtrace
// of the calling thread produced with libbacktrace.
void fatal [[noreturn]] (const char* format, ...) {
  va_list va;
  va_start(va, format);
  std::string message = string_vprintf(format, va);
  va_end(va);

#if defined(ENABLE_BACKTRACE)
  BackTraceInfo info;

  info.sstream << std::endl << "Backtrace:";
  info.state = ::backtrace_create_state("/proc/self/exe", 0, errorCallback, &info);
  // backtrace_create_state() returns NULL on failure (after reporting through errorCallback);
  // walking the stack with a null state would crash before the message is ever printed.
  if (info.state != nullptr) {
    // Skip one frame so the trace starts at the caller of fatal().
    ::backtrace_full(info.state, 1, fullCallback, errorCallback, &info);
  }

  message += info.sstream.str();
#endif /* defined (ENABLE_BACKTRACE) */

  std::cerr << "ROCtracer fatal error: " << message << std::endl;
  abort();
}
} // namespace roctracer
+47
Féach ar an gComhad
@@ -0,0 +1,47 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
// Diagnostic helpers. All three take a printf-style format string; with GCC-compatible
// compilers the format attribute enables compile-time checking of the arguments.
namespace roctracer {
// Reports a warning; declared as returning normally.
extern void warning(const char* format, ...)
#if defined(__GNUC__)
__attribute__((format(printf, 1, 2)))
#endif // defined (__GNUC__)
;
// Reports an error; declared [[noreturn]].
extern void error [[noreturn]] (const char* format, ...)
#if defined(__GNUC__)
__attribute__((format(printf, 1, 2)))
#endif // defined (__GNUC__)
;
// Reports a fatal error; declared [[noreturn]].
extern void fatal [[noreturn]] (const char* format, ...)
#if defined(__GNUC__)
__attribute__((format(printf, 1, 2)))
#endif // defined (__GNUC__)
;
} // namespace roctracer
// Make the helpers usable without namespace qualification in every file including this header.
using roctracer::error;
using roctracer::fatal;
using roctracer::warning;
+167
Féach ar an gComhad
@@ -0,0 +1,167 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef SRC_UTIL_LOGGER_H_
#define SRC_UTIL_LOGGER_H_
#include <time.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/file.h>
#include <stdarg.h>
#include <stdlib.h>
#include <atomic>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <exception>
#include <mutex>
#include <map>
namespace roctracer::util {
// Singleton logger with a stream-style interface.
//
// Text is written to /tmp/roctracer_log.txt (append mode) only when the ROCTRACER_LOG
// environment variable is set. Independently of that, text streamed between the begm and endl
// manipulators is collected per thread id and can be read back with LastMessage().
class Logger {
 public:
  // Signature of the stream manipulators (begm / endl).
  using manip_t = void (*)();

  // Streams one value. The first value of a record gets a timestamp/pid/tid header; later
  // values are appended as-is.
  template <typename T> Logger& operator<<(T&& m) {
    std::ostringstream formatted;
    formatted << std::forward<T>(m);
    const std::string text = formatted.str();
    if (streaming_) {
      Put(text);
    } else {
      Log(text);
    }
    streaming_ = true;
    return *this;
  }

  // Applies a manipulator.
  Logger& operator<<(manip_t f) {
    (*f)();
    return *this;
  }

  // Starts collecting a message for the calling thread (clears the previous one).
  static void begm() { Instance().ResetStreaming(true); }
  // Ends the current record: terminates the line and stops message collection.
  static void endl() { Instance().ResetStreaming(false); }

  // Returns the message collected for the calling thread.
  const std::string& LastMessage() {
    std::lock_guard<std::recursive_mutex> guard(mutex_);
    return message_[GetTid()];
  }

  static Logger& Instance() {
    static Logger the_logger;
    return the_logger;
  }

  static uint32_t GetPid() { return static_cast<uint32_t>(syscall(__NR_getpid)); }
  static uint32_t GetTid() { return static_cast<uint32_t>(syscall(__NR_gettid)); }

 private:
  Logger() {
    if (getenv("ROCTRACER_LOG") != nullptr) file_ = fopen("/tmp/roctracer_log.txt", "a");
    ResetStreaming(false);
  }

  ~Logger() {
    if (file_ == nullptr) return;
    // Terminate a record that was left open.
    if (dirty_) Put("\n");
    fclose(file_);
  }

  // Switches between message-collecting mode (true) and idle (false). Leaving a streaming
  // record emits the terminating newline first.
  void ResetStreaming(const bool messaging) {
    std::lock_guard<std::recursive_mutex> guard(mutex_);
    if (messaging) {
      message_[GetTid()].clear();
    } else if (streaming_) {
      Put("\n");
      dirty_ = false;
    }
    streaming_ = messaging_ = messaging;
  }

  // Appends raw text to the calling thread's message (when collecting) and to the log file
  // (when open). The file write is bracketed by an exclusive flock().
  void Put(const std::string& m) {
    std::lock_guard<std::recursive_mutex> guard(mutex_);
    if (messaging_) message_[GetTid()].append(m);
    if (file_ == nullptr) return;

    dirty_ = true;
    const int fd = fileno(file_);
    flock(fd, LOCK_EX);
    fprintf(file_, "%s", m.c_str());
    fflush(file_);
    flock(fd, LOCK_UN);
  }

  // Emits text prefixed with "<local-time pid tid> ".
  void Log(const std::string& m) {
    const time_t now = time(nullptr);
    tm local_tm;
    localtime_r(&now, &local_tm);

    char stamp[26];
    strftime(stamp, sizeof(stamp), "%Y-%m-%d %H:%M:%S", &local_tm);

    std::ostringstream line;
    line << "<" << stamp << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m;
    Put(line.str());
  }

  FILE* file_ = nullptr;    // log file, or nullptr when file logging is disabled
  bool dirty_ = false;      // true when text was written to the file since the last endl
  bool streaming_ = false;  // true while inside a record (header already emitted)
  bool messaging_ = false;  // true while text is being collected into message_
  std::recursive_mutex mutex_;               // recursive: ResetStreaming() calls Put()
  std::map<uint32_t, std::string> message_;  // last message per thread id
};
} // namespace roctracer::util
// Logging macros. 'stream' is a chain of values joined with '<<'. Each macro writes a severity
// tag, then brackets the streamed values with Logger::begm / Logger::endl.

// Logs the message with a "fatal: " tag and then calls abort().
#define FATAL_LOGGING(stream) \
do { \
roctracer::util::Logger::Instance() \
<< "fatal: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \
abort(); \
} while (false)
// Logs the message with an "error: " tag.
#define ERR_LOGGING(stream) \
do { \
roctracer::util::Logger::Instance() \
<< "error: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \
} while (false)
// Logs the message with an "info: " tag.
#define INFO_LOGGING(stream) \
do { \
roctracer::util::Logger::Instance() \
<< "info: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \
} while (false)
// Prints the message to stderr and also logs it with a "warning: " tag. 'stream' is expanded
// (and therefore evaluated) twice.
// NOTE(review): the stderr prefix reads "ROCProfiler: " although this is the roctracer logger -
// confirm whether that is intended.
#define WARN_LOGGING(stream) \
do { \
std::cerr << "ROCProfiler: " << stream << std::endl; \
roctracer::util::Logger::Instance() << "warning: " << roctracer::util::Logger::begm << stream \
<< roctracer::util::Logger::endl; \
} while (false)
#endif // SRC_UTIL_LOGGER_H_
+51
Féach ar an gComhad
@@ -0,0 +1,51 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "util.h"
#include <cstdio>
#include <cstdarg>
#include <string>
namespace roctracer {
// Formats 'format' with the arguments in 'va' (printf semantics) and returns the result as a
// std::string. 'va' is consumed by the call. Returns an empty string when formatting fails.
std::string string_vprintf(const char* format, va_list va) {
  // First pass on a copy of the argument list to measure the output.
  va_list copy;
  va_copy(copy, va);
  const int length = vsnprintf(nullptr, 0, format, copy);
  va_end(copy);

  // vsnprintf() reports an encoding error with a negative value; converting that to size_t
  // would request an enormous allocation.
  if (length <= 0) return std::string();

  std::string str(static_cast<size_t>(length), '\0');
  // Second pass, bounded to the measured size plus the terminating NUL (which lands on the
  // string's own terminator).
  vsnprintf(&str[0], str.size() + 1, format, va);
  return str;
}
// printf-style formatting into a std::string; variadic front end for string_vprintf().
std::string string_printf(const char* format, ...) {
  va_list args;
  va_start(args, format);
  std::string formatted = string_vprintf(format, args);
  va_end(args);
  return formatted;
}
} // namespace roctracer
+36
Féach ar an gComhad
@@ -0,0 +1,36 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include <cstdarg>
#include <string>
namespace roctracer {
// Formats 'format' with the arguments held in 'va' and returns the result as a std::string.
extern std::string string_vprintf(const char* format, va_list va);
// Variadic form of string_vprintf(); with GCC-compatible compilers the format attribute
// enables compile-time checking of the arguments against the format string.
extern std::string string_printf(const char* format, ...)
#if defined(__GNUC__)
__attribute__((format(printf, 1, 2)))
#endif // defined (__GNUC__)
;
} // namespace roctracer
+457
Féach ar an gComhad
@@ -0,0 +1,457 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef TEST_UTIL_XML_H_
#define TEST_UTIL_XML_H_
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
namespace xml {
class Xml {
public:
// One lexical token as raw characters (not NUL-terminated).
typedef std::vector<char> token_t;
struct level_t;
// Child nodes of a level.
typedef std::vector<level_t*> nodes_t;
// Attributes of a node: key -> value.
typedef std::map<std::string, std::string> opts_t;
// One node of the parsed tree.
struct level_t {
// Tag name of the node (last component only).
std::string tag;
// Nested nodes, in the order they were added.
nodes_t nodes;
// Attributes given in the node declaration.
opts_t opts;
};
typedef std::vector<level_t*> nodes_vec_t;
// Index of all nodes keyed by their dot-separated path from the root, e.g. "top.trace".
typedef std::map<std::string, nodes_vec_t> map_t;
// Parser states: DECL_STATE while reading the attributes of an opened tag, BODY_STATE otherwise.
enum { DECL_STATE, BODY_STATE };
// Parses 'file_name' and returns the resulting object, or NULL when the file (or any file it
// includes) cannot be opened. When 'obj' is given, the new object adds its nodes to obj's tree
// instead of owning one; this is how '#include "..."' lines are processed recursively.
// Included paths are resolved relative to the directory of 'file_name'.
static Xml* Create(const std::string& file_name, const Xml* obj = NULL) {
  Xml* xml = new Xml(file_name, obj);
  if (!xml->Init()) {
    delete xml;
    return NULL;
  }

  // Directory part of the file name, including the trailing '/'.
  const std::size_t slash = file_name.rfind('/');
  std::string dir;
  if (slash != std::string::npos) dir = file_name.substr(0, slash + 1);

  xml->PreProcess();

  // Collect the include nodes not handled yet; "touch" marks a node as visited so that nested
  // Create() calls sharing the same tree do not process it again.
  nodes_t pending;
  for (auto* node : xml->GetNodes("top.include")) {
    if (node->opts.find("touch") != node->opts.end()) continue;
    node->opts["touch"] = "";
    pending.push_back(node);
  }

  for (auto* incl : pending) {
    Xml* child = Create(dir + incl->opts["file"], xml);
    if (child == NULL) {
      delete xml;
      return NULL;
    }
    // The child's nodes live in the shared tree; the child object itself is no longer needed.
    delete child;
  }

  xml->Process();
  return xml;
}
// Releases an object returned by Create().
static void Destroy(Xml* xml) { delete xml; }
// Returns (a copy of) the file name this object was created for.
std::string GetName() { return file_name_; }
void AddExpr(const std::string& full_tag, const std::string& name, const std::string& expr) {
const std::size_t pos = full_tag.rfind('.');
const std::size_t pos1 = (pos == std::string::npos) ? 0 : pos + 1;
const std::string level_tag = full_tag.substr(pos1);
level_t* level = new level_t;
(*map_)[full_tag].push_back(level);
level->tag = level_tag;
level->opts["name"] = name;
level->opts["expr"] = expr;
}
// Same as AddExpr(), with the expression being the decimal text of 'val'.
void AddConst(const std::string& full_tag, const std::string& name, const uint64_t& val) {
  std::ostringstream text;
  text << val;
  const std::string expr = text.str();
  AddExpr(full_tag, name, expr);
}
// Returns a copy of the node list registered under the dot-separated path 'global_tag'
// (e.g. "top.trace"), or an empty list when nothing is registered there.
// Uses find() so that a lookup of an unknown path does not add an empty entry to the index.
nodes_t GetNodes(const std::string& global_tag) {
  const auto it = map_->find(global_tag);
  return (it != map_->end()) ? it->second : nodes_t();
}
template <class F> F ForEach(const F& f_i) {
F f = f_i;
if (map_) {
for (auto& entry : *map_) {
for (auto node : entry.second) {
if (f.fun(entry.first, node) == false) break;
}
}
}
return f;
}
// Visits every node of the index with a copy of 'f_i' and returns that copy.
// For each path the functor's fun(path, node) is called per node; a false result stops the
// visit of the remaining nodes under that path only (other paths are still visited).
template <class F> F ForEach(const F& f_i) const {
  F visitor = f_i;
  if (!map_) return visitor;
  for (auto& entry : *map_) {
    for (auto node : entry.second) {
      if (!visitor.fun(entry.first, node)) break;
    }
  }
  return visitor;
}
// ForEach() functor printing every attribute of a node as "<path>.<key> = <value>".
struct print_func {
  bool fun(const std::string& global_tag, level_t* node) {
    for (const auto& attr : node->opts) {
      std::cout << global_tag << "." << attr.first << " = " << attr.second << std::endl;
    }
    return true;
  }
};
// Dumps the file name followed by all node attributes to stdout.
void Print() const {
std::cout << "XML file '" << file_name_ << "':" << std::endl;
ForEach(print_func());
}
private:
// Constructs a parser for 'file_name'. When 'obj' is given the new object shares obj's node
// index and current level (it is an "included" parser and does not own the tree).
// The file is not opened here; see Init().
Xml(const std::string& file_name, const Xml* obj)
    : file_name_(file_name),
      file_line_(0),
      fd_(-1),  // no descriptor until Init() opens the file
      data_size_(0),
      index_(0),
      state_(BODY_STATE),
      comment_(false),
      included_(false),
      level_(NULL),
      map_(NULL) {
  if (obj != NULL) {
    map_ = obj->map_;
    level_ = obj->level_;
    included_ = true;
  }
}
// ForEach() functor that frees every node; used by the destructor of the owning object.
struct delete_func {
bool fun(const std::string&, level_t* node) {
delete node;
return true;
}
};
// Closes the input file and, unless this object was created for an included file (which only
// borrows the tree), frees all nodes and the index.
~Xml() {
  // Init() is always run by Create() right after construction and leaves fd_ at -1 when the
  // file could not be opened.
  if (fd_ != -1) close(fd_);
  if (included_ == false) {
    ForEach(delete_func());
    delete map_;
  }
}
// Opens the input file read-only and, for a top-level (non-included) object, creates the node
// index with its root level "top". Returns false when the file cannot be opened; no message
// is printed in that case.
bool Init() {
  fd_ = open(file_name_.c_str(), O_RDONLY);
  if (fd_ == -1) return false;

  // An included parser already shares the parent's index.
  if (map_ != NULL) return true;

  map_ = new map_t;
  AddLevel("top");
  return true;
}
// Scans the file for '#include "<file>"' lines and records each one as a "top.include" node
// with the attribute file=<file>. A chunk of up to kBufSize bytes is read at a time and only
// the beginning of each chunk is tested, so after an include line the read position is moved
// to the start of the following line. On a malformed include line the process exits with
// status 1. The file position is rewound to the start before returning.
void PreProcess() {
  uint32_t ind = 0;
  // One extra byte so the chunk can be NUL-terminated without overwriting data.
  char buf[kBufSize + 1];
  bool error = false;
  buf[0] = '\0';

  while (1) {
    const off_t pos = lseek(fd_, 0, SEEK_CUR);
    // read() returns -1 on error; keep the result signed so that case ends the scan instead
    // of being treated as a huge chunk size.
    const ssize_t nread = read(fd_, buf, kBufSize);
    if (nread <= 0) break;
    uint32_t size = static_cast<uint32_t>(nread);
    buf[size] = '\0';

    if (strncmp(buf, "#include \"", 10) == 0) {
      // Find the end of the include line.
      for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) {
      }
      if (ind == size) {
        fprintf(stderr, "XML PreProcess failed, line size limit %zu\n", kBufSize);
        error = true;
        break;
      }
      buf[ind] = '\0';
      size = ind;
      // Continue reading right after this line.
      lseek(fd_, pos + ind + 1, SEEK_SET);

      // Find the closing quote of the file name (the name starts at offset 10).
      for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) {
      }
      if (ind == size) {
        error = true;
        break;
      }
      buf[ind] = '\0';

      AddLevel("include");
      AddOption("file", &buf[10]);
      UpLevel();
    }
  }

  if (error) {
    fprintf(stderr, "XML PreProcess failed, line '%s'\n", buf);
    exit(1);
  }

  lseek(fd_, 0, SEEK_SET);
}
// Main parsing loop: consumes tokens until end of file and builds the node tree.
//   BODY_STATE: expects "<tag" (opens a node; switches to DECL_STATE when the '>' is not part
//               of the same token), "<tag>" or "</tag>" (returns to the parent level).
//   DECL_STATE: expects "key=value" attribute tokens until a token starting with '>'.
// Text following '>' inside a token is carried over and parsed as the next token.
// Any other input is reported through BadFormat(), which terminates the process.
void Process() {
  token_t remainder;

  while (1) {
    token_t token = (remainder.size()) ? remainder : NextToken();
    remainder.clear();

    // End of file
    if (token.size() == 0) break;

    switch (state_) {
      case BODY_STATE:
        if (token[0] == '<') {
          bool node_begin = true;
          unsigned ind = 1;
          // Guard the look-ahead: the token may consist of the '<' alone.
          if ((token.size() > 1) && (token[1] == '/')) {
            node_begin = false;
            ++ind;
          }

          // Locate the closing '>' (if it is part of this token).
          unsigned i = ind;
          while (i < token.size()) {
            if (token[i] == '>') break;
            ++i;
          }
          for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]);

          if (i == token.size()) {
            // No '>' yet: attributes follow for an opening tag; a closing tag is malformed.
            if (node_begin)
              state_ = DECL_STATE;
            else
              BadFormat(token);
            token.push_back('\0');
          } else {
            token[i] = '\0';
          }

          const char* tag = &token[ind];
          if (node_begin) {
            AddLevel(tag);
          } else {
            // The closing tag is compared as a prefix of the current level's tag.
            if (strncmp(CurrentLevel().c_str(), tag, strlen(tag)) != 0) {
              token.back() = '>';
              BadFormat(token);
            }
            UpLevel();
          }
        } else {
          BadFormat(token);
        }
        break;

      case DECL_STATE:
        if (token[0] == '>') {
          state_ = BODY_STATE;
          for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]);
          continue;
        } else {
          token.push_back('\0');
          // Split "key=value" at the first '='.
          unsigned j = 0;
          for (j = 0; j < token.size(); ++j)
            if (token[j] == '=') break;
          if (j == token.size()) BadFormat(token);
          token[j] = '\0';
          const char* key = &token[0];
          const char* value = &token[j + 1];
          AddOption(key, value);
        }
        break;

      default:
        std::cout << "XML parser error: wrong state: " << state_ << std::endl;
        exit(1);
    }
  }
}
// True when the character at the current buffer position is a space or a tab.
bool SpaceCheck() const {
  const char symb = buffer_[index_];
  return (symb == ' ') || (symb == '\t');
}
// True when the current character has to be skipped as a line end or as part of a '#' comment.
// A newline is replaced with a space in the buffer, counted, and ends a running comment; a '#'
// starts a comment that lasts until the next newline.
bool LineEndCheck() {
  if (buffer_[index_] == '\n') {
    buffer_[index_] = ' ';
    ++file_line_;
    comment_ = false;
    return true;
  }
  if (comment_ || (buffer_[index_] == '#')) {
    comment_ = true;
    return true;
  }
  return false;
}
// Returns the next whitespace-delimited token, refilling the buffer from the file as needed.
// An empty token means end of input. Inside double quotes whitespace belongs to the token;
// the quotes themselves are dropped. A backslash makes the following '"' or '\' literal.
// A token may span several buffer refills.
token_t NextToken() {
  token_t token;
  bool in_string = false;
  bool special_symb = false;

  while (1) {
    if (data_size_ == 0) {
      // read() returns -1 on error; test the signed result before storing it in the unsigned
      // data_size_, which would otherwise let the scan run far past the buffer.
      const ssize_t nread = read(fd_, buffer_, kBufSize);
      if (nread <= 0) break;
      data_size_ = static_cast<unsigned>(nread);
    }

    // Skip leading whitespace, line ends and comments - only before the token has started.
    if (token.empty()) {
      while ((index_ < data_size_) && (SpaceCheck() || LineEndCheck())) {
        ++index_;
      }
    }

    while ((index_ < data_size_) && (in_string || !(SpaceCheck() || LineEndCheck()))) {
      const char symb = buffer_[index_];
      bool skip_symb = false;

      switch (symb) {
        case '\\':
          if (special_symb) {
            special_symb = false;
          } else {
            special_symb = true;
            skip_symb = true;
          }
          break;
        case '"':
          if (special_symb) {
            special_symb = false;
          } else {
            in_string = !in_string;
            if (!in_string) {
              // Closing quote: turn it into a space and revisit this position so that it
              // terminates the token (the decrement is undone by the increment below).
              buffer_[index_] = ' ';
              --index_;
            }
            skip_symb = true;
          }
          break;
      }

      if (!skip_symb) token.push_back(symb);
      ++index_;
    }

    if (index_ == data_size_) {
      // Buffer exhausted: the token may continue in the next chunk.
      index_ = 0;
      data_size_ = 0;
    } else {
      if (special_symb || in_string) BadFormat(token);
      break;
    }
  }

  return token;
}
// Reports a malformed token with file name and line number on stdout and exits with status 1.
void BadFormat(token_t token) {
  // Terminate the private copy so it can be printed as a C string.
  token.push_back('\0');
  const char* text = token.data();
  std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '";
  std::cout << text << "'" << std::endl;
  exit(1);
}
void AddLevel(const std::string& tag) {
level_t* level = new level_t;
level->tag = tag;
if (level_) {
level_->nodes.push_back(level);
stack_.push_back(level_);
}
level_ = level;
std::string global_tag;
for (level_t* level : stack_) {
global_tag += level->tag + ".";
}
global_tag += tag;
(*map_)[global_tag].push_back(level_);
}
void UpLevel() {
level_ = stack_.back();
stack_.pop_back();
}
// Returns the tag of the node currently being parsed.
std::string CurrentLevel() const { return level_->tag; }
// Sets (or overwrites) the attribute 'key' of the node currently being parsed.
void AddOption(const std::string& key, const std::string& value) { level_->opts[key] = value; }
// Path of the file being parsed.
const std::string file_name_;
// Number of newlines consumed so far; used in error messages.
unsigned file_line_;
// Descriptor of the input file, set by Init().
int fd_;
// Size of the read buffer; also the maximum length of an '#include' line.
static const size_t kBufSize = 256;
// Current chunk of file data.
char buffer_[kBufSize];
// Number of valid bytes in buffer_ (0 = refill needed).
unsigned data_size_;
// Read position inside buffer_.
unsigned index_;
// Parser state: DECL_STATE or BODY_STATE.
unsigned state_;
// True while inside a '#' comment (until the next newline).
bool comment_;
// Ancestors of the current level, outermost first.
std::vector<level_t*> stack_;
// True when this object parses an included file and borrows the parent's tree.
bool included_;
// Node currently being parsed.
level_t* level_;
// Index of all nodes by path; owned only when included_ is false.
map_t* map_;
};
} // namespace xml
#endif // TEST_UTIL_XML_H_
+198
Féach ar an gComhad
@@ -0,0 +1,198 @@
################################################################################
## Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal in the Software without restriction, including without limitation the
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
## sell copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
## IN THE SOFTWARE.
################################################################################
## Read the include directories exported by the imported hsa-runtime64 target
get_property(HSA_RUNTIME_INCLUDE_DIRECTORIES TARGET hsa-runtime64::hsa-runtime64 PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
# Set the HIP language runtime link flags as FindHIP does not set them.
set(CMAKE_EXECUTABLE_RUNTIME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG})
set(CMAKE_EXECUTABLE_RUNTIME_HIP_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG_SEP})
set(CMAKE_EXECUTABLE_RPATH_LINK_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RPATH_LINK_CXX_FLAG})
## Make the FindHIP module shipped under ROCM_PATH visible to find_package(HIP ... MODULE)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${ROCM_PATH}/lib/cmake/hip")
set(CMAKE_HIP_ARCHITECTURES OFF)
if(DEFINED ROCM_PATH)
set(HIP_ROOT_DIR "${ROCM_PATH}/bin")
endif()
find_package(HIP REQUIRED MODULE)
## Locate the Clang CMake package under the ROCm LLVM installation
find_package(Clang REQUIRED CONFIG
PATHS "${ROCM_PATH}"
PATH_SUFFIXES "llvm/lib/cmake/clang")
## Add custom targets to build (mytest) and run (check) all the tests.
## Every test executable below registers itself as a dependency of mytest;
## check runs run.sh from the build directory once mytest is built.
add_custom_target(mytest)
add_dependencies(mytest roctracer_tool hip_stats)
add_custom_target(check COMMAND ${PROJECT_BINARY_DIR}/run.sh DEPENDS mytest)
## Build MatrixTranspose
## HIP_SOURCE_PROPERTY_FORMAT marks the .cpp file as a HIP source for hip_add_executable
set_source_files_properties(hip/MatrixTranspose.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_executable(MatrixTranspose hip/MatrixTranspose.cpp)
## Adding generated build-id as hip_add_executable doesn't generate automatically
target_link_options(MatrixTranspose PRIVATE "-Wl,--build-id=md5")
target_include_directories(MatrixTranspose PRIVATE ${PROJECT_SOURCE_DIR}/inc)
target_link_libraries(MatrixTranspose PRIVATE roctracer roctx)
add_dependencies(mytest MatrixTranspose)
## Build MatrixTranspose_test, MatrixTranspose_hipaact_test and MatrixTranspose_mgpu
set_source_files_properties(app/MatrixTranspose_test.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
## build_matrix_transpose_test(<OUTPUT_FILE> <DEFINITIONS>)
## Builds app/MatrixTranspose_test.cpp into the executable OUTPUT_FILE with ITERATIONS=100,
## HIP_TEST=1 and the extra compile DEFINITIONS, links it against roctracer and roctx, and
## adds it to the mytest target.
function(build_matrix_transpose_test OUTPUT_FILE DEFINITIONS)
hip_add_executable(${OUTPUT_FILE} app/MatrixTranspose_test.cpp)
## Adding generated build-id as hip_add_executable doesn't generate automatically
target_link_options(${OUTPUT_FILE} PRIVATE "-Wl,--build-id=md5")
target_compile_definitions(${OUTPUT_FILE} PRIVATE ITERATIONS=100 HIP_TEST=1 ${DEFINITIONS})
target_include_directories(${OUTPUT_FILE} PRIVATE ${PROJECT_SOURCE_DIR}/inc)
target_link_libraries(${OUTPUT_FILE} PRIVATE roctracer roctx)
add_dependencies(mytest ${OUTPUT_FILE})
endfunction(build_matrix_transpose_test)
## Three variants of the same source, differing only in the extra definitions
build_matrix_transpose_test(MatrixTranspose_test "")
build_matrix_transpose_test(MatrixTranspose_hipaact_test HIP_API_ACTIVITY_ON=1)
build_matrix_transpose_test(MatrixTranspose_mgpu MGPU_TEST=1)
## Build MatrixTranspose_ctest: the same test source built through a ".c" file name.
## The .c file is a symlink to app/MatrixTranspose_test.cpp created in the build directory.
add_custom_command(OUTPUT MatrixTranspose.c
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/app/MatrixTranspose_test.cpp MatrixTranspose.c)
hip_add_executable(MatrixTranspose_ctest MatrixTranspose.c)
## Adding generated build-id as hip_add_executable doesn't generate automatically
target_link_options(MatrixTranspose_ctest PRIVATE "-Wl,--build-id=md5")
target_compile_definitions(MatrixTranspose_ctest PRIVATE HIP_TEST=0 __HIP_PLATFORM_AMD__)
target_include_directories(MatrixTranspose_ctest PRIVATE ${PROJECT_SOURCE_DIR}/inc)
target_link_libraries(MatrixTranspose_ctest PRIVATE roctracer roctx)
add_dependencies(mytest MatrixTranspose_ctest)
## Build codeobj event test (a shared library, installed with the "tests" component)
add_library(codeobj_test SHARED app/codeobj_test.cpp)
target_include_directories(codeobj_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc)
target_link_libraries(codeobj_test roctracer)
add_dependencies(mytest codeobj_test)
install(TARGETS codeobj_test DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/test COMPONENT tests)
## Build the hsa (standalone) copy test
## generate_hsaco(<TARGET_ID> <INPUT_FILE> <OUTPUT_FILE>)
## Adds a command compiling the OpenCL source INPUT_FILE with clang for the GPU TARGET_ID into
## ${PROJECT_BINARY_DIR}/OUTPUT_FILE, installs the result with the "tests" component, and
## appends the output path to HSACO_TARGET_LIST in the caller's scope.
function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
separate_arguments(CLANG_ARG_LIST UNIX_COMMAND
"-O2 -x cl -Xclang -finclude-default-header -cl-denorms-are-zero -cl-std=CL2.0 -Wl,--build-id=sha1
-target amdgcn-amd-amdhsa -mcpu=${TARGET_ID} -o ${OUTPUT_FILE} ${INPUT_FILE}")
add_custom_command(OUTPUT ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
COMMAND clang ${CLANG_ARG_LIST}
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
DEPENDS ${INPUT_FILE} clang
COMMENT "Building ${OUTPUT_FILE}..."
VERBATIM)
install(FILES ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME} COMPONENT tests)
## PARENT_SCOPE is required because function() variables are local
set(HSACO_TARGET_LIST ${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} PARENT_SCOPE)
endfunction(generate_hsaco)
## Normalize GPU_TARGETS into the CMake list GPU_LIST and generate one code object per target.
## GPU_TARGETS may be separated by spaces, commas or semicolons.
separate_arguments(GPU_TARGETS)
list(LENGTH GPU_TARGETS list_count)
if (${list_count} LESS_EQUAL 1)
  string(REPLACE " " ";" GPU_LIST "${GPU_TARGETS}")
  ## Apply the comma replacement to the result of the previous step; reading GPU_TARGETS
  ## again would discard the space replacement.
  string(REPLACE "," ";" GPU_LIST "${GPU_LIST}")
else()
  set(GPU_LIST ${GPU_TARGETS})
endif()
foreach(target_id ${GPU_LIST})
  ## generate kernel bitcodes
  generate_hsaco(${target_id} ${CMAKE_CURRENT_SOURCE_DIR}/hsa/copy.cl ${target_id}_copy.hsaco)
endforeach(target_id)
add_custom_target(hsaco_targets DEPENDS ${HSACO_TARGET_LIST})
add_executable(copy hsa/copy.cpp)
target_link_libraries(copy hsa-runtime64::hsa-runtime64 Threads::Threads dl)
add_dependencies(copy hsaco_targets)
add_dependencies(mytest copy)
## Build the ROCTX test
## HIP_SOURCE_PROPERTY_FORMAT is set on the source before it is handed to hip_add_executable
set_source_files_properties(app/roctx_test.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_executable(roctx_test app/roctx_test.cpp)
## Adding generated build-id as hip_add_executable doesn't generate automatically
target_link_options(roctx_test PRIVATE "-Wl,--build-id=md5")
target_link_libraries(roctx_test Threads::Threads roctx)
add_dependencies(mytest roctx_test)
## Build the backward compatibility test
add_executable(backward_compat_test app/backward_compat_test.cpp)
target_link_libraries(backward_compat_test roctracer)
add_dependencies(mytest backward_compat_test)
## Build the HSA load/unload/reload test
add_executable(load_unload_reload_test hsa/load_unload_reload.cpp)
target_link_libraries(load_unload_reload_test hsa-runtime64::hsa-runtime64)
add_dependencies(mytest load_unload_reload_test)
## Build the trace_buffer test
add_executable(trace_buffer directed/trace_buffer.cpp)
target_include_directories(trace_buffer PRIVATE ${PROJECT_SOURCE_DIR}/src/tracer_tool)
target_link_libraries(trace_buffer Threads::Threads atomic)
add_dependencies(mytest trace_buffer)
## Build the memory_pool test
add_executable(memory_pool directed/memory_pool.cpp)
target_include_directories(memory_pool PRIVATE ${PROJECT_SOURCE_DIR}/src/roctracer ${PROJECT_SOURCE_DIR}/inc)
target_link_libraries(memory_pool Threads::Threads atomic)
add_dependencies(mytest memory_pool)
## Build the activity_and_callback test
set_source_files_properties(directed/activity_and_callback.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_executable(activity_and_callback directed/activity_and_callback.cpp)
## Adding generated build-id as hip_add_executable doesn't generate automatically
target_link_options(activity_and_callback PRIVATE "-Wl,--build-id=md5")
target_link_libraries(activity_and_callback roctracer)
add_dependencies(mytest activity_and_callback)
## Build the multi_pool_activities test
set_source_files_properties(directed/multi_pool_activities.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_executable(multi_pool_activities directed/multi_pool_activities.cpp)
## Adding generated build-id as hip_add_executable doesn't generate automatically
target_link_options(multi_pool_activities PRIVATE "-Wl,--build-id=md5")
target_link_libraries(multi_pool_activities roctracer)
add_dependencies(mytest multi_pool_activities)
## Build the dlopen test
## Only libdl is linked here; the test source opens the roctracer and HSA libraries with dlopen()
add_executable(dlopen directed/dlopen.cpp)
target_include_directories(dlopen PRIVATE ${PROJECT_SOURCE_DIR}/inc ${HSA_RUNTIME_INCLUDE_DIRECTORIES})
target_link_libraries(dlopen dl)
add_dependencies(mytest dlopen)
## Copy the golden traces and test scripts
configure_file(run.sh ${PROJECT_BINARY_DIR} COPYONLY)
## Runs at configure time: creates ${PROJECT_BINARY_DIR}/run_ci.sh as a symlink pointing at run.sh
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink run.sh ${PROJECT_BINARY_DIR}/run_ci.sh)
## run.sh is installed under the name run_tests.sh
install(PROGRAMS ${PROJECT_BINARY_DIR}/run.sh RENAME "run_tests.sh" DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME} COMPONENT tests)
configure_file(${PROJECT_SOURCE_DIR}/script/check_trace.py ${PROJECT_BINARY_DIR}/test/check_trace.py COPYONLY)
install(PROGRAMS ${PROJECT_BINARY_DIR}/test/check_trace.py DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/test COMPONENT tests)
## Paths are globbed RELATIVE to this directory so the golden_traces/ prefix is kept in the copy below
file(GLOB files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "golden_traces/tests_trace_cmp_levels.txt" "golden_traces/*_trace.txt")
foreach(file ${files})
configure_file(${file} ${PROJECT_BINARY_DIR}/test/${file} COPYONLY)
endforeach()
install(DIRECTORY ${PROJECT_BINARY_DIR}/test/golden_traces DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/test COMPONENT tests)
# install all executables
# Collect every target created in this directory whose TYPE is EXECUTABLE, then install them together.
set(all_executables)
get_property(all_targets DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY BUILDSYSTEM_TARGETS)
foreach(target IN LISTS all_targets)
get_target_property(target_type ${target} TYPE)
# Libraries and custom targets are skipped by this type filter
if (target_type STREQUAL "EXECUTABLE")
list(APPEND all_executables ${target})
endif()
endforeach()
install(TARGETS ${all_executables} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/test COMPONENT tests)
@@ -0,0 +1,403 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef __cplusplus
#include <cstdlib>
using namespace std;
#else
#include <stdlib.h>
#endif
// roctx header file
#include <roctx.h>
// roctracer extension API
#include <roctracer_ext.h>
// Message buffer used by SPRINT()/SFLUSH().
//   msg_size - capacity of the buffer in bytes, including the terminating NUL
//   msg_buf  - start of the buffer, allocated on first use by SPRINT()
//   message  - current write position inside msg_buf
// When compiled as C++ the three variables are thread_local; when compiled as C
// they are plain statics.
#ifdef __cplusplus
static thread_local const size_t msg_size = 512;
static thread_local char* msg_buf = NULL;
static thread_local char* message = NULL;
#else
static const size_t msg_size = 512;
static char* msg_buf = NULL;
static char* message = NULL;
#endif

// Appends printf-style formatted text to the message buffer.
// Text that does not fit is truncated; the buffer always stays NUL-terminated and
// the write position never moves past the last byte of the buffer.
void SPRINT(const char* fmt, ...) {
  if (msg_buf == NULL) {
    msg_buf = (char*)calloc(msg_size, 1);
    if (msg_buf == NULL) abort();
    message = msg_buf;
  }

  // Invariant: message <= msg_buf + msg_size - 1, hence avail >= 1.
  const size_t avail = msg_size - (size_t)(message - msg_buf);

  va_list args;
  va_start(args, fmt);
  const int written = vsnprintf(message, avail, fmt, args);
  va_end(args);

  // A negative value reports a formatting error: leave the position untouched.
  if (written < 0) return;

  // vsnprintf returns the length the full text would have had, which can exceed
  // the space available. Advance only by what was really stored, otherwise the
  // next size computation underflows and later writes run past the buffer.
  if ((size_t)written < avail) {
    message += written;
  } else {
    message += avail - 1;
  }
}

// Writes the accumulated message to stdout, flushes stdout and rewinds the write
// position to the start of the buffer. Aborts if SPRINT() was never called.
void SFLUSH() {
  if (msg_buf == NULL) abort();
  message = msg_buf;
  msg_buf[msg_size - 1] = 0;
  fprintf(stdout, "%s", msg_buf);
  fflush(stdout);
}
// HIP call wrappers.
//   CALL_HIP(call)  - evaluates `call` and ignores its result.
//   CHECK_HIP(call) - evaluates `call`; on a result other than hipSuccess prints
//                     the HIP error string to stderr and aborts.
// When HIP_TEST is 0 both macros expand to an empty statement, so the wrapped
// expression is not evaluated at all.
// Every variant expands to `do { ... } while (0)` WITHOUT a trailing semicolon,
// so `MACRO(x);` is exactly one statement and is safe inside if/else.
#if HIP_TEST
// hip header file
#include <hip/hip_runtime.h>
// Macro to call HIP API
#define CALL_HIP(call) \
  do {                 \
    call;              \
  } while (0)
#define CHECK_HIP(call)                                \
  do {                                                 \
    hipError_t err = call;                             \
    if (err != hipSuccess) {                           \
      fprintf(stderr, "%s\n", hipGetErrorString(err)); \
      abort();                                         \
    }                                                  \
  } while (0)
#else
#define CALL_HIP(call) \
  do {                 \
  } while (0)
#define CHECK_HIP(call) \
  do {                  \
  } while (0)
#endif
// Default number of test-loop iterations; can be overridden by defining ITERATIONS at compile time.
#ifndef ITERATIONS
#define ITERATIONS 101
#endif
// Edge length of the square matrix.
#define WIDTH 1024
// Total number of matrix elements.
#define NUM (WIDTH * WIDTH)
// Thread-block dimensions. X and Y are used for the kernel launch in main();
// Z is not referenced in the visible part of this file.
#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
#define THREADS_PER_BLOCK_Z 1
#if HIP_TEST
// GPU kernel (must return void): each thread copies one element,
// out[row * width + col] = in[col * width + row], where (col, row) is the
// thread's global x/y index. There is no bounds check on the indices.
__global__ void matrixTranspose(float* out, float* in, const int width) {
  const int col = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
  const int row = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;

  out[row * width + col] = in[col * width + row];
}
#endif
// Host-side reference transpose of a square, row-major matrix.
//   output - destination, width * width floats
//   input  - source, width * width floats
//   width  - number of rows (and columns)
// Element (row, col) of `input` is stored at (col, row) of `output`.
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
  for (unsigned int row = 0; row < width; ++row) {
    const unsigned int row_base = row * width;
    for (unsigned int col = 0; col < width; ++col) {
      output[col * width + row] = input[row_base + col];
    }
  }
}
// Number of iterations still to run. Decremented by the loop in main() and read
// by start_tracing().
int iterations = ITERATIONS;

// Tracing hooks, defined further down in this file.
void init_tracing();
void start_tracing();
void stop_tracing();

// Test driver. Every iteration allocates host (and, with HIP_TEST, device)
// buffers, copies the matrix to the device, runs the transpose kernel, copies the
// result back and compares it with the CPU reference, while issuing rocTX markers
// and external correlation ids around the HIP calls.
// When HIP_TEST is 0, CALL_HIP/CHECK_HIP expand to nothing, so only the host
// allocations and the rocTX / correlation-id calls are executed.
// Returns the number of mismatching elements (0 on success).
int main() {
  float* Matrix;
  float* TransposeMatrix;
  float* cpuTransposeMatrix;
#if HIP_TEST
  float* gpuMatrix;
  float* gpuTransposeMatrix;
#endif
  int i;
  int errors = 0;

  init_tracing();

#if HIP_TEST
  int gpuCount = 1;
#if MGPU_TEST
  CHECK_HIP(hipGetDeviceCount(&gpuCount));
  fprintf(stderr, "Number of GPUs: %d\n", gpuCount);
#endif
  iterations *= gpuCount;
#endif

  while (iterations-- > 0) {
    start_tracing();

#if HIP_TEST
    // set GPU
    const int devIndex = iterations % gpuCount;
    CHECK_HIP(hipSetDevice(devIndex));
    hipDeviceProp_t devProp;
    // Query the device that was just selected, so the name printed below matches devIndex.
    CHECK_HIP(hipGetDeviceProperties(&devProp, devIndex));
    fprintf(stderr, "Device %d name: %s\n", devIndex, devProp.name);
#endif

    Matrix = (float*)malloc(NUM * sizeof(float));
    TransposeMatrix = (float*)malloc(NUM * sizeof(float));
    cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
    if (Matrix == NULL || TransposeMatrix == NULL || cpuTransposeMatrix == NULL) {
      fprintf(stderr, "host allocation failed\n");
      abort();
    }

    // initialize the input data
    for (i = 0; i < NUM; i++) {
      Matrix[i] = (float)i * 10.0f;
    }

    // allocate the memory on the device side
    CHECK_HIP(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
    CHECK_HIP(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));

    // correlation region 31
    roctracer_activity_push_external_correlation_id(31);
    // correlation region 32
    roctracer_activity_push_external_correlation_id(32);

    // Memory transfer from host to device
    CHECK_HIP(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));

    // correlation region 33
    roctracer_activity_push_external_correlation_id(33);

    roctxMark("before hipLaunchKernel");
    roctxRangePush("hipLaunchKernel");
    // Launching kernel from host
    CALL_HIP(hipLaunchKernelGGL(matrixTranspose,
                                dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
                                dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
                                gpuTransposeMatrix, gpuMatrix, WIDTH));
    roctxMark("after hipLaunchKernel");

    // correlation region end
    roctracer_activity_pop_external_correlation_id(NULL);

    // Memory transfer from device to host
    roctxRangePush("hipMemcpy");
    CHECK_HIP(
        hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
    roctxRangePop();  // for "hipMemcpy"
    roctxRangePop();  // for "hipLaunchKernel"

    // correlation region end
    roctracer_activity_pop_external_correlation_id(NULL);

#if HIP_TEST
    // CPU MatrixTranspose computation
    matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);

    // verify the results
    double eps = 1.0E-6;
    for (i = 0; i < NUM; i++) {
      // Absolute difference computed explicitly so the comparison never depends on
      // which abs() overload (int or floating point) is in scope.
      double diff = (double)TransposeMatrix[i] - (double)cpuTransposeMatrix[i];
      if (diff < 0) diff = -diff;
      if (diff > eps) {
        errors++;
      }
    }
    if (errors != 0) {
      fprintf(stderr, "FAILED: %d errors\n", errors);
    } else {
      fprintf(stderr, "PASSED!\n");
    }
#endif

    // free the resources on device side
    CHECK_HIP(hipFree(gpuMatrix));
    CHECK_HIP(hipFree(gpuTransposeMatrix));

    // correlation region end
    roctracer_activity_pop_external_correlation_id(NULL);
    // NOTE(review): this is the fourth pop of the iteration while only three ids
    // (31, 32, 33) were pushed, and its return value is ignored. Confirm whether
    // the unbalanced pop is intentional before changing it.
    roctracer_activity_pop_external_correlation_id(NULL);

    // free the resources on host side
    free(Matrix);
    free(TransposeMatrix);
    free(cpuTransposeMatrix);
  }

  stop_tracing();
  return errors;
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// HIP Callbacks/Activity tracing
//
#if 1
#include <roctracer_hip.h>
#include <roctracer_hsa.h>
#include <roctracer_roctx.h>
#include <unistd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
// Evaluates a roctracer call; on a non-zero result prints roctracer_error_string()
// to stderr and aborts the process.
#define CHECK_ROCTRACER(call)                            \
  do {                                                   \
    const int roctracer_rc_ = (call);                    \
    if (roctracer_rc_ != 0) {                            \
      fprintf(stderr, "%s\n", roctracer_error_string()); \
      abort();                                           \
    }                                                    \
  } while (0)

// Thread id of the caller, obtained with the gettid system call.
static inline uint32_t GetTid() {
  const long tid = syscall(__NR_gettid);
  return (uint32_t)tid;
}

// Process id of the caller, obtained with the getpid system call.
static inline uint32_t GetPid() {
  const long pid = syscall(__NR_getpid);
  return (uint32_t)pid;
}
// Callback registered for both the rocTX and the HIP API domains.
//   domain        - activity domain the callback is invoked for
//   cid           - operation id inside that domain
//   callback_data - roctx_api_data_t for the rocTX domain, hip_api_data_t otherwise
//   arg           - unused
// rocTX events are printed directly to stdout. For every other domain one line is
// assembled with SPRINT() (API name, ids, phase, pid/tid plus a few
// call-specific arguments) and emitted with SFLUSH().
void api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
  (void)arg;

  if (domain == ACTIVITY_DOMAIN_ROCTX) {
    const roctx_api_data_t* roctx_data = (const roctx_api_data_t*)(callback_data);
    fprintf(stdout, "rocTX <\"%s pid(%d) tid(%d)\">\n", roctx_data->args.message, GetPid(),
            GetTid());
    return;
  }

  const hip_api_data_t* hip_data = (const hip_api_data_t*)(callback_data);
  const int entering = (hip_data->phase == ACTIVITY_API_PHASE_ENTER);

  SPRINT("<%s id(%u)\tcorrelation_id(%lu) %s pid(%d) tid(%d)> ",
         roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), cid, hip_data->correlation_id,
         entering ? "on-enter" : "on-exit", GetPid(), GetTid());

  if (!entering) {
    // On exit only hipMalloc gets extra output: the pointer value stored by the call.
    if (cid == HIP_API_ID_hipMalloc) {
      SPRINT("*ptr(0x%p)", *(hip_data->args.hipMalloc.ptr));
    }
  } else if (cid == HIP_API_ID_hipMemcpy) {
    SPRINT("dst(%p) src(%p) size(0x%x) kind(%u)", hip_data->args.hipMemcpy.dst,
           hip_data->args.hipMemcpy.src, (uint32_t)(hip_data->args.hipMemcpy.sizeBytes),
           (uint32_t)(hip_data->args.hipMemcpy.kind));
  } else if (cid == HIP_API_ID_hipMalloc) {
    SPRINT("ptr(%p) size(0x%x)", hip_data->args.hipMalloc.ptr,
           (uint32_t)(hip_data->args.hipMalloc.size));
  } else if (cid == HIP_API_ID_hipFree) {
    SPRINT("ptr(%p)", hip_data->args.hipFree.ptr);
  } else if (cid == HIP_API_ID_hipModuleLaunchKernel) {
    SPRINT("kernel(\"%s\") stream(%p)", hipKernelNameRef(hip_data->args.hipModuleLaunchKernel.f),
           hip_data->args.hipModuleLaunchKernel.stream);
  }

  SPRINT("\n");
  SFLUSH();
}
// Buffer callback for activity records.
//   begin, end - bounds of the record buffer handed over by roctracer
//   arg        - unused
// Walks the records with roctracer_next_record() and prints one line per record:
// operation name, correlation id and begin/end timestamps, followed by
// domain-specific fields. A record from an unexpected domain aborts the process.
void activity_callback(const char* begin, const char* end, void* arg) {
  (void)arg;
  const roctracer_record_t* rec = (const roctracer_record_t*)(begin);
  const roctracer_record_t* rec_end = (const roctracer_record_t*)(end);

  SPRINT("\tActivity records:\n");
  while (rec < rec_end) {
    SPRINT("\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)",
           roctracer_op_string(rec->domain, rec->op, rec->kind), rec->correlation_id,
           rec->begin_ns, rec->end_ns);

    if (rec->domain == ACTIVITY_DOMAIN_HIP_API) {
      SPRINT(" process_id(%u) thread_id(%u)", rec->process_id, rec->thread_id);
    } else if (rec->domain == ACTIVITY_DOMAIN_HIP_OPS) {
      SPRINT(" device_id(%d) queue_id(%lu)", rec->device_id, rec->queue_id);
      if (rec->op == HIP_OP_ID_COPY) {
        SPRINT(" bytes(0x%zx)", rec->bytes);
      }
    } else if (rec->domain == ACTIVITY_DOMAIN_HSA_OPS) {
      SPRINT(" se(%u) cycle(%lu) pc(%lx)", rec->pc_sample.se, rec->pc_sample.cycle,
             rec->pc_sample.pc);
    } else if (rec->domain == ACTIVITY_DOMAIN_EXT_API) {
      SPRINT(" external_id(%lu)", rec->external_id);
    } else {
      fprintf(stdout, "Bad domain %d\n\n", rec->domain);
      abort();
    }

    SPRINT("\n");
    SFLUSH();

    // Advance to the following record; the helper updates `rec` in place.
    CHECK_ROCTRACER(roctracer_next_record(rec, &rec));
  }
}
// Init tracing routine
// Opens the activity pool and enables the callbacks / activity domains used by this test.
// Every CHECK_ROCTRACER-wrapped call aborts the process on failure.
void init_tracing() {
fprintf(stderr, "# INIT #############################\n");
// roctracer properties
// (the return value of this call is not checked)
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, NULL);
// Allocating tracing pool
roctracer_properties_t properties;
// Zero every field first; only the buffer size and the buffer callback are set explicitly
memset(&properties, 0, sizeof(roctracer_properties_t));
properties.buffer_size = 0x1000;
properties.buffer_callback_fun = activity_callback;
CHECK_ROCTRACER(roctracer_open_pool(&properties));
// Enable HIP API callbacks
CHECK_ROCTRACER(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, NULL));
// Enable HIP activity tracing
// (HIP API activity records only when HIP_API_ACTIVITY_ON is set at compile time)
#if HIP_API_ACTIVITY_ON
CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
#endif
CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
// Enable PC sampling
CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_RESERVED1));
// Enable rocTX
// (the same api_callback handles rocTX events; it branches on the domain argument)
CHECK_ROCTRACER(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, NULL));
}
// Called at the top of every test iteration. Logs the current value of the global
// `iterations` counter and toggles tracing on its lowest bit: roctracer_start()
// when the bit is set, roctracer_stop() otherwise.
void start_tracing() {
  fprintf(stderr, "# START (%d) #############################\n", iterations);

  if ((iterations & 1) != 1) {
    roctracer_stop();
  } else {
    roctracer_start();
  }
}
// Stop tracing routine
// Disables the callbacks and activity domains enabled by init_tracing(), then flushes the
// activity records. Every call is wrapped in CHECK_ROCTRACER and aborts on failure.
void stop_tracing() {
CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
#if HIP_API_ACTIVITY_ON
CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
#endif
CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));
// Flush after everything has been disabled
CHECK_ROCTRACER(roctracer_flush_activity());
fprintf(stderr, "# STOP #############################\n");
}
#else
// No-op tracing hooks; this branch is compiled only if the `#if 1` above is changed to `#if 0`.
void init_tracing() {}
void start_tracing() {}
void stop_tracing() {}
#endif
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -0,0 +1,32 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "roctracer.h"
// Entry points declared by hand in this test (see the trailing notes).
extern "C" int roctracer_load();        // Removed in ROCTX 4.1
extern "C" void roctracer_unload();     // Removed in ROCTX 4.1
extern "C" void roctracer_flush_buf();  // Removed in ROCTX 4.1

// Calls the three entry points in sequence.
// Returns 0 when roctracer_load() reports 1, otherwise -1 without calling the
// other two functions.
int main() {
  const int load_status = roctracer_load();
  if (load_status == 1) {
    roctracer_flush_buf();
    roctracer_unload();
    return 0;
  }
  return -1;
}
+69
Féach ar an gComhad
@@ -0,0 +1,69 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <cstring>
#include <cstdio>
#include <cstdlib>
#include "roctracer.h"
#include "roctracer_hsa.h"
namespace {
// Check returned HSA API status
inline void CHECK(roctracer_status_t status) {
if (status != ROCTRACER_STATUS_SUCCESS) {
fprintf(stderr, "ERROR: %s\n", roctracer_error_string());
abort();
}
}
// codeobj callback
void CodeObjectCallback(uint32_t domain, uint32_t cid, const void* data, void* arg) {
const hsa_evt_data_t* evt_data = reinterpret_cast<const hsa_evt_data_t*>(data);
fprintf(stdout,
"codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) "
"load_delta(0x%lx) uri(\"%s\") unload(%d)\n",
domain, cid, evt_data->codeobj.load_base, evt_data->codeobj.load_size,
evt_data->codeobj.load_delta, evt_data->codeobj.uri, evt_data->codeobj.unload);
}
} // namespace
#include <hsa/hsa_api_trace.h>
// Symbols exported with C linkage for the HSA runtime tool interface.
extern "C" {
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
// itself, not by code during _init. 'extern const' seems to do that although that is not a
// guarantee.
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 1050;
// HSA-runtime tool on-load method
// Registers CodeObjectCallback for the HSA_EVT_ID_CODEOBJ operation of the HSA_EVT domain and
// returns true; CHECK() aborts if the registration fails. None of the parameters are used.
ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
uint64_t failed_tool_count, const char* const* failed_tool_names) {
CHECK(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ,
CodeObjectCallback, nullptr));
return true;
}
// Unload counterpart: disables the callbacks of the whole HSA_EVT domain.
ROCTRACER_EXPORT void OnUnload() {
CHECK(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_EVT));
}
} // extern "C"
+75
Féach ar an gComhad
@@ -0,0 +1,75 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <hip/hip_runtime.h>
#include "roctx.h"
#include <thread>
// Evaluates a HIP call; on a result other than hipSuccess prints the HIP error string to stderr
// and aborts. Wrapped in do { } while (0) so that `HIP_CALL(x);` is a single statement.
#define HIP_CALL(call) \
do { \
hipError_t err = call; \
if (err != hipSuccess) { \
fprintf(stderr, "%s\n", hipGetErrorString(err)); \
abort(); \
} \
} while (0)
// Empty kernel; main() launches it at different rocTX nesting depths.
__global__ void kernel() {}
// Launches the empty kernel outside any rocTX range, inside a first-level range, inside nested
// ranges, and after a nested range has been closed. Returns -1 when a roctxRangePop() result
// differs from the expected value, 0 otherwise. argc/argv are unused.
int main(int argc, char* argv[]) {
HIP_CALL(hipSetDevice(0));
// Not in a roctx range.
kernel<<<1, 1>>>();
// Keep the value returned by the push: the matching pop below must return the same value.
int ret = roctxRangePush("NestedRangeA");
// In a simple first level roctx range.
kernel<<<1, 1>>>();
if (roctxRangePop() != ret) return -1;
roctxRangePush("NestedRangeB");
roctxRangePush("NestedRangeC");
// A start/stop range is opened while two push/pop ranges are active.
roctx_range_id_t id = roctxRangeStart("StartStopRangeA");
// In a nested roctx range.
kernel<<<1, 1>>>();
// Close NestedRangeC, then NestedRangeB.
roctxRangePop();
roctxRangePop();
// The start/stop range is stopped from a different thread than the one that started it.
std::thread thread([id]() { roctxRangeStop(id); });
thread.join();
roctxRangePush("NestedRangeD");
roctxRangePush("NestedRangeE");
// Close NestedRangeE; NestedRangeD stays open.
roctxRangePop();
// In a first level roctx range, but after a nested range.
kernel<<<1, 1>>>();
// Closing NestedRangeD, the only range still open, is expected to return 0.
if (roctxRangePop() != 0) return -1;
HIP_CALL(hipDeviceSynchronize());
return 0;
}
@@ -0,0 +1,139 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <hip/hip_runtime.h>
#include <roctracer.h>
#define HIP_PROF_HIP_API_STRING 1
#include <roctracer_hip.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
// Empty kernel; launching it is enough to produce HIP API activity to trace.
__global__ void kernel() {}

// CHECK(status): aborts the process with a diagnostic on stderr when `status`
// reports a failure. Only the explicit specializations below are defined.
// Diagnostics go through fprintf(stderr, ...): <stdio.h> is included by this
// file, whereas <iostream> (needed for std::cerr / std::endl) is not.
template <typename T> inline void CHECK(T status);

// HIP status check: anything other than hipSuccess is fatal.
template <> inline void CHECK(hipError_t err) {
  if (err != hipSuccess) {
    fprintf(stderr, "%s\n", hipGetErrorString(err));
    abort();
  }
}

// roctracer status check: anything other than ROCTRACER_STATUS_SUCCESS is fatal.
template <> inline void CHECK(roctracer_status_t status) {
  if (status != ROCTRACER_STATUS_SUCCESS) {
    fprintf(stderr, "%s\n", roctracer_error_string());
    abort();
  }
}
namespace {
uint32_t GetPid() {
static auto pid = syscall(__NR_getpid);
return pid;
}
uint32_t GetTid() {
static thread_local auto tid = syscall(__NR_gettid);
return tid;
}
void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
const hip_api_data_t* data = static_cast<const hip_api_data_t*>(callback_data);
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s pid(%d) tid(%d)>\n",
roctracer_op_string(domain, cid, 0), cid, data->correlation_id,
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit", GetPid(), GetTid());
}
void buffer_callback(const char* begin, const char* end, void* arg) {
for (const roctracer_record_t* record = (const roctracer_record_t*)begin;
record < (const roctracer_record_t*)end; CHECK(roctracer_next_record(record, &record))) {
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)\n",
roctracer_op_string(record->domain, record->op, record->kind), record->correlation_id,
record->begin_ns, record->end_ns);
}
}
} // namespace
namespace {

// One unit of traced work, identical for every phase of the test: select device
// 0, launch the empty kernel, wait for it to finish and flush the activity
// records collected so far.
void RunWorkloadAndFlush() {
  CHECK(hipSetDevice(0));
  kernel<<<1, 1>>>();
  CHECK(hipDeviceSynchronize());
  CHECK(roctracer_flush_activity());
}

}  // namespace

// Cycles the HIP API domain through every combination of "callbacks enabled" and
// "activity records enabled", running the same workload after each transition.
// Any failing HIP or roctracer call aborts through CHECK(); returns 0 otherwise.
int main() {
  CHECK(hipSetDevice(0));

  roctracer_properties_t properties{};
  properties.buffer_callback_fun = buffer_callback;
  properties.buffer_callback_arg = nullptr;
  properties.buffer_size = 1024;
  CHECK(roctracer_open_pool(&properties));

  // 1: callbacks only
  CHECK(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr));
  RunWorkloadAndFlush();

  // 2: callbacks and activities
  CHECK(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
  RunWorkloadAndFlush();

  // 3: activities only
  CHECK(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
  RunWorkloadAndFlush();

  // 4: callbacks only
  CHECK(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr));
  CHECK(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
  RunWorkloadAndFlush();

  // 5: callbacks and activities
  CHECK(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
  RunWorkloadAndFlush();

  // 6: callbacks only
  CHECK(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
  RunWorkloadAndFlush();

  // 7: none
  CHECK(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
  CHECK(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
  RunWorkloadAndFlush();

  return 0;
}
+94
Féach ar an gComhad
@@ -0,0 +1,94 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "roctracer.h"
#include <dlfcn.h>
#include <hsa/hsa.h>
#include <cassert>
// Function types of the entry points resolved with dlsym().
using get_timestamp_t = decltype(roctracer_get_timestamp);
using hsa_init_t = decltype(hsa_init);
using hsa_shut_down_t = decltype(hsa_shut_down);

namespace {

// Opens libroctracer64.so, resolves roctracer_get_timestamp and calls it once.
// Returns the library handle, which the caller must dlclose(), or nullptr when
// the library or the symbol cannot be found (the library is closed again in the
// latter case).
void* LoadTracerAndQueryTimestamp() {
  void* tracer_library = dlopen("libroctracer64.so", RTLD_LAZY);
  if (tracer_library == nullptr) return nullptr;

  auto* get_timestamp =
      reinterpret_cast<get_timestamp_t*>(dlsym(tracer_library, "roctracer_get_timestamp"));
  if (get_timestamp == nullptr) {
    dlclose(tracer_library);
    return nullptr;
  }

  roctracer_timestamp_t timestamp;
  (*get_timestamp)(&timestamp);
  return tracer_library;
}

}  // namespace

// Loads and uses the roctracer library in three orders relative to the HSA
// runtime. Every failure is checked twice: assert() gives a diagnostic in debug
// builds, and the explicit test keeps the check alive when asserts are compiled
// out (NDEBUG), where main() returns 1 instead of dereferencing a null pointer.
int main() {
  // CASE 1: HSA is not loaded.
  //
  {
    void* tracer_library = LoadTracerAndQueryTimestamp();
    assert(tracer_library != nullptr);
    if (tracer_library == nullptr) return 1;
    dlclose(tracer_library);
  }

  // CASE 2 Load the roctracer after hsa_init().
  //
  void* hsa_library = dlopen("libhsa-runtime64.so.1", RTLD_LAZY);
  assert(hsa_library != nullptr);
  if (hsa_library == nullptr) return 1;

  // Named *_fn so they do not shadow the hsa_init / hsa_shut_down declarations.
  auto* hsa_init_fn = reinterpret_cast<hsa_init_t*>(dlsym(hsa_library, "hsa_init"));
  auto* hsa_shut_down_fn =
      reinterpret_cast<hsa_shut_down_t*>(dlsym(hsa_library, "hsa_shut_down"));
  assert(hsa_init_fn != nullptr && hsa_shut_down_fn != nullptr);
  if (hsa_init_fn == nullptr || hsa_shut_down_fn == nullptr) return 1;

  {
    (*hsa_init_fn)();
    void* tracer_library = LoadTracerAndQueryTimestamp();
    assert(tracer_library != nullptr);
    if (tracer_library == nullptr) return 1;
    dlclose(tracer_library);
    (*hsa_shut_down_fn)();
  }

  // CASE 3: Load and use the roctracer before hsa_init().
  //
  {
    void* tracer_library = LoadTracerAndQueryTimestamp();
    assert(tracer_library != nullptr);
    if (tracer_library == nullptr) return 1;
    (*hsa_init_fn)();
    (*hsa_shut_down_fn)();
    dlclose(tracer_library);
  }

  return 0;
}
+125
Féach ar an gComhad
@@ -0,0 +1,125 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "roctracer.h"
#include "memory_pool.h"
#include <algorithm>
#include <atomic>
#include <cstdlib>
#include <iterator>
#include <iostream>
#include <fstream>
#include <thread>
#include <vector>
using namespace roctracer;
namespace {

// Number of logical CPUs available to size the stress test.
// Uses std::thread::hardware_concurrency(); when that reports 0 (value not
// computable) it falls back to counting the "processor" tokens of /proc/cpuinfo.
// The stream is local to this function, so the file is closed again as soon as
// the count is known.
std::size_t CountCpuCores() {
  if (const unsigned int hw_threads = std::thread::hardware_concurrency(); hw_threads != 0) {
    return hw_threads;
  }
  std::ifstream cpuinfo("/proc/cpuinfo");
  return static_cast<std::size_t>(std::count(std::istream_iterator<std::string>(cpuinfo),
                                             std::istream_iterator<std::string>(),
                                             std::string("processor")));
}

const std::size_t num_cpu_cores = CountCpuCores();

// Number of records written by each worker thread of the stress test.
constexpr std::size_t num_iterations = 1000;
// Bounds applied to the worker thread count derived from num_cpu_cores.
constexpr std::size_t min_num_threads = 10;
constexpr std::size_t max_num_threads = 50;

// Prints `message` to stderr and aborts the process.
void fatal_error(const char* message) {
  std::cerr << message << std::endl;
  abort();
}

}  // namespace
// Test driver for MemoryPool. It installs a flush callback that counts flushes and records, then
// runs three single-threaded scenarios followed by a multi-threaded stress test. Whenever a
// counter does not match the expected value the process is aborted through fatal_error().
//
// In the trailing "F=…, R=…" comments, F is the flush_count and R the relocation_count that the
// test expects to observe once that statement has completed.
int main() {
// The pool buffer is sized for 10 records; 'max_data_size' is what is left after one record.
constexpr size_t buffer_size = 10 * sizeof(roctracer_record_t);
constexpr size_t max_data_size = buffer_size - sizeof(roctracer_record_t);
size_t flush_count = 0, record_count = 0;
// Flush observer: counts the call, sleeps for 10 microseconds, then adds the number of whole
// records contained in [begin, end) to 'record_count'.
auto flush_callback = [&flush_count, &record_count](const char* begin, const char* end) {
++flush_count;
std::this_thread::sleep_for(std::chrono::microseconds(10));
record_count += (end - begin) / sizeof(roctracer_record_t);
};
roctracer_properties_t properties{};
// Capture-less trampoline: 'arg' carries the address of 'flush_callback' (set just below), so it
// is cast back to the closure type and invoked.
properties.buffer_callback_fun = [](const char* begin, const char* end, void* arg) {
(*static_cast<decltype(flush_callback)*>(arg))(begin, end);
};
properties.buffer_callback_arg = &flush_callback;
properties.buffer_size = buffer_size;
MemoryPool pool(properties);
// 'original_data' is assigned before each Write() that passes 'relocate_data'.
const void* original_data;
std::atomic<int> relocation_count{0};
// Counts the invocations in which the 'data' pointer handed to the functor differs from
// 'original_data'. NOTE(review): presumably MemoryPool passes the address at which it stored the
// data — confirm against memory_pool.h.
auto relocate_data = [&relocation_count, &original_data](roctracer_record_t&, const void* data) {
if (data != original_data) ++relocation_count;
};
// test1: the record and data fit in the buffer: no flush, data should get relocated.
constexpr char data_fits[max_data_size] = {0};
original_data = data_fits;
pool.Write(roctracer_record_t{}, data_fits, sizeof(data_fits), relocate_data); // F=0, R=1
pool.Flush(); // F=1, R=1
if (flush_count != 1 || relocation_count != 1) fatal_error("failed test1");
// Reset all three counters before the next scenario.
flush_count = record_count = relocation_count = 0;
// test2: the records and data do not fit in the buffer: 1 flush, data should get relocated.
pool.Write(roctracer_record_t{}); // F=0, R=0
pool.Write(roctracer_record_t{}, data_fits, sizeof(data_fits), relocate_data); // F=1, R=1
pool.Flush(); // F=2, R=1
if (flush_count != 2 || relocation_count != 1) fatal_error("failed test2");
flush_count = record_count = relocation_count = 0;
// test3: data does not fit in the buffer: 1 Flush, data is not relocated, all records should be
// processed.
// 'does_not_fit' is one byte larger than 'max_data_size'. No explicit Flush() is issued here: the
// counters are checked right after Write() returns.
constexpr char does_not_fit[max_data_size + 1] = {0};
original_data = does_not_fit;
pool.Write(roctracer_record_t{}, does_not_fit, sizeof(does_not_fit), relocate_data); // F=1, R=0
if (flush_count != 1 || relocation_count != 0 || record_count != 1) fatal_error("failed test3");
flush_count = record_count = relocation_count = 0;
// test4: stress test writing and flushing.
// The worker count is the detected core count, bounded to [min_num_threads, max_num_threads].
const std::size_t num_threads = std::clamp(num_cpu_cores, min_num_threads, max_num_threads);
std::vector<std::thread> threads(num_threads);
// Start the worker threads. Each thread will write 'num_iterations' records in the memory
// pool, then exit.
for (auto&& thread : threads) {
thread = std::thread([&pool]() {
for (std::size_t j = 0; j < num_iterations; ++j) pool.Write(roctracer_record_t{});
});
}
// Wait for all the threads to complete, then flush the trace buffer.
for (auto&& thread : threads) thread.join();
pool.Flush();
// Every written record must have been reported, and the number of flushes must equal the total
// record count divided by the number of records that fit in one buffer.
if (record_count != num_iterations * threads.size() ||
flush_count != (record_count / (buffer_size / sizeof(roctracer_record_t))))
fatal_error("failed test4");
return 0;
}
@@ -0,0 +1,94 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <hip/hip_runtime.h>
#include <roctracer.h>
#include <roctracer_hip.h>
#include <iostream>
// This test checks that asynchronous activities can be enabled in distinct memory pools. It enables
// activity reporting for HIP kernel dispatches in one memory pool, and memory copy reporting in
// another memory pool. The output of this test to stdout should be a series of kernel dispatch
// records (10) followed by a series of memory copy records (10). The records should not be
// interleaved.
// Empty device kernel: it never touches 'global_memory'. main() launches it so that there are
// kernel dispatches for the tracer to report.
__global__ void kernel(void* global_memory) {}
namespace {
template <typename T> inline void CHECK(T status);
template <> inline void CHECK(hipError_t err) {
if (err != hipSuccess) {
std::cerr << hipGetErrorString(err) << std::endl;
abort();
}
}
template <> inline void CHECK(roctracer_status_t status) {
if (status != ROCTRACER_STATUS_SUCCESS) {
std::cerr << roctracer_error_string() << std::endl;
abort();
}
}
void buffer_callback(const char* begin, const char* end, void* arg) {
for (const roctracer_record_t* record = (const roctracer_record_t*)begin;
record != (const roctracer_record_t*)end; CHECK(roctracer_next_record(record, &record))) {
fprintf(stdout, "\t:%s\t: correlation_id(%lu) time_ns(%lu:%lu)\n",
roctracer_op_string(record->domain, record->op, record->kind), record->correlation_id,
record->begin_ns, record->end_ns);
}
}
} // namespace
int main() {
CHECK(hipSetDevice(0));
roctracer_properties_t properties{};
properties.buffer_callback_fun = buffer_callback;
properties.buffer_callback_arg = nullptr;
properties.buffer_size = 1024 * 1024;
roctracer_pool_t* pool_1;
CHECK(roctracer_open_pool_expl(&properties, &pool_1));
CHECK(roctracer_enable_op_activity_expl(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_DISPATCH, pool_1));
roctracer_pool_t* pool_2;
CHECK(roctracer_open_pool_expl(&properties, &pool_2));
CHECK(roctracer_enable_op_activity_expl(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY, pool_2));
CHECK(roctracer_enable_op_activity_expl(ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipMemcpy, pool_2));
int host_array[256] = {0};
int* device_memory;
CHECK(hipMalloc(&device_memory, sizeof(host_array)));
for (int i = 0; i < 10; ++i) {
CHECK(hipMemcpy(device_memory, host_array, sizeof(host_array), hipMemcpyHostToDevice));
kernel<<<1, 1>>>(device_memory);
}
CHECK(hipDeviceSynchronize());
CHECK(roctracer_flush_activity_expl(pool_1));
CHECK(roctracer_flush_activity_expl(pool_2));
return 0;
}
@@ -0,0 +1,79 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include "trace_buffer.h"
#include <algorithm>
#include <atomic>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <thread>
#include <vector>
// Minimal entry type stored in the TraceBuffer under test. Its only member is the 'valid' state
// flag, which the worker threads in main() set to TRACE_ENTRY_COMPLETE after obtaining an entry
// from Emplace().
struct TraceEntry {
std::atomic<roctracer::TraceEntryState> valid;
};
// NOTE(review): this macro is not defined in this file (presumably it comes from trace_buffer.h
// and emits definitions the TraceBuffer implementation needs once per program) — confirm against
// the header.
TRACE_BUFFER_INSTANTIATE();
namespace {

// Stream over the kernel-provided CPU description. It is consumed exactly once, while
// 'num_cpu_cores' is being initialized below.
std::ifstream cpuinfo("/proc/cpuinfo");

// Number of whitespace-separated tokens in /proc/cpuinfo that are exactly "processor".
// Evaluates to 0 when the file could not be opened.
const std::size_t num_cpu_cores = [] {
  using Token = std::istream_iterator<std::string>;
  const std::string wanted("processor");
  return std::count(Token(cpuinfo), Token(), wanted);
}();

// Entries requested by each worker thread, and the bounds applied to the worker thread count.
constexpr std::size_t num_iterations = 1000;
constexpr std::size_t min_num_threads = 10;
constexpr std::size_t max_num_threads = 50;

}  // namespace
int main() {
const std::size_t num_threads = std::clamp(num_cpu_cores, min_num_threads, max_num_threads);
std::vector<std::thread> threads(num_threads);
std::atomic<size_t> flush_count(0); // Count the number of times the flush callback is called.
roctracer::TraceBuffer<TraceEntry> trace_buffer("Test", 10,
[&flush_count](auto* entry) { ++flush_count; });
// Start the worker threads. Each thread will request 'num_iterations' entries from the
// 'trace_buffer', then exit.
for (auto&& thread : threads) {
thread = std::thread([&trace_buffer]() {
for (std::size_t j = 0; j < num_iterations; ++j) {
auto& entry = trace_buffer.Emplace();
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
}
});
}
// Wait for all the threads to complete, then flush the trace buffer.
for (auto&& thread : threads) thread.join();
trace_buffer.Flush();
std::cout << "number of records flushed = " << flush_count << std::endl;
if (flush_count != num_iterations * threads.size()) abort();
return EXIT_SUCCESS;
}
@@ -0,0 +1,503 @@
+ LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_ctest
# INIT #############################
# START (99) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (98) #############################
PASSED!
# START (97) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (96) #############################
PASSED!
# START (95) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (94) #############################
PASSED!
# START (93) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (92) #############################
PASSED!
# START (91) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (90) #############################
PASSED!
# START (89) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (88) #############################
PASSED!
# START (87) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (86) #############################
PASSED!
# START (85) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (84) #############################
PASSED!
# START (83) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (82) #############################
PASSED!
# START (81) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (80) #############################
PASSED!
# START (79) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (78) #############################
PASSED!
# START (77) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (76) #############################
PASSED!
# START (75) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (74) #############################
PASSED!
# START (73) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (72) #############################
PASSED!
# START (71) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (70) #############################
PASSED!
# START (69) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (68) #############################
PASSED!
# START (67) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (66) #############################
PASSED!
# START (65) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (64) #############################
PASSED!
# START (63) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (62) #############################
PASSED!
# START (61) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (60) #############################
PASSED!
# START (59) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (58) #############################
PASSED!
# START (57) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (56) #############################
PASSED!
# START (55) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (54) #############################
PASSED!
# START (53) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (52) #############################
PASSED!
# START (51) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (50) #############################
PASSED!
# START (49) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (48) #############################
PASSED!
# START (47) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (46) #############################
PASSED!
# START (45) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (44) #############################
PASSED!
# START (43) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (42) #############################
PASSED!
# START (41) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (40) #############################
PASSED!
# START (39) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (38) #############################
PASSED!
# START (37) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (36) #############################
PASSED!
# START (35) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (34) #############################
PASSED!
# START (33) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (32) #############################
PASSED!
# START (31) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (30) #############################
PASSED!
# START (29) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (28) #############################
PASSED!
# START (27) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (26) #############################
PASSED!
# START (25) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (24) #############################
PASSED!
# START (23) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (22) #############################
PASSED!
# START (21) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (20) #############################
PASSED!
# START (19) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (18) #############################
PASSED!
# START (17) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (16) #############################
PASSED!
# START (15) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (14) #############################
PASSED!
# START (13) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (12) #############################
PASSED!
# START (11) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (10) #############################
PASSED!
# START (9) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (8) #############################
PASSED!
# START (7) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (6) #############################
PASSED!
# START (5) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (4) #############################
PASSED!
# START (3) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (2) #############################
PASSED!
# START (1) #############################
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
rocTX <"hipMemcpy pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
rocTX <"(null) pid(22834) tid(22834)">
PASSED!
# START (0) #############################
PASSED!
# STOP #############################
@@ -0,0 +1,815 @@
0x21fde60 agent cpu
0x21ff890 agent cpu
0x2239bd0 agent gpu
0x223ddf0 agent gpu
0x2242290 agent gpu
0x2246740 agent gpu
6503209724059324
ROCtracer (35331):
ROCtracer: trace control flush rate(100000us)
HIP-trace(*)
6503209734529563:6503209734531917 35331:35331 hipGetDevicePropertiesR0600(props={pageableMemoryAccessUsesHostPageTables=0, pageableMemoryAccess=0, concurrentManagedAccess=1, directManagedMemAccessFromHost=0, managedMemory=1, asicRevision=1, isLargeBar=1, cooperativeMultiDeviceUnmatchedSharedMem=1, cooperativeMultiDeviceUnmatchedBlockDim=1, cooperativeMultiDeviceUnmatchedGridDim=1, cooperativeMultiDeviceUnmatchedFunc=1, tccDriver=0, ECCEnabled=0, kernelExecTimeoutEnabled=0, texturePitchAlignment=256, textureAlignment=256, memPitch=2147483647, hdpRegFlushCntl=0x7f1f3948c004, hdpMemFlushCntl=0x7f1f3948c000, maxTexture3D=0x7f1e76c31318, maxTexture2D=0x7f1e76c31310, maxTexture1D=16384, maxTexture1DLinear=2147483647, cooperativeMultiDeviceLaunch=1, cooperativeLaunch=1, integrated=0, gcnArchName="gfx90a:sramecc+:xnack-", gcnArch=910, canMapHostMemory=1, isMultiGpuBoard=0, maxSharedMemoryPerMultiProcessor=65536, pciDeviceID=0, pciBusID=74, pciDomainID=0, concurrentKernels=1, arch={}, clockInstructionRate=1000000, computeMode=0, maxThreadsPerMultiProcessor=2048, l2CacheSize=8388608, multiProcessorCount=104, minor=0, major=9, totalConstMem=2147483647, memoryBusWidth=4096, memoryClockRate=1600000, clockRate=1700000, maxGridSize=0x7f1e76c31198, maxThreadsDim=0x7f1e76c3118c, maxThreadsPerBlock=1024, warpSize=64, regsPerBlock=65536, sharedMemPerBlock=65536, totalGlobalMem=68702699520, name="AMD Instinct MI210"}, device=0) :1
6503209735378958:6503209735406670 35331:35331 hipMalloc(ptr=0x7f19f7a00000, size=4194304) :2
6503209735407101:6503209735423080 35331:35331 hipMalloc(ptr=0x7f19f7400000, size=4194304) :3
6503209978814181:6503209979619458 2:0 CopyHostToDevice:4:35331
6503209980083977:6503209980169097 2:0 matrixTranspose(float*, float*, int):7:35331
6503209981680453:6503209984909684 2:0 CopyDeviceToHost:8:35331
6503209994109180:6503209995685815 2:0 CopyHostToDevice:9:35331
6503209995761855:6503209995862975 2:0 matrixTranspose(float*, float*, int):12:35331
6503209995867135:6503209997327251 2:0 CopyDeviceToHost:13:35331
6503210005717148:6503210007365784 2:0 CopyHostToDevice:14:35331
6503210007436384:6503210007539904 2:0 matrixTranspose(float*, float*, int):17:35331
6503210007543584:6503210008994420 2:0 CopyDeviceToHost:18:35331
6503210017396637:6503210019047993 2:0 CopyHostToDevice:19:35331
6503210019116193:6503210019222752 2:0 matrixTranspose(float*, float*, int):22:35331
6503210019226432:6503210020670388 2:0 CopyDeviceToHost:23:35331
6503209735433059:6503209979689030 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :4
6503209979696614:6503209979697045 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :5
6503209979701393:6503209979701654 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :6
6503209979703096:6503209980073714 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :7
6503209980087790:6503209985036979 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :8
6503209994087224:6503209995737275 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :9
6503209995740781:6503209995741012 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :10
6503209995741503:6503209995741663 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :11
6503209995741943:6503209995748686 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :12
6503209995750359:6503209997440904 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :13
6503210005702036:6503210007416737 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :14
6503210007419652:6503210007419802 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :15
6503210007420093:6503210007420233 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :16
6503210007420464:6503210007424171 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :17
6503210007424591:6503210009107362 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :18
6503210017382250:6503210019098583 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :19
6503210019099114:6503210019099254 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :20
6503210019099474:6503210019099595 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :21
6503210019099845:6503210019104073 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :22
6503210019104454:6503210020779761 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :23
6503210029076606:6503210030728121 2:0 CopyHostToDevice:24:35331
6503210030793281:6503210030895521 2:0 matrixTranspose(float*, float*, int):27:35331
6503210030899201:6503210032343317 2:0 CopyDeviceToHost:28:35331
6503210040715328:6503210042368282 2:0 CopyHostToDevice:29:35331
6503210042440800:6503210042542880 2:0 matrixTranspose(float*, float*, int):32:35331
6503210042546560:6503210043989715 2:0 CopyDeviceToHost:33:35331
6503210052371125:6503210054023439 2:0 CopyHostToDevice:34:35331
6503210054093079:6503210054195158 2:0 matrixTranspose(float*, float*, int):37:35331
6503210054198838:6503210055644233 2:0 CopyDeviceToHost:38:35331
6503210064560521:6503210066213955 2:0 CopyHostToDevice:39:35331
6503210066279755:6503210066381835 2:0 matrixTranspose(float*, float*, int):42:35331
6503210066385515:6503210067829149 2:0 CopyDeviceToHost:43:35331
6503210076195679:6503210077863833 2:0 CopyHostToDevice:44:35331
6503210077934113:6503210078036193 2:0 matrixTranspose(float*, float*, int):47:35331
6503210078040033:6503210079483188 2:0 CopyDeviceToHost:48:35331
6503210088352916:6503210090004750 2:0 CopyHostToDevice:49:35331
6503210090070070:6503210090172149 2:0 matrixTranspose(float*, float*, int):52:35331
6503210090175669:6503210091618984 2:0 CopyDeviceToHost:53:35331
6503210100501672:6503210102156066 2:0 CopyHostToDevice:54:35331
6503210102222026:6503210102324106 2:0 matrixTranspose(float*, float*, int):57:35331
6503210102327946:6503210103774141 2:0 CopyDeviceToHost:58:35331
6503210112110867:6503210113766700 2:0 CopyHostToDevice:59:35331
6503210113844338:6503210113947698 2:0 matrixTranspose(float*, float*, int):62:35331
6503210113951378:6503210115396771 2:0 CopyDeviceToHost:63:35331
6503210029060790:6503210030776071 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :24
6503210030776722:6503210030776813 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :25
6503210030777083:6503210030777213 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :26
6503210030777534:6503210030780930 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :27
6503210030783495:6503210032454133 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :28
6503210040700588:6503210042424295 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :29
6503210042424756:6503210042424896 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :30
6503210042425107:6503210042425237 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :31
6503210042425457:6503210042428863 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :32
6503210042432991:6503210044098270 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :33
6503210052357328:6503210054074643 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :34
6503210054075114:6503210054075204 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :35
6503210054075394:6503210054075505 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :36
6503210054077799:6503210054081145 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :37
6503210054081536:6503210055752064 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :38
6503210064546646:6503210066261336 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :39
6503210066261796:6503210066261907 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :40
6503210066262137:6503210066262227 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :41
6503210066264632:6503210066267798 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :42
6503210066268178:6503210067936813 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :43
6503210076181475:6503210077915942 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :44
6503210077916312:6503210077916473 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :45
6503210077918687:6503210077918827 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :46
6503210077919047:6503210077922233 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :47
6503210077922664:6503210079590527 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :48
6503210088339324:6503210090051840 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :49
6503210090052241:6503210090052371 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :50
6503210090054485:6503210090054625 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :51
6503210090054855:6503210090058121 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :52
6503210090058462:6503210091724542 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :53
6503210100488016:6503210102203548 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :54
6503210102205732:6503210102205872 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :55
6503210102206082:6503210102206213 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :56
6503210102206463:6503210102210010 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :57
6503210102210380:6503210103881750 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :58
6503210112098009:6503210113825333 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :59
6503210113828058:6503210113828198 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :60
6503210113828388:6503210113828519 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :61
6503210113828889:6503210113832867 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :62
6503210113833307:6503210115506611 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :63
6503210123754974:6503210125414806 2:0 CopyHostToDevice:64:35331
6503210125480606:6503210125582365 2:0 matrixTranspose(float*, float*, int):67:35331
6503210125585885:6503210127028879 2:0 CopyDeviceToHost:68:35331
6503210135409161:6503210137061633 2:0 CopyHostToDevice:69:35331
6503210137127913:6503210137231913 2:0 matrixTranspose(float*, float*, int):72:35331
6503210137235753:6503210138677786 2:0 CopyDeviceToHost:73:35331
6503210147599026:6503210149250218 2:0 CopyHostToDevice:74:35331
6503210149317778:6503210149419538 2:0 matrixTranspose(float*, float*, int):77:35331
6503210149423378:6503210150866211 2:0 CopyDeviceToHost:78:35331
6503210159229213:6503210160882006 2:0 CopyHostToDevice:79:35331
6503210160948285:6503210161050205 2:0 matrixTranspose(float*, float*, int):82:35331
6503210161053885:6503210162499918 2:0 CopyDeviceToHost:83:35331
6503210170875401:6503210172542113 2:0 CopyHostToDevice:84:35331
6503210172609833:6503210172712232 2:0 matrixTranspose(float*, float*, int):87:35331
6503210172715912:6503210174186746 2:0 CopyDeviceToHost:88:35331
6503210182557576:6503210184208611 2:0 CopyHostToDevice:89:35331
6503210184280809:6503210184383369 2:0 matrixTranspose(float*, float*, int):92:35331
6503210184387049:6503210185831324 2:0 CopyDeviceToHost:93:35331
6503210194685533:6503210196337527 2:0 CopyHostToDevice:94:35331
6503210196406687:6503210196508767 2:0 matrixTranspose(float*, float*, int):97:35331
6503210196512607:6503210197956242 2:0 CopyDeviceToHost:98:35331
6503210206316372:6503210207967406 2:0 CopyHostToDevice:99:35331
6503210208037686:6503210208139766 2:0 matrixTranspose(float*, float*, int):102:35331
6503210208143446:6503210209588201 2:0 CopyDeviceToHost:103:35331
6503210217934252:6503210219584806 2:0 CopyHostToDevice:104:35331
6503210219650605:6503210219752205 2:0 matrixTranspose(float*, float*, int):107:35331
6503210219755885:6503210221203520 2:0 CopyDeviceToHost:108:35331
6503210123741484:6503210125463969 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :64
6503210125464811:6503210125464901 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :65
6503210125465201:6503210125465321 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :66
6503210125465632:6503210125469068 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :67
6503210125469589:6503210127137302 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :68
6503210135395218:6503210137111722 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :69
6503210137112112:6503210137112263 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :70
6503210137112493:6503210137112623 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :71
6503210137112844:6503210137116330 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :72
6503210137118444:6503210138787510 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :73
6503210147585037:6503210149301811 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :74
6503210149302232:6503210149302372 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :75
6503210149302582:6503210149302702 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :76
6503210149302913:6503210149306179 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :77
6503210149308704:6503210150974684 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :78
6503210159216530:6503210160930018 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :79
6503210160930449:6503210160930579 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :80
6503210160930759:6503210160930869 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :81
6503210160933294:6503210160936600 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :82
6503210160936991:6503210162608491 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :83
6503210170862119:6503210172590714 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :84
6503210172591115:6503210172591235 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :85
6503210172591436:6503210172591566 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :86
6503210172595143:6503210172598329 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :87
6503210172598689:6503210174298051 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :88
6503210182544005:6503210184262582 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :89
6503210184263033:6503210184263133 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :90
6503210184265157:6503210184265297 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :91
6503210184265538:6503210184268804 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :92
6503210184269185:6503210185939693 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :93
6503210194672109:6503210196385186 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :94
6503210196385617:6503210196385727 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :95
6503210196390917:6503210196391047 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :96
6503210196391288:6503210196394534 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :97
6503210196395065:6503210198065643 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :98
6503210206302971:6503210208019605 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :99
6503210208021769:6503210208021919 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :100
6503210208022109:6503210208022230 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :101
6503210208022480:6503210208025616 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :102
6503210208026027:6503210209696425 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :103
6503210217920678:6503210219631912 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :104
6503210219634487:6503210219634607 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :105
6503210219634857:6503210219634977 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :106
6503210219635208:6503210219638524 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :107
6503210219639005:6503210221311988 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :108
6503210229575811:6503210231227325 2:0 CopyHostToDevice:109:35331
6503210231294245:6503210231396004 2:0 matrixTranspose(float*, float*, int):112:35331
6503210231399684:6503210232882999 2:0 CopyDeviceToHost:113:35331
6503210241818168:6503210243474482 2:0 CopyHostToDevice:114:35331
6503210243540762:6503210243642681 2:0 matrixTranspose(float*, float*, int):117:35331
6503210243646361:6503210245111276 2:0 CopyDeviceToHost:118:35331
6503210253536692:6503210255190285 2:0 CopyHostToDevice:119:35331
6503210255263123:6503210255364883 2:0 matrixTranspose(float*, float*, int):122:35331
6503210255368723:6503210256818757 2:0 CopyDeviceToHost:123:35331
6503210265732641:6503210267385274 2:0 CopyHostToDevice:124:35331
6503210267455393:6503210267557793 2:0 matrixTranspose(float*, float*, int):127:35331
6503210267561473:6503210269015667 2:0 CopyDeviceToHost:128:35331
6503210277598192:6503210279249545 2:0 CopyHostToDevice:129:35331
6503210279319185:6503210279421104 2:0 matrixTranspose(float*, float*, int):132:35331
6503210279424784:6503210280874338 2:0 CopyDeviceToHost:133:35331
6503210289260224:6503210290912697 2:0 CopyHostToDevice:134:35331
6503210290983137:6503210291085217 2:0 matrixTranspose(float*, float*, int):137:35331
6503210291089057:6503210292533171 2:0 CopyDeviceToHost:138:35331
6503210300907376:6503210302558730 2:0 CopyHostToDevice:139:35331
6503210302628049:6503210302730129 2:0 matrixTranspose(float*, float*, int):142:35331
6503210302733809:6503210304178563 2:0 CopyDeviceToHost:143:35331
6503210312547809:6503210314201242 2:0 CopyHostToDevice:144:35331
6503210314267522:6503210314369601 2:0 matrixTranspose(float*, float*, int):147:35331
6503210314373121:6503210315816595 2:0 CopyDeviceToHost:148:35331
6503210229562500:6503210231277591 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :109
6503210231278423:6503210231278513 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :110
6503210231278803:6503210231278924 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :111
6503210231279164:6503210231282240 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :112
6503210231282681:6503210233000296 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :113
6503210241804135:6503210243524255 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :114
6503210243524656:6503210243524756 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :115
6503210243524926:6503210243525046 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :116
6503210243525277:6503210243528703 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :117
6503210243530877:6503210245227574 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :118
6503210253522920:6503210255246977 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :119
6503210255247438:6503210255247558 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :120
6503210255247789:6503210255247909 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :121
6503210255248169:6503210255251365 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :122
6503210255253920:6503210256939426 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :123
6503210265719601:6503210267437236 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :124
6503210267437627:6503210267437747 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :125
6503210267437968:6503210267438078 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :126
6503210267440492:6503210267443688 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :127
6503210267444109:6503210269129916 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :128
6503210277584437:6503210279300810 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :129
6503210279301261:6503210279301401 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :130
6503210279301671:6503210279301792 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :131
6503210279304346:6503210279307522 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :132
6503210279307933:6503210280988640 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :133
6503210289246847:6503210290965093 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :134
6503210290965514:6503210290965645 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :135
6503210290967768:6503210290967889 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :136
6503210290968169:6503210290971425 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :137
6503210290971806:6503210292641222 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :138
6503210300894099:6503210302609931 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :139
6503210302610392:6503210302610492 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :140
6503210302612446:6503210302612586 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :141
6503210302612857:6503210302616123 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :142
6503210302616584:6503210304286501 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :143
6503210312534909:6503210314249539 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :144
6503210314251813:6503210314251934 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :145
6503210314252204:6503210314252324 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :146
6503210314252585:6503210314255711 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :147
6503210314256091:6503210315925297 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :148
6503210324183531:6503210325837284 2:0 CopyHostToDevice:149:35331
6503210325912842:6503210326015722 2:0 matrixTranspose(float*, float*, int):152:35331
6503210326019402:6503210327465916 2:0 CopyDeviceToHost:153:35331
6503210335831961:6503210337492434 2:0 CopyHostToDevice:154:35331
6503210337580473:6503210337682233 2:0 matrixTranspose(float*, float*, int):157:35331
6503210337686073:6503210339132907 2:0 CopyDeviceToHost:158:35331
6503210348021029:6503210349674942 2:0 CopyHostToDevice:159:35331
6503210349740262:6503210349846021 2:0 matrixTranspose(float*, float*, int):162:35331
6503210349849701:6503210351297815 2:0 CopyDeviceToHost:163:35331
6503210359647380:6503210361302893 2:0 CopyHostToDevice:164:35331
6503210361389813:6503210361491572 2:0 matrixTranspose(float*, float*, int):167:35331
6503210361495412:6503210362940006 2:0 CopyDeviceToHost:168:35331
6503210371314691:6503210372968284 2:0 CopyHostToDevice:169:35331
6503210373048644:6503210373150563 2:0 matrixTranspose(float*, float*, int):172:35331
6503210373154243:6503210374617877 2:0 CopyDeviceToHost:173:35331
6503210383017682:6503210384692714 2:0 CopyHostToDevice:174:35331
6503210384765234:6503210384867634 2:0 matrixTranspose(float*, float*, int):177:35331
6503210384871474:6503210386314948 2:0 CopyDeviceToHost:178:35331
6503210394700772:6503210396409404 2:0 CopyHostToDevice:179:35331
6503210396484162:6503210396586242 2:0 matrixTranspose(float*, float*, int):182:35331
6503210396589922:6503210398071155 2:0 CopyDeviceToHost:183:35331
6503210407004395:6503210408654947 2:0 CopyHostToDevice:184:35331
6503210408726827:6503210408829226 2:0 matrixTranspose(float*, float*, int):187:35331
6503210408832906:6503210410293660 2:0 CopyDeviceToHost:188:35331
6503210419220499:6503210420887211 2:0 CopyHostToDevice:189:35331
6503210420952691:6503210421054931 2:0 matrixTranspose(float*, float*, int):192:35331
6503210421058611:6503210422501444 2:0 CopyDeviceToHost:193:35331
6503210324170330:6503210325893976 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :149
6503210325896952:6503210325897082 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :150
6503210325897282:6503210325897403 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :151
6503210325897673:6503210325900849 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :152
6503210325901390:6503210327573671 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :153
6503210335818373:6503210337564121 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :154
6503210337564561:6503210337564702 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :155
6503210337564932:6503210337565042 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :156
6503210337565293:6503210337568729 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :157
6503210337569150:6503210339241812 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :158
6503210348008101:6503210349724094 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :159
6503210349724505:6503210349724635 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :160
6503210349724845:6503210349724975 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :161
6503210349725196:6503210349728512 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :162
6503210349731698:6503210351407356 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :163
6503210359634144:6503210361373791 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :164
6503210361374241:6503210361374362 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :165
6503210361374602:6503210361374712 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :166
6503210361374983:6503210361378219 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :167
6503210361380343:6503210363048967 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :168
6503210371301383:6503210373030941 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :169
6503210373031292:6503210373031402 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :170
6503210373031572:6503210373031683 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :171
6503210373033807:6503210373037013 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :172
6503210373037403:6503210374734290 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :173
6503210383004048:6503210384746339 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :174
6503210384746710:6503210384746850 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :175
6503210384747081:6503210384747211 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :176
6503210384749806:6503210384752962 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :177
6503210384753332:6503210386424672 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :178
6503210394688249:6503210396465866 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :179
6503210396466266:6503210396466407 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :180
6503210396468781:6503210396468941 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :181
6503210396469212:6503210396472748 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :182
6503210396473209:6503210398185786 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :183
6503210406990927:6503210408706859 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :184
6503210408707260:6503210408707390 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :185
6503210408711929:6503210408712049 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :186
6503210408712299:6503210408715385 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :187
6503210408715756:6503210410404919 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :188
6503210419207635:6503210420934348 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :189
6503210420937233:6503210420937413 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :190
6503210420937654:6503210420937764 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :191
6503210420937995:6503210420941241 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :192
6503210420941741:6503210422609735 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :193
6503210430876446:6503210432536598 2:0 CopyHostToDevice:194:35331
6503210432603198:6503210432705278 2:0 matrixTranspose(float*, float*, int):197:35331
6503210432708958:6503210434158351 2:0 CopyDeviceToHost:198:35331
6503210443064391:6503210444714783 2:0 CopyHostToDevice:199:35331
6503210444781223:6503210444882982 2:0 matrixTranspose(float*, float*, int):202:35331
6503210444886662:6503210446330776 2:0 CopyDeviceToHost:203:35331
6503210455262895:6503210456915368 2:0 CopyHostToDevice:204:35331
6503210456979727:6503210457081807 2:0 matrixTranspose(float*, float*, int):207:35331
6503210457085487:6503210458529120 2:0 CopyDeviceToHost:208:35331
6503210466903764:6503210468580717 2:0 CopyHostToDevice:209:35331
6503210468651475:6503210468753555 2:0 matrixTranspose(float*, float*, int):212:35331
6503210468757235:6503210470211269 2:0 CopyDeviceToHost:213:35331
6503210479096990:6503210480748022 2:0 CopyHostToDevice:214:35331
6503210480817982:6503210480923262 2:0 matrixTranspose(float*, float*, int):217:35331
6503210480926942:6503210482369935 2:0 CopyDeviceToHost:218:35331
6503210491251336:6503210492903009 2:0 CopyHostToDevice:219:35331
6503210492969289:6503210493071368 2:0 matrixTranspose(float*, float*, int):222:35331
6503210493075048:6503210494517402 2:0 CopyDeviceToHost:223:35331
6503210502850325:6503210504500558 2:0 CopyHostToDevice:224:35331
6503210504566518:6503210504668597 2:0 matrixTranspose(float*, float*, int):227:35331
6503210504672117:6503210506125191 2:0 CopyDeviceToHost:228:35331
6503210514485314:6503210516095067 2:0 CopyHostToDevice:229:35331
6503210516162147:6503210516264547 2:0 matrixTranspose(float*, float*, int):232:35331
6503210516268067:6503210517714100 2:0 CopyDeviceToHost:233:35331
6503210430863463:6503210432584675 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :194
6503210432587661:6503210432587821 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :195
6503210432588061:6503210432588162 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :196
6503210432588392:6503210432591738 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :197
6503210432592079:6503210434267126 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :198
6503210443050897:6503210444765146 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :199
6503210444765547:6503210444765677 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :200
6503210444765888:6503210444765988 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :201
6503210444766228:6503210444769665 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :202
6503210444770025:6503210446438640 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :203
6503210455249722:6503210456963611 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :204
6503210456964182:6503210456964292 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :205
6503210456964533:6503210456964643 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :206
6503210456964873:6503210456968250 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :207
6503210456970354:6503210458636714 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :208
6503210466890212:6503210468635499 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :209
6503210468635910:6503210468636070 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :210
6503210468636301:6503210468636401 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :211
6503210468636741:6503210468639717 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :212
6503210468642282:6503210470320454 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :213
6503210479083366:6503210480800040 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :214
6503210480800641:6503210480800772 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :215
6503210480800962:6503210480801072 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :216
6503210480803306:6503210480806332 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :217
6503210480806773:6503210482476800 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :218
6503210491237789:6503210492951458 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :219
6503210492951989:6503210492952129 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :220
6503210492952319:6503210492952429 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :221
6503210492954613:6503210492957589 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :222
6503210492958020:6503210494624851 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :223
6503210502837203:6503210504548326 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :224
6503210504548847:6503210504549008 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :225
6503210504551382:6503210504551502 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :226
6503210504551753:6503210504554989 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :227
6503210504555339:6503210506236838 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :228
6503210514472503:6503210516142770 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :229
6503210516143201:6503210516143371 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :230
6503210516147078:6503210516147228 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :231
6503210516147439:6503210516150625 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :232
6503210516151046:6503210517823568 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :233
6503210526084144:6503210527734376 2:0 CopyHostToDevice:234:35331
6503210527801776:6503210527908976 2:0 matrixTranspose(float*, float*, int):237:35331
6503210527912656:6503210529356609 2:0 CopyDeviceToHost:238:35331
6503210537712303:6503210539363018 2:0 CopyHostToDevice:239:35331
6503210539438256:6503210539540016 2:0 matrixTranspose(float*, float*, int):242:35331
6503210539543696:6503210540989091 2:0 CopyDeviceToHost:243:35331
6503210549357383:6503210551008578 2:0 CopyHostToDevice:244:35331
6503210551075178:6503210551177417 2:0 matrixTranspose(float*, float*, int):247:35331
6503210551181257:6503210552624572 2:0 CopyDeviceToHost:248:35331
6503210560964704:6503210562615579 2:0 CopyHostToDevice:249:35331
6503210562680419:6503210562782498 2:0 matrixTranspose(float*, float*, int):252:35331
6503210562786178:6503210564229654 2:0 CopyDeviceToHost:253:35331
6503210572594266:6503210574246260 2:0 CopyHostToDevice:254:35331
6503210574317340:6503210574419259 2:0 matrixTranspose(float*, float*, int):257:35331
6503210574423099:6503210575865295 2:0 CopyDeviceToHost:258:35331
6503210584215347:6503210585868301 2:0 CopyHostToDevice:259:35331
6503210585935061:6503210586038421 2:0 matrixTranspose(float*, float*, int):262:35331
6503210586042101:6503210587484936 2:0 CopyDeviceToHost:263:35331
6503210596359946:6503210598009861 2:0 CopyHostToDevice:264:35331
6503210598075660:6503210598177740 2:0 matrixTranspose(float*, float*, int):267:35331
6503210598181260:6503210599623615 2:0 CopyDeviceToHost:268:35331
6503210608497171:6503210610150604 2:0 CopyHostToDevice:269:35331
6503210610222482:6503210610324562 2:0 matrixTranspose(float*, float*, int):272:35331
6503210610328242:6503210611771396 2:0 CopyDeviceToHost:273:35331
6503210620107038:6503210621757751 2:0 CopyHostToDevice:274:35331
6503210621824831:6503210621928830 2:0 matrixTranspose(float*, float*, int):277:35331
6503210621932670:6503210623375664 2:0 CopyDeviceToHost:278:35331
6503210526069862:6503210527781697 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :234
6503210527785755:6503210527785895 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :235
6503210527786145:6503210527786256 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :236
6503210527786516:6503210527790173 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :237
6503210527790564:6503210529465250 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :238
6503210537697307:6503210539418851 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :239
6503210539421175:6503210539421345 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :240
6503210539421556:6503210539421666 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :241
6503210539421916:6503210539425713 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :242
6503210539426124:6503210541097394 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :243
6503210549342877:6503210551058419 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :244
6503210551058990:6503210551059160 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :245
6503210551059340:6503210551059450 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :246
6503210551059671:6503210551062797 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :247
6503210551063137:6503210552732353 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :248
6503210560950676:6503210562663613 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :249
6503210562664013:6503210562664154 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :250
6503210562664334:6503210562664444 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :251
6503210562664664:6503210562667860 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :252
6503210562669824:6503210564336495 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :253
6503210572580205:6503210574298452 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :254
6503210574298953:6503210574299083 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :255
6503210574299384:6503210574299464 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :256
6503210574301558:6503210574304834 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :257
6503210574305515:6503210575972046 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :258
6503210584201108:6503210585916069 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :259
6503210585916470:6503210585916620 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :260
6503210585916820:6503210585916920 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :261
6503210585919225:6503210585922541 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :262
6503210585922962:6503210587592057 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :263
6503210596345753:6503210598056696 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :264
6503210598057127:6503210598057227 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :265
6503210598059521:6503210598059652 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :266
6503210598059892:6503210598063068 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :267
6503210598063459:6503210599729980 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :268
6503210608484356:6503210610204246 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :269
6503210610204707:6503210610204847 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :270
6503210610207372:6503210610207492 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :271
6503210610207743:6503210610210929 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :272
6503210610211239:6503210611879683 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :273
6503210620094470:6503210621806725 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :274
6503210621809471:6503210621809591 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :275
6503210621809811:6503210621809921 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :276
6503210621810162:6503210621813348 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :277
6503210621813749:6503210623483445 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :278
6503210631751947:6503210633402499 2:0 CopyHostToDevice:279:35331
6503210633468619:6503210633571339 2:0 matrixTranspose(float*, float*, int):282:35331
6503210633575179:6503210635028572 2:0 CopyDeviceToHost:283:35331
6503210643384375:6503210645036048 2:0 CopyHostToDevice:284:35331
6503210645100887:6503210645202967 2:0 matrixTranspose(float*, float*, int):287:35331
6503210645206807:6503210646653001 2:0 CopyDeviceToHost:288:35331
6503210655533601:6503210657184314 2:0 CopyHostToDevice:289:35331
6503210657249793:6503210657351073 2:0 matrixTranspose(float*, float*, int):292:35331
6503210657354753:6503210658798867 2:0 CopyDeviceToHost:293:35331
6503210667164749:6503210668815462 2:0 CopyHostToDevice:294:35331
6503210668883822:6503210668995821 2:0 matrixTranspose(float*, float*, int):297:35331
6503210668999501:6503210670442655 2:0 CopyDeviceToHost:298:35331
6503210678783507:6503210680434380 2:0 CopyHostToDevice:299:35331
6503210680506738:6503210680608978 2:0 matrixTranspose(float*, float*, int):302:35331
6503210680612658:6503210682055652 2:0 CopyDeviceToHost:303:35331
6503210690383137:6503210692034010 2:0 CopyHostToDevice:304:35331
6503210692101890:6503210692204129 2:0 matrixTranspose(float*, float*, int):307:35331
6503210692207809:6503210693651923 2:0 CopyDeviceToHost:308:35331
6503210701982929:6503210703634282 2:0 CopyHostToDevice:309:35331
6503210703703121:6503210703804881 2:0 matrixTranspose(float*, float*, int):312:35331
6503210703808561:6503210705252995 2:0 CopyDeviceToHost:313:35331
6503210713597280:6503210715247353 2:0 CopyHostToDevice:314:35331
6503210715314433:6503210715415872 2:0 matrixTranspose(float*, float*, int):317:35331
6503210715419552:6503210716863506 2:0 CopyDeviceToHost:318:35331
6503210631738005:6503210633449850 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :279
6503210633452975:6503210633453126 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :280
6503210633453326:6503210633453436 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :281
6503210633453677:6503210633457203 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :282
6503210633457704:6503210635138311 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :283
6503210643371151:6503210645085070 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :284
6503210645085581:6503210645085691 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :285
6503210645085921:6503210645086021 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :286
6503210645086262:6503210645089438 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :287
6503210645089768:6503210646760868 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :288
6503210655519883:6503210657233431 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :289
6503210657233942:6503210657234082 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :290
6503210657234293:6503210657234403 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :291
6503210657234644:6503210657237799 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :292
6503210657240424:6503210658906494 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :293
6503210667151617:6503210668867469 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :294
6503210668868070:6503210668868200 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :295
6503210668868421:6503210668868521 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :296
6503210668868752:6503210668872318 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :297
6503210668875023:6503210670550541 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :298
6503210678770185:6503210680488482 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :299
6503210680489033:6503210680489163 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :300
6503210680489344:6503210680489454 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :301
6503210680491758:6503210680494954 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :302
6503210680495325:6503210682164791 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :303
6503210690369849:6503210692083578 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :304
6503210692084099:6503210692084229 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :305
6503210692084459:6503210692084570 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :306
6503210692087074:6503210692089970 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :307
6503210692090330:6503210693760588 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :308
6503210701969663:6503210703684755 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :309
6503210703685275:6503210703685386 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :310
6503210703687610:6503210703687740 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :311
6503210703687950:6503210703691447 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :312
6503210703691798:6503210705361635 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :313
6503210713583945:6503210715295008 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :314
6503210715295750:6503210715296010 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :315
6503210715298414:6503210715298545 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :316
6503210715298785:6503210715302362 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :317
6503210715302783:6503210716972550 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :318
6503210725761229:6503210727411622 2:0 CopyHostToDevice:319:35331
6503210727481582:6503210727583821 2:0 matrixTranspose(float*, float*, int):322:35331
6503210727587501:6503210729031295 2:0 CopyDeviceToHost:323:35331
6503210737379900:6503210739031893 2:0 CopyHostToDevice:324:35331
6503210739103133:6503210739205213 2:0 matrixTranspose(float*, float*, int):327:35331
6503210739208893:6503210740651887 2:0 CopyDeviceToHost:328:35331
6503210749014368:6503210750667322 2:0 CopyHostToDevice:329:35331
6503210750738080:6503210750841440 2:0 matrixTranspose(float*, float*, int):332:35331
6503210750845120:6503210752288594 2:0 CopyDeviceToHost:333:35331
6503210760655122:6503210762323595 2:0 CopyHostToDevice:334:35331
6503210762391475:6503210762493234 2:0 matrixTranspose(float*, float*, int):337:35331
6503210762497074:6503210763941669 2:0 CopyDeviceToHost:338:35331
6503210772825794:6503210774477627 2:0 CopyHostToDevice:339:35331
6503210774541987:6503210774644067 2:0 matrixTranspose(float*, float*, int):342:35331
6503210774647747:6503210776090901 2:0 CopyDeviceToHost:343:35331
6503210784455188:6503210786106382 2:0 CopyHostToDevice:344:35331
6503210786174582:6503210786276981 2:0 matrixTranspose(float*, float*, int):347:35331
6503210786280661:6503210787727176 2:0 CopyDeviceToHost:348:35331
6503210796076583:6503210797745696 2:0 CopyHostToDevice:349:35331
6503210797815176:6503210797922536 2:0 matrixTranspose(float*, float*, int):352:35331
6503210797926216:6503210799369530 2:0 CopyDeviceToHost:353:35331
6503210807752537:6503210809406451 2:0 CopyHostToDevice:354:35331
6503210809473370:6503210809575610 2:0 matrixTranspose(float*, float*, int):357:35331
6503210809579290:6503210811021324 2:0 CopyDeviceToHost:358:35331
6503210819961955:6503210821613308 2:0 CopyHostToDevice:359:35331
6503210821687586:6503210821789666 2:0 matrixTranspose(float*, float*, int):362:35331
6503210821793506:6503210823265459 2:0 CopyDeviceToHost:363:35331
6503210725746452:6503210727461493 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :319
6503210727464579:6503210727464719 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :320
6503210727464919:6503210727465020 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :321
6503210727465260:6503210727469738 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :322
6503210727470340:6503210729141118 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :323
6503210737366233:6503210739084721 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :324
6503210739087175:6503210739087315 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :325
6503210739087516:6503210739087626 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :326
6503210739087866:6503210739091373 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :327
6503210739091714:6503210740761150 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :328
6503210749000702:6503210750721674 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :329
6503210750722245:6503210750722365 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :330
6503210750722596:6503210750722696 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :331
6503210750722976:6503210750726162 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :332
6503210750726533:6503210752396500 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :333
6503210760641773:6503210762374967 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :334
6503210762375528:6503210762375649 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :335
6503210762375839:6503210762375929 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :336
6503210762376180:6503210762379365 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :337
6503210762381499:6503210764049994 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :338
6503210772812756:6503210774525202 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :339
6503210774526044:6503210774526184 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :340
6503210774526364:6503210774526475 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :341
6503210774526715:6503210774529871 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :342
6503210774532075:6503210776198065 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :343
6503210784440993:6503210786156064 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :344
6503210786156565:6503210786156725 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :345
6503210786156925:6503210786157026 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :346
6503210786159190:6503210786162536 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :347
6503210786162957:6503210787834447 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :348
6503210796063088:6503210797796694 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :349
6503210797797265:6503210797797415 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :350
6503210797797605:6503210797797726 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :351
6503210797799920:6503210797803066 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :352
6503210797803526:6503210799476900 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :353
6503210807737842:6503210809452582 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :354
6503210809453163:6503210809453333 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :355
6503210809457191:6503210809457301 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :356
6503210809457541:6503210809461328 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :357
6503210809461769:6503210811128370 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :358
6503210819949250:6503210821668940 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :359
6503210821669381:6503210821669501 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :360
6503210821671805:6503210821671925 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :361
6503210821672176:6503210821675782 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :362
6503210821676213:6503210823374924 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :363
6503210831639182:6503210833290854 2:0 CopyHostToDevice:364:35331
6503210833359214:6503210833460974 2:0 matrixTranspose(float*, float*, int):367:35331
6503210833464654:6503210834907167 2:0 CopyDeviceToHost:368:35331
6503210843838007:6503210845490000 2:0 CopyHostToDevice:369:35331
6503210845559320:6503210845661239 2:0 matrixTranspose(float*, float*, int):372:35331
6503210845664759:6503210847140072 2:0 CopyDeviceToHost:373:35331
6503210856092192:6503210857746265 2:0 CopyHostToDevice:374:35331
6503210857815585:6503210857922944 2:0 matrixTranspose(float*, float*, int):377:35331
6503210857926624:6503210859369458 2:0 CopyDeviceToHost:378:35331
6503210867730540:6503210869382533 2:0 CopyHostToDevice:379:35331
6503210869445933:6503210869548332 2:0 matrixTranspose(float*, float*, int):382:35331
6503210869552012:6503210870995166 2:0 CopyDeviceToHost:383:35331
6503210879894966:6503210881556878 2:0 CopyHostToDevice:384:35331
6503210881622998:6503210881724918 2:0 matrixTranspose(float*, float*, int):387:35331
6503210881728438:6503210883172871 2:0 CopyDeviceToHost:388:35331
6503210892050211:6503210893700764 2:0 CopyHostToDevice:389:35331
6503210893772642:6503210893874562 2:0 matrixTranspose(float*, float*, int):392:35331
6503210893878402:6503210895320596 2:0 CopyDeviceToHost:393:35331
6503210903654322:6503210905304715 2:0 CopyHostToDevice:394:35331
6503210905371635:6503210905473394 2:0 matrixTranspose(float*, float*, int):397:35331
6503210905477234:6503210906920228 2:0 CopyDeviceToHost:398:35331
6503210915260834:6503210916913307 2:0 CopyHostToDevice:399:35331
6503210916982627:6503210917084546 2:0 matrixTranspose(float*, float*, int):402:35331
6503210917088226:6503210918532980 2:0 CopyDeviceToHost:403:35331
6503210831626428:6503210833341068 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :364
6503210833343813:6503210833343983 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :365
6503210833344173:6503210833344274 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :366
6503210833344524:6503210833347790 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :367
6503210833348171:6503210835016245 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :368
6503210843824752:6503210845541526 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :369
6503210845543991:6503210845544131 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :370
6503210845544331:6503210845544431 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :371
6503210845544662:6503210845547748 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :372
6503210845548128:6503210847251848 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :373
6503210856079371:6503210857799601 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :374
6503210857800082:6503210857800212 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :375
6503210857800463:6503210857800563 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :376
6503210857800803:6503210857804099 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :377
6503210857804530:6503210859477092 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :378
6503210867717666:6503210869429962 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :379
6503210869430403:6503210869430503 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :380
6503210869430704:6503210869430804 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :381
6503210869431044:6503210869434511 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :382
6503210869436925:6503210871103376 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :383
6503210879881967:6503210881607237 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :384
6503210881607648:6503210881607798 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :385
6503210881607988:6503210881608089 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :386
6503210881608389:6503210881611625 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :387
6503210881613889:6503210883281583 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :388
6503210892036641:6503210893754166 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :389
6503210893754617:6503210893754757 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :390
6503210893755058:6503210893755168 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :391
6503210893757572:6503210893760788 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :392
6503210893761279:6503210895428351 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :393
6503210903641534:6503210905353099 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :394
6503210905353479:6503210905353590 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :395
6503210905353810:6503210905353920 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :396
6503210905356325:6503210905359491 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :397
6503210905359951:6503210907027193 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :398
6503210915247620:6503210916964555 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :399
6503210916964975:6503210916965096 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :400
6503210916967220:6503210916967330 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :401
6503210916967570:6503210916970786 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :402
6503210916971327:6503210918641334 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :403
6503210926882706:6503210928535339 2:0 CopyHostToDevice:404:35331
6503210928604339:6503210928706579 2:0 matrixTranspose(float*, float*, int):407:35331
6503210928710258:6503210930183332 2:0 CopyDeviceToHost:408:35331
6503210939076416:6503210940698649 2:0 CopyHostToDevice:409:35331
6503210940766209:6503210940869888 2:0 matrixTranspose(float*, float*, int):412:35331
6503210940873568:6503210942322002 2:0 CopyDeviceToHost:413:35331
6503210950657488:6503210952308681 2:0 CopyHostToDevice:414:35331
6503210952378001:6503210952479761 2:0 matrixTranspose(float*, float*, int):417:35331
6503210952483441:6503210953926915 2:0 CopyDeviceToHost:418:35331
6503210962245221:6503210963897215 2:0 CopyHostToDevice:419:35331
6503210963971653:6503210964073733 2:0 matrixTranspose(float*, float*, int):422:35331
6503210964077413:6503210965523607 2:0 CopyDeviceToHost:423:35331
6503210973867413:6503210975517967 2:0 CopyHostToDevice:424:35331
6503210975583127:6503210975684726 2:0 matrixTranspose(float*, float*, int):427:35331
6503210975688406:6503210977155560 2:0 CopyDeviceToHost:428:35331
6503210985723206:6503210987374559 2:0 CopyHostToDevice:429:35331
6503210987439079:6503210987540998 2:0 matrixTranspose(float*, float*, int):432:35331
6503210987544678:6503210988989112 2:0 CopyDeviceToHost:433:35331
6503210997346359:6503210998996752 2:0 CopyHostToDevice:434:35331
6503210999065112:6503210999167191 2:0 matrixTranspose(float*, float*, int):437:35331
6503210999170871:6503211000613385 2:0 CopyDeviceToHost:438:35331
6503211008987272:6503211010637665 2:0 CopyHostToDevice:439:35331
6503211010703145:6503211010805384 2:0 matrixTranspose(float*, float*, int):442:35331
6503211010809064:6503211012253658 2:0 CopyDeviceToHost:443:35331
6503211021150582:6503211022801776 2:0 CopyHostToDevice:444:35331
6503211022868536:6503211022971415 2:0 matrixTranspose(float*, float*, int):447:35331
6503211022975255:6503211024420329 2:0 CopyDeviceToHost:448:35331
6503210926869886:6503210928584566 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :404
6503210928585858:6503210928586099 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :405
6503210928588794:6503210928588924 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :406
6503210928589204:6503210928592420 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :407
6503210928593132:6503210930294828 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :408
6503210939062670:6503210940748146 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :409
6503210940750480:6503210940750600 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :410
6503210940750791:6503210940750901 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :411
6503210940751151:6503210940754367 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :412
6503210940754788:6503210942430186 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :413
6503210950644370:6503210952359491 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :414
6503210952361846:6503210952361956 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :415
6503210952362156:6503210952362267 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :416
6503210952362487:6503210952365613 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :417
6503210952366003:6503210954036371 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :418
6503210962230730:6503210963954958 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :419
6503210963955459:6503210963955629 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :420
6503210963955879:6503210963955990 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :421
6503210963956240:6503210963959747 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :422
6503210963960197:6503210965633361 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :423
6503210973853176:6503210975566133 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :424
6503210975566644:6503210975566804 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :425
6503210975567064:6503210975567175 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :426
6503210975567415:6503210975571122 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :427
6503210975573707:6503210977264983 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :428
6503210985709475:6503210987422643 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :429
6503210987423164:6503210987423284 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :430
6503210987423575:6503210987423685 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :431
6503210987424015:6503210987427141 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :432
6503210987429796:6503210989097720 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :433
6503210997332883:6503210999046261 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :434
6503210999047083:6503210999047243 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :435
6503210999047453:6503210999047564 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :436
6503210999050189:6503210999053144 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :437
6503210999053505:6503211000719795 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :438
6503211008973273:6503211010685108 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :439
6503211010685529:6503211010685669 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :440
6503211010685929:6503211010686040 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :441
6503211010688204:6503211010691119 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :442
6503211010691510:6503211012360636 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :443
6503211021136903:6503211022849770 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :444
6503211022850160:6503211022850291 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :445
6503211022853206:6503211022853326 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :446
6503211022853537:6503211022856632 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :447
6503211022857003:6503211024527271 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :448
6503211033332820:6503211034986574 2:0 CopyHostToDevice:449:35331
6503211035063412:6503211035165812 2:0 matrixTranspose(float*, float*, int):452:35331
6503211035169492:6503211036612966 2:0 CopyDeviceToHost:453:35331
6503211044941734:6503211046592928 2:0 CopyHostToDevice:454:35331
6503211046660167:6503211046762407 2:0 matrixTranspose(float*, float*, int):457:35331
6503211046766087:6503211048208921 2:0 CopyDeviceToHost:458:35331
6503211056554489:6503211058224883 2:0 CopyHostToDevice:459:35331
6503211058295322:6503211058397242 2:0 matrixTranspose(float*, float*, int):462:35331
6503211058400922:6503211059843116 2:0 CopyDeviceToHost:463:35331
6503211068187244:6503211069838117 2:0 CopyHostToDevice:464:35331
6503211069901357:6503211070004077 2:0 matrixTranspose(float*, float*, int):467:35331
6503211070007757:6503211071451071 2:0 CopyDeviceToHost:468:35331
6503211080319837:6503211081970710 2:0 CopyHostToDevice:469:35331
6503211082034430:6503211082136510 2:0 matrixTranspose(float*, float*, int):472:35331
6503211082140190:6503211083587344 2:0 CopyDeviceToHost:473:35331
6503211091935152:6503211093586025 2:0 CopyHostToDevice:474:35331
6503211093654705:6503211093756625 2:0 matrixTranspose(float*, float*, int):477:35331
6503211093760465:6503211095204419 2:0 CopyDeviceToHost:478:35331
6503211103543170:6503211105195483 2:0 CopyHostToDevice:479:35331
6503211105267041:6503211105368961 2:0 matrixTranspose(float*, float*, int):482:35331
6503211105372641:6503211106818035 2:0 CopyDeviceToHost:483:35331
6503211115179760:6503211116830793 2:0 CopyHostToDevice:484:35331
6503211116900433:6503211117012272 2:0 matrixTranspose(float*, float*, int):487:35331
6503211117015952:6503211118458626 2:0 CopyDeviceToHost:488:35331
6503211033319297:6503211035044196 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :449
6503211035045098:6503211035045208 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :450
6503211035047342:6503211035047473 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :451
6503211035047693:6503211035050849 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :452
6503211035051240:6503211036720235 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :453
6503211044928008:6503211046641827 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :454
6503211046644271:6503211046644401 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :455
6503211046644592:6503211046644692 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :456
6503211046644922:6503211046648078 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :457
6503211046648519:6503211048315691 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :458
6503211056541107:6503211058274482 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :459
6503211058279040:6503211058279171 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :460
6503211058279361:6503211058279471 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :461
6503211058279672:6503211058282968 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :462
6503211058283699:6503211059949549 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :463
6503211068173462:6503211069884565 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :464
6503211069885096:6503211069885226 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :465
6503211069885487:6503211069885607 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :466
6503211069885817:6503211069889134 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :467
6503211069889584:6503211071557778 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :468
6503211080305903:6503211082017999 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :469
6503211082018409:6503211082018550 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :470
6503211082018750:6503211082018860 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :471
6503211082019111:6503211082022247 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :472
6503211082024421:6503211083694618 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :473
6503211091920876:6503211093637910 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :474
6503211093638371:6503211093638501 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :475
6503211093638752:6503211093638852 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :476
6503211093639072:6503211093642469 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :477
6503211093644693:6503211095310793 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :478
6503211103529326:6503211105248805 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :479
6503211105249226:6503211105249376 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :480
6503211105249607:6503211105249717 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :481
6503211105251971:6503211105255207 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :482
6503211105255578:6503211106925876 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :483
6503211115166119:6503211116881490 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :484
6503211116881941:6503211116882061 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :485
6503211116882262:6503211116882372 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :486
6503211116885498:6503211116888724 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :487
6503211116889145:6503211118565954 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :488
6503211127375709:6503211129029622 2:0 CopyHostToDevice:489:35331
6503211129101662:6503211129204061 2:0 matrixTranspose(float*, float*, int):492:35331
6503211129207741:6503211130650415 2:0 CopyDeviceToHost:493:35331
6503211139021580:6503211140676133 2:0 CopyHostToDevice:494:35331
6503211140742573:6503211140844653 2:0 matrixTranspose(float*, float*, int):497:35331
6503211140848333:6503211142290686 2:0 CopyDeviceToHost:498:35331
6503211150658811:6503211152310805 2:0 CopyHostToDevice:499:35331
6503211152381564:6503211152483644 2:0 matrixTranspose(float*, float*, int):502:35331
6503211152487164:6503211153938958 2:0 CopyDeviceToHost:503:35331
6503211127362319:6503211129082529 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :489
6503211129083321:6503211129083441 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :490
6503211129086256:6503211129086386 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :491
6503211129086627:6503211129089923 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :492
6503211129090444:6503211130757556 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :493
6503211139008319:6503211140724522 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :494
6503211140725013:6503211140725163 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :495
6503211140727227:6503211140727357 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :496
6503211140727628:6503211140730843 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :497
6503211140731445:6503211142398517 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :498
6503211150645472:6503211152362817 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :499
6503211152365633:6503211152365803 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :500
6503211152366043:6503211152366144 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :501
6503211152366384:6503211152369500 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :502
6503211152369901:6503211154050868 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :503
6503211162290811:6503211162359739 35331:35331 hipFree(ptr=0x7f19f7a00000) :504
6503211162362394:6503211162387610 35331:35331 hipFree(ptr=0x7f19f7400000) :505
@@ -0,0 +1,611 @@
ROCTracer (pid=880592): input from "input.xml"
0x1e17410 agent cpu
0x1e4f580 agent gpu
0x1e51e80 agent gpu
4496524891130970
HIP-trace()
hipFree hipMalloc hipMemcpyDevice name AMD Radeon VII
## Iteration (99) #################
PASSED!
## Iteration (98) #################
PASSED!
## Iteration (97) #################
PASSED!
## Iteration (96) #################
PASSED!
## Iteration (95) #################
PASSED!
## Iteration (94) #################
PASSED!
## Iteration (93) #################
PASSED!
## Iteration (92) #################
PASSED!
## Iteration (91) #################
PASSED!
## Iteration (90) #################
PASSED!
## Iteration (89) #################
PASSED!
## Iteration (88) #################
PASSED!
## Iteration (87) #################
PASSED!
## Iteration (86) #################
PASSED!
## Iteration (85) #################
PASSED!
## Iteration (84) #################
PASSED!
## Iteration (83) #################
PASSED!
## Iteration (82) #################
PASSED!
## Iteration (81) #################
PASSED!
## Iteration (80) #################
PASSED!
## Iteration (79) #################
PASSED!
## Iteration (78) #################
PASSED!
## Iteration (77) #################
PASSED!
## Iteration (76) #################
PASSED!
## Iteration (75) #################
PASSED!
## Iteration (74) #################
PASSED!
## Iteration (73) #################
PASSED!
## Iteration (72) #################
PASSED!
## Iteration (71) #################
PASSED!
## Iteration (70) #################
PASSED!
## Iteration (69) #################
PASSED!
## Iteration (68) #################
PASSED!
## Iteration (67) #################
PASSED!
## Iteration (66) #################
PASSED!
## Iteration (65) #################
PASSED!
## Iteration (64) #################
PASSED!
## Iteration (63) #################
PASSED!
## Iteration (62) #################
PASSED!
## Iteration (61) #################
PASSED!
## Iteration (60) #################
PASSED!
## Iteration (59) #################
PASSED!
## Iteration (58) #################
PASSED!
## Iteration (57) #################
PASSED!
## Iteration (56) #################
PASSED!
## Iteration (55) #################
PASSED!
## Iteration (54) #################
PASSED!
## Iteration (53) #################
PASSED!
## Iteration (52) #################
PASSED!
## Iteration (51) #################
PASSED!
## Iteration (50) #################
PASSED!
## Iteration (49) #################
PASSED!
## Iteration (48) #################
PASSED!
## Iteration (47) #################
PASSED!
## Iteration (46) #################
PASSED!
## Iteration (45) #################
PASSED!
## Iteration (44) #################
PASSED!
## Iteration (43) #################
PASSED!
## Iteration (42) #################
PASSED!
## Iteration (41) #################
PASSED!
## Iteration (40) #################
PASSED!
## Iteration (39) #################
PASSED!
## Iteration (38) #################
PASSED!
## Iteration (37) #################
PASSED!
## Iteration (36) #################
PASSED!
## Iteration (35) #################
PASSED!
## Iteration (34) #################
PASSED!
## Iteration (33) #################
PASSED!
## Iteration (32) #################
PASSED!
## Iteration (31) #################
PASSED!
## Iteration (30) #################
PASSED!
## Iteration (29) #################
PASSED!
## Iteration (28) #################
PASSED!
## Iteration (27) #################
PASSED!
## Iteration (26) #################
PASSED!
## Iteration (25) #################
PASSED!
## Iteration (24) #################
PASSED!
## Iteration (23) #################
PASSED!
## Iteration (22) #################
PASSED!
## Iteration (21) #################
PASSED!
## Iteration (20) #################
PASSED!
## Iteration (19) #################
PASSED!
## Iteration (18) #################
PASSED!
## Iteration (17) #################
PASSED!
## Iteration (16) #################
PASSED!
## Iteration (15) #################
PASSED!
## Iteration (14) #################
PASSED!
## Iteration (13) #################
PASSED!
## Iteration (12) #################
PASSED!
## Iteration (11) #################
PASSED!
## Iteration (10) #################
PASSED!
## Iteration (9) #################
PASSED!
## Iteration (8) #################
PASSED!
## Iteration (7) #################
PASSED!
## Iteration (6) #################
PASSED!
## Iteration (5) #################
PASSED!
## Iteration (4) #################
PASSED!
## Iteration (3) #################
PASSED!
## Iteration (2) #################
PASSED!
## Iteration (1) #################
PASSED!
## Iteration (0) #################
PASSED!
4496525133195468:4496525133500748 0:0 CopyHostToDevice:3:880592
4496525139346056:4496525140494974 0:0 CopyDeviceToHost:6:880592
4496525158136061:4496525158439581 0:0 CopyHostToDevice:7:880592
4496525162884573:4496525163225012 0:0 CopyDeviceToHost:10:880592
4496525179113943:4496525179416502 0:0 CopyHostToDevice:11:880592
4496525183856054:4496525184201293 0:0 CopyDeviceToHost:14:880592
4496525199788429:4496525200090509 0:0 CopyHostToDevice:15:880592
4496525204497581:4496525204837540 0:0 CopyDeviceToHost:18:880592
4496525220040077:4496525220341836 0:0 CopyHostToDevice:19:880592
4496525224734029:4496525225073989 0:0 CopyDeviceToHost:22:880592
4496525240233165:4496525240535565 0:0 CopyHostToDevice:23:880592
4496525244924718:4496525245265318 0:0 CopyDeviceToHost:26:880592
4496525260367854:4496525260669454 0:0 CopyHostToDevice:27:880592
4496525265052841:4496525265424319 0:0 CopyDeviceToHost:30:880592
4496525281955173:4496525282256773 0:0 CopyHostToDevice:31:880592
4496525286638246:4496525286978205 0:0 CopyDeviceToHost:34:880592
4496525301587783:4496525301890022 0:0 CopyHostToDevice:35:880592
4496525306259335:4496525306599935 0:0 CopyDeviceToHost:38:880592
4496525322760230:4496525323062629 0:0 CopyHostToDevice:39:880592
4496525327434982:4496525327775409 0:0 CopyDeviceToHost:42:880592
4496525343578745:4496525343881145 0:0 CopyHostToDevice:43:880592
4496525348304218:4496525348644498 0:0 CopyDeviceToHost:46:880592
4496525364012316:4496525364314556 0:0 CopyHostToDevice:47:880592
4496525368704989:4496525369045109 0:0 CopyDeviceToHost:50:880592
4496525384281567:4496525384583167 0:0 CopyHostToDevice:51:880592
4496525388966240:4496525389338040 0:0 CopyDeviceToHost:54:880592
4496525404545748:4496525404848308 0:0 CopyHostToDevice:55:880592
4496525409873459:4496525410212298 0:0 CopyDeviceToHost:58:880592
4496525426854993:4496525427157073 0:0 CopyHostToDevice:59:880592
4496525431525586:4496525431865866 0:0 CopyDeviceToHost:62:880592
4496525448226961:4496525448529360 0:0 CopyHostToDevice:63:880592
4496525452895154:4496525453237033 0:0 CopyDeviceToHost:66:880592
4496525468131725:4496525468433645 0:0 CopyHostToDevice:67:880592
4496525472798637:4496525473139397 0:0 CopyDeviceToHost:70:880592
4496525488059216:4496525488361616 0:0 CopyHostToDevice:71:880592
4496525492706930:4496525493047049 0:0 CopyDeviceToHost:74:880592
4496525508011988:4496525508314228 0:0 CopyHostToDevice:75:880592
4496525512687542:4496525513027661 0:0 CopyDeviceToHost:78:880592
4496525528000600:4496525528302200 0:0 CopyHostToDevice:79:880592
4496525532665206:4496525533005644 0:0 CopyDeviceToHost:82:880592
4496525548020504:4496525548322744 0:0 CopyHostToDevice:83:880592
4496525552686778:4496525553026738 0:0 CopyDeviceToHost:86:880592
4496525568050718:4496525568352477 0:0 CopyHostToDevice:87:880592
4496525572724831:4496525573064951 0:0 CopyDeviceToHost:90:880592
4496525588056611:4496525588358371 0:0 CopyHostToDevice:91:880592
4496525592722405:4496525593062524 0:0 CopyDeviceToHost:94:880592
4496525608239172:4496525608541412 0:0 CopyHostToDevice:95:880592
4496525612912965:4496525613254204 0:0 CopyDeviceToHost:98:880592
4496525628224585:4496525628526984 0:0 CopyHostToDevice:99:880592
4496525632901099:4496525633241698 0:0 CopyDeviceToHost:102:880592
4496525648284239:4496525648586638 0:0 CopyHostToDevice:103:880592
4496525652965552:4496525653334472 0:0 CopyDeviceToHost:106:880592
4496525668288212:4496525668590132 0:0 CopyHostToDevice:107:880592
4496525672962004:4496525673332842 0:0 CopyDeviceToHost:110:880592
4496525688287542:4496525688589622 0:0 CopyHostToDevice:111:880592
4496525692956536:4496525693329136 0:0 CopyDeviceToHost:114:880592
4496525708328796:4496525708630396 0:0 CopyHostToDevice:115:880592
4496525712994110:4496525713363350 0:0 CopyDeviceToHost:118:880592
4496525728330370:4496525728631810 0:0 CopyHostToDevice:119:880592
4496525732992804:4496525733365723 0:0 CopyDeviceToHost:122:880592
4496525748343184:4496525748645104 0:0 CopyHostToDevice:123:880592
4496525753010257:4496525753383337 0:0 CopyDeviceToHost:126:880592
4496525768361559:4496525768663159 0:0 CopyHostToDevice:127:880592
4496525773024154:4496525773395793 0:0 CopyDeviceToHost:130:880592
4496525788520096:4496525788822335 0:0 CopyHostToDevice:131:880592
4496525793199010:4496525793539930 0:0 CopyDeviceToHost:134:880592
4496525810008550:4496525810310310 0:0 CopyHostToDevice:135:880592
4496525814653983:4496525814993461 0:0 CopyDeviceToHost:138:880592
4496525832283678:4496525832585598 0:0 CopyHostToDevice:139:880592
4496525837594912:4496525837935351 0:0 CopyDeviceToHost:142:880592
4496525854707649:4496525855010049 0:0 CopyHostToDevice:143:880592
4496525859362403:4496525859701883 0:0 CopyDeviceToHost:146:880592
4496525876603621:4496525876905220 0:0 CopyHostToDevice:147:880592
4496525881306229:4496525881646187 0:0 CopyDeviceToHost:150:880592
4496525897973207:4496525898274807 0:0 CopyHostToDevice:151:880592
4496525902618202:4496525902957521 0:0 CopyDeviceToHost:154:880592
4496525917333824:4496525917635744 0:0 CopyHostToDevice:155:880592
4496525921975459:4496525922350138 0:0 CopyDeviceToHost:158:880592
4496525936492201:4496525936794441 0:0 CopyHostToDevice:159:880592
4496525941157356:4496525941500515 0:0 CopyDeviceToHost:162:880592
4496525955677517:4496525955979757 0:0 CopyHostToDevice:163:880592
4496525960341710:4496525960681510 0:0 CopyDeviceToHost:166:880592
4496525976745491:4496525977047571 0:0 CopyHostToDevice:167:880592
4496525981389366:4496525981729005 0:0 CopyDeviceToHost:170:880592
4496525999625945:4496525999927864 0:0 CopyHostToDevice:171:880592
4496526004313339:4496526004653619 0:0 CopyDeviceToHost:174:880592
4496526019801138:4496526020102578 0:0 CopyHostToDevice:175:880592
4496526024481811:4496526024821771 0:0 CopyDeviceToHost:178:880592
4496526040023673:4496526040325593 0:0 CopyHostToDevice:179:880592
4496526044704828:4496526045044948 0:0 CopyDeviceToHost:182:880592
4496526060018210:4496526060319810 0:0 CopyHostToDevice:183:880592
4496526064683365:4496526065023164 0:0 CopyDeviceToHost:186:880592
4496526080078987:4496526080380427 0:0 CopyHostToDevice:187:880592
4496526084744673:4496526085104791 0:0 CopyDeviceToHost:190:880592
4496526100065734:4496526100367494 0:0 CopyHostToDevice:191:880592
4496526104740009:4496526105080608 0:0 CopyDeviceToHost:194:880592
4496526120132111:4496526120434351 0:0 CopyHostToDevice:195:880592
4496526124795826:4496526125136266 0:0 CopyDeviceToHost:198:880592
4496526140122969:4496526140424888 0:0 CopyHostToDevice:199:880592
4496526144798843:4496526145139283 0:0 CopyDeviceToHost:202:880592
4496526160131675:4496526160434075 0:0 CopyHostToDevice:203:880592
4496526164805309:4496526165146708 0:0 CopyDeviceToHost:206:880592
4496526180137732:4496526180439812 0:0 CopyHostToDevice:207:880592
4496526184806087:4496526185146207 0:0 CopyDeviceToHost:210:880592
4496526200143310:4496526200444910 0:0 CopyHostToDevice:211:880592
4496526204807345:4496526205146985 0:0 CopyDeviceToHost:214:880592
4496526220164409:4496526220466328 0:0 CopyHostToDevice:215:880592
4496526224827377:4496526225167815 0:0 CopyDeviceToHost:218:880592
4496526240237879:4496526240539639 0:0 CopyHostToDevice:219:880592
4496526244914074:4496526245255794 0:0 CopyDeviceToHost:222:880592
4496526260329698:4496526260631617 0:0 CopyHostToDevice:223:880592
4496526265641092:4496526265982492 0:0 CopyDeviceToHost:226:880592
4496526282735434:4496526283037674 0:0 CopyHostToDevice:227:880592
4496526287389972:4496526287729770 0:0 CopyDeviceToHost:230:880592
4496526304640792:4496526304943031 0:0 CopyHostToDevice:231:880592
4496526309322907:4496526309663346 0:0 CopyDeviceToHost:234:880592
4496526326129888:4496526326432128 0:0 CopyHostToDevice:235:880592
4496526330838403:4496526331178843 0:0 CopyDeviceToHost:238:880592
4496526343522029:4496526343824269 0:0 CopyHostToDevice:239:880592
4496526348310864:4496526348650504 0:0 CopyDeviceToHost:242:880592
4496526363968080:4496526364270160 0:0 CopyHostToDevice:243:880592
4496526368652274:4496526368992073 0:0 CopyDeviceToHost:246:880592
4496526384185337:4496526384486936 0:0 CopyHostToDevice:247:880592
4496526388854172:4496526389198131 0:0 CopyDeviceToHost:250:880592
4496526404335235:4496526404637314 0:0 CopyHostToDevice:251:880592
4496526408997830:4496526409365789 0:0 CopyDeviceToHost:254:880592
4496526424568493:4496526424870412 0:0 CopyHostToDevice:255:880592
4496526429213789:4496526429553747 0:0 CopyDeviceToHost:258:880592
4496526446042850:4496526446345090 0:0 CopyHostToDevice:259:880592
4496526450809605:4496526451149405 0:0 CopyDeviceToHost:262:880592
4496526467002509:4496526467304588 0:0 CopyHostToDevice:263:880592
4496526471659664:4496526471999943 0:0 CopyDeviceToHost:266:880592
4496526487288888:4496526487590807 0:0 CopyHostToDevice:267:880592
4496526491942249:4496526492283807 0:0 CopyDeviceToHost:270:880592
4496526507571795:4496526507873555 0:0 CopyHostToDevice:271:880592
4496526512311031:4496526512651311 0:0 CopyDeviceToHost:274:880592
4496526527812739:4496526528114819 0:0 CopyHostToDevice:275:880592
4496526532491175:4496526532831135 0:0 CopyDeviceToHost:278:880592
4496526547850803:4496526548153203 0:0 CopyHostToDevice:279:880592
4496526552533079:4496526552873199 0:0 CopyDeviceToHost:282:880592
4496526567925672:4496526568228072 0:0 CopyHostToDevice:283:880592
4496526572602186:4496526572942306 0:0 CopyDeviceToHost:286:880592
4496526587965332:4496526588267412 0:0 CopyHostToDevice:287:880592
4496526592643288:4496526592983887 0:0 CopyDeviceToHost:290:880592
4496526607978913:4496526608280673 0:0 CopyHostToDevice:291:880592
4496526612645349:4496526612985629 0:0 CopyDeviceToHost:294:880592
4496526628000655:4496526628302414 0:0 CopyHostToDevice:295:880592
4496526632663325:4496526633003763 0:0 CopyDeviceToHost:298:880592
4496526648017837:4496526648320077 0:0 CopyHostToDevice:299:880592
4496526652678035:4496526653018155 0:0 CopyDeviceToHost:302:880592
4496526668055108:4496526668356868 0:0 CopyHostToDevice:303:880592
4496526672724106:4496526673064066 0:0 CopyDeviceToHost:306:880592
4496526688755419:4496526689057339 0:0 CopyHostToDevice:307:880592
4496526693419457:4496526693759417 0:0 CopyDeviceToHost:310:880592
4496526710295327:4496526710597247 0:0 CopyHostToDevice:311:880592
4496526714950082:4496526715290361 0:0 CopyDeviceToHost:314:880592
4496526732128906:4496526732430665 0:0 CopyHostToDevice:315:880592
4496526736777741:4496526737117701 0:0 CopyDeviceToHost:318:880592
4496526752148407:4496526752450167 0:0 CopyHostToDevice:319:880592
4496526756824443:4496526757165043 0:0 CopyDeviceToHost:322:880592
4496526772241509:4496526772543908 0:0 CopyHostToDevice:323:880592
4496526776924789:4496526777266187 0:0 CopyDeviceToHost:326:880592
4496526792332421:4496526792634501 0:0 CopyHostToDevice:327:880592
4496526797007499:4496526797378499 0:0 CopyDeviceToHost:330:880592
4496526812350653:4496526812652573 0:0 CopyHostToDevice:331:880592
4496526817024291:4496526817396411 0:0 CopyDeviceToHost:334:880592
4496526832511765:4496526832813525 0:0 CopyHostToDevice:335:880592
4496526837210523:4496526837551603 0:0 CopyDeviceToHost:338:880592
4496526852574486:4496526852876246 0:0 CopyHostToDevice:339:880592
4496526857232600:4496526857571760 0:0 CopyDeviceToHost:342:880592
4496526872568066:4496526872869986 0:0 CopyHostToDevice:343:880592
4496526877873701:4496526878217980 0:0 CopyDeviceToHost:346:880592
4496526894948845:4496526895250604 0:0 CopyHostToDevice:347:880592
4496526899604560:4496526899944040 0:0 CopyDeviceToHost:350:880592
4496526916800780:4496526917103020 0:0 CopyHostToDevice:351:880592
4496526921465614:4496526921806534 0:0 CopyDeviceToHost:354:880592
4496526938344278:4496526938646198 0:0 CopyHostToDevice:355:880592
4496526942999834:4496526943373873 0:0 CopyDeviceToHost:358:880592
4496526960757056:4496526961059296 0:0 CopyHostToDevice:359:880592
4496526965400292:4496526965740572 0:0 CopyDeviceToHost:362:880592
4496526982246291:4496526982548371 0:0 CopyHostToDevice:363:880592
4496526986906166:4496526987245965 0:0 CopyDeviceToHost:366:880592
4496527004066590:4496527004368030 0:0 CopyHostToDevice:367:880592
4496527008741665:4496527009081145 0:0 CopyDeviceToHost:370:880592
4496527024023531:4496527024325611 0:0 CopyHostToDevice:371:880592
4496527028707247:4496527029047527 0:0 CopyDeviceToHost:374:880592
4496527043983033:4496527044285593 0:0 CopyHostToDevice:375:880592
4496527048638701:4496527048978659 0:0 CopyDeviceToHost:378:880592
4496527063937205:4496527064239125 0:0 CopyHostToDevice:379:880592
4496527068606361:4496527068946161 0:0 CopyDeviceToHost:382:880592
4496527083919907:4496527084221506 0:0 CopyHostToDevice:383:880592
4496527088585542:4496527088926302 0:0 CopyDeviceToHost:386:880592
4496527103947568:4496527104249487 0:0 CopyHostToDevice:387:880592
4496527108615443:4496527108955883 0:0 CopyDeviceToHost:390:880592
4496527124628049:4496527124930289 0:0 CopyHostToDevice:391:880592
4496527129311123:4496527129651403 0:0 CopyDeviceToHost:394:880592
4496527146344668:4496527146647227 0:0 CopyHostToDevice:395:880592
4496527151012863:4496527151385303 0:0 CopyDeviceToHost:398:880592
4496527166565609:4496527166867209 0:0 CopyHostToDevice:399:880592
4496527171210765:4496527171550564 0:0 CopyDeviceToHost:402:880592
4496524903280142:4496524903426608 880592:880592 hipMalloc(ptr=0x7f14c3000000, size=4194304) :1
4496524903446365:4496524903573365 880592:880592 hipMalloc(ptr=0x7f14c2800000, size=4194304) :2
4496524903588203:4496525133627902 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :3
4496525134207305:4496525140607184 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :6
4496525154755917:4496525158532879 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :7
4496525158552125:4496525163335997 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :10
4496525175814741:4496525179506102 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :11
4496525179519266:4496525184300123 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :14
4496525196393148:4496525200179318 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :15
4496525200189638:4496525204936449 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :18
4496525216744046:4496525220425409 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :19
4496525220438995:4496525225172542 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :22
4496525236900238:4496525240619832 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :23
4496525240633478:4496525245363489 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :26
4496525257076899:4496525260752009 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :27
4496525260765474:4496525265528037 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :30
4496525278601381:4496525282344690 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :31
4496525282356923:4496525287062988 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :34
4496525298199228:4496525301976453 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :35
4496525301989357:4496525306701493 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :38
4496525319442590:4496525323149401 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :39
4496525323159049:4496525327878419 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :42
4496525340212129:4496525343964345 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :43
4496525343973583:4496525348741845 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :46
4496525360729852:4496525364398150 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :47
4496525364412076:4496525369144271 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :50
4496525381014837:4496525384667765 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :51
4496525384680900:4496525389438431 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :54
4496525401191155:4496525404934986 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :55
4496525404946919:4496525410298471 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :58
4496525423502872:4496525427227196 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :59
4496525427242074:4496525431931367 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :62
4496525444940650:4496525448596826 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :63
4496525448609119:4496525453319692 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :66
4496525464825952:4496525468507253 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :67
4496525468516811:4496525473221545 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :70
4496525484776165:4496525488428212 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :71
4496525488438561:4496525493128415 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :74
4496525504724173:4496525508381501 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :75
4496525508391049:4496525513111039 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :78
4496525524703772:4496525528368253 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :79
4496525528381448:4496525533087222 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :82
4496525544743344:4496525548393326 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :83
4496525548405820:4496525553108910 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :86
4496525564776353:4496525568419633 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :87
4496525568431526:4496525573146797 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :90
4496525584773363:4496525588428076 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :91
4496525588441251:4496525593151694 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :94
4496525604951276:4496525608612772 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :95
4496525608626318:4496525613335408 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :98
4496525624925244:4496525628593993 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :99
4496525628604623:4496525633324024 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :102
4496525644955148:4496525648653793 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :103
4496525648663752:4496525653416343 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :106
4496525665022200:4496525668655764 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :107
4496525668669820:4496525673415239 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :110
4496525685021526:4496525688656242 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :111
4496525688669547:4496525693410117 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :114
4496525705048605:4496525708697446 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :115
4496525708710921:4496525713450741 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :118
4496525725057529:4496525728701911 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :119
4496525728714956:4496525733453982 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :122
4496525745101558:4496525748716245 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :123
4496525748728729:4496525753469559 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :126
4496525765097578:4496525768729748 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :127
4496525768739346:4496525773486869 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :130
4496525785153981:4496525788889836 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :131
4496525788899334:4496525793625478 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :134
4496525806694594:4496525810379663 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :135
4496525810389762:4496525815062083 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :138
4496525829025717:4496525832652416 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :139
4496525832666232:4496525838020960 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :142
4496525851335449:4496525855079079 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :143
4496525855095120:4496525859768263 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :146
4496525873262060:4496525876972367 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :147
4496525876986173:4496525881732493 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :150
4496525894813432:4496525898344842 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :151
4496525898357967:4496525903025769 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :154
4496525914049899:4496525917708138 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :155
4496525917721533:4496525922419974 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :158
4496525933083213:4496525936861418 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :159
4496525936871808:4496525941569528 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :162
4496525952271099:4496525956051338 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :163
4496525956061647:4496525960747143 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :166
4496525973345872:4496525977113719 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :167
4496525977127134:4496525981829511 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :170
4496525996273891:4496525999995089 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :171
4496526000008564:4496526004736400 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :174
4496526016480127:4496526020175927 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :175
4496526020189152:4496526024907620 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :178
4496526036737941:4496526040392402 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :179
4496526040406088:4496526045126310 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :182
4496526056750972:4496526060387640 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :183
4496526060400274:4496526065106138 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :186
4496526076767389:4496526080447340 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :187
4496526080456357:4496526085186276 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :190
4496526096786342:4496526100434311 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :191
4496526100443629:4496526105161156 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :194
4496526116801218:4496526120501776 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :195
4496526120511514:4496526125217650 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :198
4496526136832464:4496526140494920 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :199
4496526140509047:4496526145220241 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :202
4496526156843100:4496526160502872 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :203
4496526160515847:4496526165229165 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :206
4496526176867433:4496526180510052 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :207
4496526180522345:4496526185229233 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :210
4496526196882068:4496526200511312 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :211
4496526200522904:4496526205229030 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :214
4496526216885692:4496526220532459 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :215
4496526220544051:4496526225248913 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :218
4496526236953526:4496526240608640 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :219
4496526240618608:4496526245336946 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :222
4496526257032562:4496526260697764 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :223
4496526260706901:4496526266068673 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :226
4496526279332747:4496526283108167 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :227
4496526283124067:4496526287796549 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :230
4496526301304112:4496526305009459 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :231
4496526305023315:4496526309749930 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :234
4496526322801743:4496526326516899 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :235
4496526326534903:4496526331245847 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :238
4496526340107579:4496526343906984 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :239
4496526343924247:4496526348732695 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :242
4496526360672632:4496526364342794 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :243
4496526364356309:4496526369073956 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :246
4496526380921729:4496526384554339 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :247
4496526384564297:4496526389280813 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :250
4496526401055448:4496526404706814 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :251
4496526404716291:4496526409453775 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :254
4496526421231786:4496526424938237 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :255
4496526424947284:4496526429644452 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :258
4496526442708689:4496526446413777 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :259
4496526446426842:4496526451217265 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :262
4496526463698402:4496526467373874 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :263
4496526467389634:4496526472081051 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :266
4496526484017822:4496526487658527 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :267
4496526487672484:4496526492365545 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :270
4496526504217596:4496526507940206 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :271
4496526507952269:4496526512732904 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :274
4496526524481750:4496526528181919 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :275
4496526528194643:4496526532912630 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :278
4496526544545337:4496526548219016 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :279
4496526548228644:4496526552953164 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :282
4496526564603244:4496526568298141 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :283
4496526568308411:4496526573022752 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :286
4496526584675307:4496526588335099 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :287
4496526588344907:4496526593063886 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :290
4496526604685893:4496526608347148 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :291
4496526608360543:4496526613066458 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :294
4496526624712120:4496526628367043 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :295
4496526628379787:4496526633084449 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :298
4496526644736553:4496526648383901 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :299
4496526648395974:4496526653098974 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :302
4496526664769242:4496526668420808 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :303
4496526668433191:4496526673144235 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :306
4496526684807961:4496526689128249 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :307
4496526689138038:4496526693842629 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :310
4496526706935721:4496526710671858 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :311
4496526710682528:4496526715357624 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :314
4496526728844507:4496526732497026 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :315
4496526732507246:4496526737198843 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :318
4496526748886113:4496526752517561 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :319
4496526752530505:4496526757248855 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :322
4496526768940533:4496526772608911 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :323
4496526772622196:4496526777347416 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :326
4496526789025531:4496526792698958 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :327
4496526792712093:4496526797464554 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :330
4496526809125575:4496526812718051 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :331
4496526812731336:4496526817481033 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :334
4496526829138567:4496526832879692 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :335
4496526832891444:4496526837636884 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :338
4496526849206473:4496526852946255 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :339
4496526852954871:4496526857658531 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :342
4496526869245494:4496526873593022 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :343
4496526873604183:4496526878299818 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :346
4496526891597135:4496526895318503 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :347
4496526895329664:4496526900009158 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :350
4496526913485803:4496526917173899 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :351
4496526917187725:4496526921890463 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :354
4496526934966763:4496526938716654 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :355
4496526938729629:4496526943439361 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :358
4496526957411730:4496526961125205 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :359
4496526961138059:4496526965826390 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :362
4496526978879125:4496526982620470 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :363
4496526982632643:4496526987311768 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :366
4496527000778785:4496527004436191 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :367
4496527004448955:4496527009161343 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :370
4496527020723638:4496527024395191 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :371
4496527024404639:4496527029129138 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :374
4496527040699580:4496527044353630 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :375
4496527044363048:4496527049060306 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :378
4496527060632609:4496527064307921 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :379
4496527064321907:4496527069027702 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :382
4496527080621436:4496527084288380 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :383
4496527084301195:4496527089011037 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :386
4496527100657080:4496527104315719 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :387
4496527104328433:4496527109036020 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :390
4496527120665231:4496527125002391 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :391
4496527125016488:4496527129737942 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :394
4496527143046309:4496527146717213 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :395
4496527146730868:4496527151451590 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :398
4496527163132549:4496527166937384 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :399
4496527166947223:4496527171616068 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :402
4496527185123250:4496527185153196 880592:880592 hipFree(ptr=0x7f14c3000000) :403
4496527185154519:4496527185168716 880592:880592 hipFree(ptr=0x7f14c2800000) :404
@@ -0,0 +1,274 @@
+ ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose
ROCTracer (pid=1983):
ROCTracer: trace control: delay(10us), length(100000us), rate(1000000us)
3802699747119708
HIP-trace()
Device name Device 687f
## Iteration (99) #################
PASSED!
## Iteration (98) #################
PASSED!
## Iteration (97) #################
PASSED!
## Iteration (96) #################
PASSED!
## Iteration (95) #################
PASSED!
## Iteration (94) #################
PASSED!
## Iteration (93) #################
PASSED!
## Iteration (92) #################
PASSED!
## Iteration (91) #################
PASSED!
## Iteration (90) #################
PASSED!
## Iteration (89) #################
PASSED!
## Iteration (88) #################
PASSED!
## Iteration (87) #################
PASSED!
## Iteration (86) #################
PASSED!
## Iteration (85) #################
PASSED!
## Iteration (84) #################
PASSED!
## Iteration (83) #################
PASSED!
## Iteration (82) #################
PASSED!
## Iteration (81) #################
PASSED!
## Iteration (80) #################
PASSED!
## Iteration (79) #################
PASSED!
## Iteration (78) #################
PASSED!
## Iteration (77) #################
PASSED!
## Iteration (76) #################
PASSED!
## Iteration (75) #################
PASSED!
## Iteration (74) #################
PASSED!
## Iteration (73) #################
PASSED!
## Iteration (72) #################
PASSED!
## Iteration (71) #################
PASSED!
## Iteration (70) #################
PASSED!
## Iteration (69) #################
PASSED!
## Iteration (68) #################
PASSED!
## Iteration (67) #################
PASSED!
## Iteration (66) #################
PASSED!
## Iteration (65) #################
PASSED!
## Iteration (64) #################
PASSED!
## Iteration (63) #################
PASSED!
## Iteration (62) #################
PASSED!
## Iteration (61) #################
PASSED!
## Iteration (60) #################
PASSED!
## Iteration (59) #################
PASSED!
## Iteration (58) #################
PASSED!
## Iteration (57) #################
PASSED!
## Iteration (56) #################
PASSED!
## Iteration (55) #################
PASSED!
## Iteration (54) #################
PASSED!
## Iteration (53) #################
PASSED!
## Iteration (52) #################
PASSED!
## Iteration (51) #################
PASSED!
## Iteration (50) #################
PASSED!
## Iteration (49) #################
PASSED!
## Iteration (48) #################
PASSED!
## Iteration (47) #################
PASSED!
## Iteration (46) #################
PASSED!
## Iteration (45) #################
PASSED!
## Iteration (44) #################
PASSED!
## Iteration (43) #################
PASSED!
## Iteration (42) #################
PASSED!
## Iteration (41) #################
PASSED!
## Iteration (40) #################
PASSED!
## Iteration (39) #################
PASSED!
## Iteration (38) #################
PASSED!
## Iteration (37) #################
PASSED!
## Iteration (36) #################
PASSED!
## Iteration (35) #################
PASSED!
## Iteration (34) #################
PASSED!
## Iteration (33) #################
PASSED!
## Iteration (32) #################
PASSED!
## Iteration (31) #################
PASSED!
## Iteration (30) #################
PASSED!
## Iteration (29) #################
PASSED!
## Iteration (28) #################
PASSED!
## Iteration (27) #################
PASSED!
## Iteration (26) #################
PASSED!
## Iteration (25) #################
PASSED!
## Iteration (24) #################
PASSED!
## Iteration (23) #################
PASSED!
## Iteration (22) #################
PASSED!
## Iteration (21) #################
PASSED!
## Iteration (20) #################
PASSED!
## Iteration (19) #################
PASSED!
## Iteration (18) #################
PASSED!
## Iteration (17) #################
PASSED!
## Iteration (16) #################
PASSED!
## Iteration (15) #################
PASSED!
## Iteration (14) #################
PASSED!
## Iteration (13) #################
PASSED!
## Iteration (12) #################
PASSED!
## Iteration (11) #################
PASSED!
## Iteration (10) #################
PASSED!
## Iteration (9) #################
PASSED!
## Iteration (8) #################
PASSED!
## Iteration (7) #################
PASSED!
## Iteration (6) #################
PASSED!
## Iteration (5) #################
PASSED!
## Iteration (4) #################
PASSED!
## Iteration (3) #################
PASSED!
## Iteration (2) #################
PASSED!
## Iteration (1) #################
PASSED!
## Iteration (0) #################
PASSED!
3802699751533941:3802699751541991 1983:1983 hipGetDevicePropertiesR0600(props=, device=0)
3802699752571489:3802699752686289 1983:1983 hipMalloc(ptr=0x7f6c121ff010, size=4194304)
3802699752688639:3802699752749390 1983:1983 hipMalloc(ptr=0x7fffefcadf28, size=4194304)
3802699752763840:3802700027958750 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
3802700932447414:3802700934135107 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
3802700934143817:3802700934144527 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
3802700934146607:3802700934147267 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e03f3099)
3802700934158787:3802700934164967 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
3802700934192847:3802700936775947 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
3802700943795998:3802700945501111 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
3802700945517031:3802700945517901 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
3802700945519841:3802700945520521 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e0ecbb86)
3802700945522671:3802700945530171 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
3802700945534701:3802700948131020 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
3802700955136442:3802700956839355 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
3802700956847725:3802700956848495 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
3802700956850235:3802700956850825 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e1999f61)
3802700956860545:3802700956868795 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
3802700956872065:3802700959479235 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
3802700966505397:3802700968203670 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
3802700968219030:3802700968219770 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
3802700968221700:3802700968222280 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e247222e)
3802700968225090:3802700968233560 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
3802700968241120:3802700970853059 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
3802700977859821:3802700979559833 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
3802700979567803:3802700979568553 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
3802700979570433:3802700979571073 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e2f44d18)
3802700979581243:3802700979589274 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
3802700979592044:3802700982222943 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
3802700989239045:3802700990944838 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
3802700990960008:3802700990960828 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
3802700990963068:3802700990963638 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e3a221d9)
3802700990966328:3802700990975628 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
3802700990978718:3802700993694078 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
3802701000919212:3802701002625515 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
3802701002633405:3802701002634215 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
3802701002635935:3802701002636515 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e45440c4)
3802701002649885:3802701002657855 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
3802701002660835:3802701005267024 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
3802701012322026:3802701014008789 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
3802701014023469:3802701014024239 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
3802701014028089:3802701014028669 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e5020cc5)
3802701014031569:3802701014039849 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
3802701014042919:3802701016640288 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
3802701023688501:3802701025398903 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
3802701025407454:3802701025408214 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
3802701025410224:3802701025411104 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e5afc125)
3802701025412944:3802701025420534 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
3802701025431374:3802701028050563 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
3802700025923715:3802700027953920 0:0 CopyHostToDevice:4:1983
3802700932468645:3802700934131397 0:0 CopyHostToDevice:159:1983
3802700934202858:3802700936764597 0:0 CopyDeviceToHost:165:1983
3802700943841248:3802700945497221 0:0 CopyHostToDevice:166:1983
3802700945569841:3802700948120440 0:0 CopyDeviceToHost:172:1983
3802700955175473:3802700956835555 0:0 CopyHostToDevice:173:1983
3802700956907066:3802700959467615 0:0 CopyDeviceToHost:179:1983
3802700966543517:3802700968200020 0:0 CopyHostToDevice:180:1983
3802700968270720:3802700970841439 0:0 CopyDeviceToHost:186:1983
3802700977897221:3802700979556403 0:0 CopyHostToDevice:187:1983
3802700979628944:3802700982210583 0:0 CopyDeviceToHost:193:1983
3802700989276246:3802700990941188 0:0 CopyHostToDevice:194:1983
3802700991012848:3802700993682128 0:0 CopyDeviceToHost:200:1983
3802701000959152:3802701002622075 0:0 CopyHostToDevice:201:1983
3802701002693645:3802701005254464 0:0 CopyDeviceToHost:207:1983
3802701012346926:3802701014005359 0:0 CopyHostToDevice:208:1983
3802701014077439:3802701016629358 0:0 CopyDeviceToHost:214:1983
3802701023726221:3802701025394963 0:0 CopyHostToDevice:215:1983
3802701025467214:3802701028039843 0:0 CopyDeviceToHost:221:1983
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór Difríocht Luchtaigh
@@ -0,0 +1,205 @@
+ LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose
ROCTracer (pid=1963):
KFD-trace()
3802694735152956
Device name Device 687f
## Iteration (99) #################
PASSED!
## Iteration (98) #################
PASSED!
## Iteration (97) #################
PASSED!
## Iteration (96) #################
PASSED!
## Iteration (95) #################
PASSED!
## Iteration (94) #################
PASSED!
## Iteration (93) #################
PASSED!
## Iteration (92) #################
PASSED!
## Iteration (91) #################
PASSED!
## Iteration (90) #################
PASSED!
## Iteration (89) #################
PASSED!
## Iteration (88) #################
PASSED!
## Iteration (87) #################
PASSED!
## Iteration (86) #################
PASSED!
## Iteration (85) #################
PASSED!
## Iteration (84) #################
PASSED!
## Iteration (83) #################
PASSED!
## Iteration (82) #################
PASSED!
## Iteration (81) #################
PASSED!
## Iteration (80) #################
PASSED!
## Iteration (79) #################
PASSED!
## Iteration (78) #################
PASSED!
## Iteration (77) #################
PASSED!
## Iteration (76) #################
PASSED!
## Iteration (75) #################
PASSED!
## Iteration (74) #################
PASSED!
## Iteration (73) #################
PASSED!
## Iteration (72) #################
PASSED!
## Iteration (71) #################
PASSED!
## Iteration (70) #################
PASSED!
## Iteration (69) #################
PASSED!
## Iteration (68) #################
PASSED!
## Iteration (67) #################
PASSED!
## Iteration (66) #################
PASSED!
## Iteration (65) #################
PASSED!
## Iteration (64) #################
PASSED!
## Iteration (63) #################
PASSED!
## Iteration (62) #################
PASSED!
## Iteration (61) #################
PASSED!
## Iteration (60) #################
PASSED!
## Iteration (59) #################
PASSED!
## Iteration (58) #################
PASSED!
## Iteration (57) #################
PASSED!
## Iteration (56) #################
PASSED!
## Iteration (55) #################
PASSED!
## Iteration (54) #################
PASSED!
## Iteration (53) #################
PASSED!
## Iteration (52) #################
PASSED!
## Iteration (51) #################
PASSED!
## Iteration (50) #################
PASSED!
## Iteration (49) #################
PASSED!
## Iteration (48) #################
PASSED!
## Iteration (47) #################
PASSED!
## Iteration (46) #################
PASSED!
## Iteration (45) #################
PASSED!
## Iteration (44) #################
PASSED!
## Iteration (43) #################
PASSED!
## Iteration (42) #################
PASSED!
## Iteration (41) #################
PASSED!
## Iteration (40) #################
PASSED!
## Iteration (39) #################
PASSED!
## Iteration (38) #################
PASSED!
## Iteration (37) #################
PASSED!
## Iteration (36) #################
PASSED!
## Iteration (35) #################
PASSED!
## Iteration (34) #################
PASSED!
## Iteration (33) #################
PASSED!
## Iteration (32) #################
PASSED!
## Iteration (31) #################
PASSED!
## Iteration (30) #################
PASSED!
## Iteration (29) #################
PASSED!
## Iteration (28) #################
PASSED!
## Iteration (27) #################
PASSED!
## Iteration (26) #################
PASSED!
## Iteration (25) #################
PASSED!
## Iteration (24) #################
PASSED!
## Iteration (23) #################
PASSED!
## Iteration (22) #################
PASSED!
## Iteration (21) #################
PASSED!
## Iteration (20) #################
PASSED!
## Iteration (19) #################
PASSED!
## Iteration (18) #################
PASSED!
## Iteration (17) #################
PASSED!
## Iteration (16) #################
PASSED!
## Iteration (15) #################
PASSED!
## Iteration (14) #################
PASSED!
## Iteration (13) #################
PASSED!
## Iteration (12) #################
PASSED!
## Iteration (11) #################
PASSED!
## Iteration (10) #################
PASSED!
## Iteration (9) #################
PASSED!
## Iteration (8) #################
PASSED!
## Iteration (7) #################
PASSED!
## Iteration (6) #################
PASSED!
## Iteration (5) #################
PASSED!
## Iteration (4) #################
PASSED!
## Iteration (3) #################
PASSED!
## Iteration (2) #################
PASSED!
## Iteration (1) #################
PASSED!
## Iteration (0) #################
PASSED!
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór Difríocht Luchtaigh
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór Difríocht Luchtaigh
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór Difríocht Luchtaigh
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór Difríocht Luchtaigh
@@ -0,0 +1,65 @@
<hipSetDevice id(186) correlation_id(1) on-enter pid(877336) tid(877336)>
<hipSetDevice id(186) correlation_id(1) on-exit pid(877336) tid(877336)>
<__hipPushCallConfiguration id(2) correlation_id(2) on-enter pid(877336) tid(877336)>
<__hipPushCallConfiguration id(2) correlation_id(2) on-exit pid(877336) tid(877336)>
<__hipPopCallConfiguration id(1) correlation_id(3) on-enter pid(877336) tid(877336)>
<__hipPopCallConfiguration id(1) correlation_id(3) on-exit pid(877336) tid(877336)>
<hipLaunchKernel id(107) correlation_id(4) on-enter pid(877336) tid(877336)>
<hipLaunchKernel id(107) correlation_id(4) on-exit pid(877336) tid(877336)>
<hipDeviceSynchronize id(48) correlation_id(5) on-enter pid(877336) tid(877336)>
<hipDeviceSynchronize id(48) correlation_id(5) on-exit pid(877336) tid(877336)>
<hipSetDevice id(186) correlation_id(6) on-enter pid(877336) tid(877336)>
<hipSetDevice id(186) correlation_id(6) on-exit pid(877336) tid(877336)>
<__hipPushCallConfiguration id(2) correlation_id(7) on-enter pid(877336) tid(877336)>
<__hipPushCallConfiguration id(2) correlation_id(7) on-exit pid(877336) tid(877336)>
<__hipPopCallConfiguration id(1) correlation_id(8) on-enter pid(877336) tid(877336)>
<__hipPopCallConfiguration id(1) correlation_id(8) on-exit pid(877336) tid(877336)>
<hipLaunchKernel id(107) correlation_id(9) on-enter pid(877336) tid(877336)>
<hipLaunchKernel id(107) correlation_id(9) on-exit pid(877336) tid(877336)>
<hipDeviceSynchronize id(48) correlation_id(10) on-enter pid(877336) tid(877336)>
<hipDeviceSynchronize id(48) correlation_id(10) on-exit pid(877336) tid(877336)>
hipSetDevice correlation_id(6) time_ns(861794298279896:861794298283613)
__hipPushCallConfiguration correlation_id(7) time_ns(861794298290125:861794298293211)
__hipPopCallConfiguration correlation_id(8) time_ns(861794298293903:861794298295325)
hipLaunchKernel correlation_id(9) time_ns(861794298296377:861794298313029)
hipDeviceSynchronize correlation_id(10) time_ns(861794298313470:861794298331113)
hipSetDevice correlation_id(11) time_ns(861794298565986:861794298566277)
__hipPushCallConfiguration correlation_id(12) time_ns(861794298566738:861794298567148)
__hipPopCallConfiguration correlation_id(13) time_ns(861794298567569:861794298568010)
hipLaunchKernel correlation_id(14) time_ns(861794298568391:861794298577638)
hipDeviceSynchronize correlation_id(15) time_ns(861794298578069:861794298594841)
<hipSetDevice id(186) correlation_id(16) on-enter pid(877336) tid(877336)>
<hipSetDevice id(186) correlation_id(16) on-exit pid(877336) tid(877336)>
<__hipPushCallConfiguration id(2) correlation_id(17) on-enter pid(877336) tid(877336)>
<__hipPushCallConfiguration id(2) correlation_id(17) on-exit pid(877336) tid(877336)>
<__hipPopCallConfiguration id(1) correlation_id(18) on-enter pid(877336) tid(877336)>
<__hipPopCallConfiguration id(1) correlation_id(18) on-exit pid(877336) tid(877336)>
<hipLaunchKernel id(107) correlation_id(19) on-enter pid(877336) tid(877336)>
<hipLaunchKernel id(107) correlation_id(19) on-exit pid(877336) tid(877336)>
<hipDeviceSynchronize id(48) correlation_id(20) on-enter pid(877336) tid(877336)>
<hipDeviceSynchronize id(48) correlation_id(20) on-exit pid(877336) tid(877336)>
<hipSetDevice id(186) correlation_id(21) on-enter pid(877336) tid(877336)>
<hipSetDevice id(186) correlation_id(21) on-exit pid(877336) tid(877336)>
<__hipPushCallConfiguration id(2) correlation_id(22) on-enter pid(877336) tid(877336)>
<__hipPushCallConfiguration id(2) correlation_id(22) on-exit pid(877336) tid(877336)>
<__hipPopCallConfiguration id(1) correlation_id(23) on-enter pid(877336) tid(877336)>
<__hipPopCallConfiguration id(1) correlation_id(23) on-exit pid(877336) tid(877336)>
<hipLaunchKernel id(107) correlation_id(24) on-enter pid(877336) tid(877336)>
<hipLaunchKernel id(107) correlation_id(24) on-exit pid(877336) tid(877336)>
<hipDeviceSynchronize id(48) correlation_id(25) on-enter pid(877336) tid(877336)>
<hipDeviceSynchronize id(48) correlation_id(25) on-exit pid(877336) tid(877336)>
hipSetDevice correlation_id(21) time_ns(861794299364583:861794299365585)
__hipPushCallConfiguration correlation_id(22) time_ns(861794299366106:861794299367329)
__hipPopCallConfiguration correlation_id(23) time_ns(861794299367830:861794299369082)
hipLaunchKernel correlation_id(24) time_ns(861794299369523:861794299377227)
hipDeviceSynchronize correlation_id(25) time_ns(861794299377748:861794299394730)
<hipSetDevice id(186) correlation_id(26) on-enter pid(877336) tid(877336)>
<hipSetDevice id(186) correlation_id(26) on-exit pid(877336) tid(877336)>
<__hipPushCallConfiguration id(2) correlation_id(27) on-enter pid(877336) tid(877336)>
<__hipPushCallConfiguration id(2) correlation_id(27) on-exit pid(877336) tid(877336)>
<__hipPopCallConfiguration id(1) correlation_id(28) on-enter pid(877336) tid(877336)>
<__hipPopCallConfiguration id(1) correlation_id(28) on-exit pid(877336) tid(877336)>
<hipLaunchKernel id(107) correlation_id(29) on-enter pid(877336) tid(877336)>
<hipLaunchKernel id(107) correlation_id(29) on-exit pid(877336) tid(877336)>
<hipDeviceSynchronize id(48) correlation_id(30) on-enter pid(877336) tid(877336)>
<hipDeviceSynchronize id(48) correlation_id(30) on-exit pid(877336) tid(877336)>
@@ -0,0 +1,16 @@
ROCTracer (pid=882619): input from "test/input.xml"
0x560905682a90 agent cpu
0x560905685470 agent gpu
0x5609056bd500 agent gpu
538167097046266
HSA-trace( hsa_agent_get_info hsa_amd_memory_pool_allocate)
HSA-activity-trace()
538167097315584:538167097318359 882619:882619 hsa_agent_get_info({handle=94596745407120}, 0, 0x7fffa4fa77a8) = 0
538167097322808:538167097323088 882619:882619 hsa_agent_get_info({handle=94596745407120}, 17, 0x7fffa4fa7790) = 0
538167097333378:538167097334620 882619:882619 hsa_agent_get_info({handle=94596745417840}, 0, 0x7fffa4fa77a8) = 0
538167097335141:538167097335411 882619:882619 hsa_agent_get_info({handle=94596745417840}, 17, 0x7fffa4fa7790) = 0
538167097338377:538167097338798 882619:882619 hsa_agent_get_info({handle=94596745647360}, 0, 0x7fffa4fa77a8) = 0
538167097339319:538167097339569 882619:882619 hsa_agent_get_info({handle=94596745647360}, 17, 0x7fffa4fa7790) = 0
538167119333344:538167119348423 882619:882622 hsa_amd_memory_pool_allocate({handle=94596745407920}, 72, 0, 0x7f7d8f7f9c30) = 0
538167119471866:538167119484039 882619:882622 hsa_amd_memory_pool_allocate({handle=94596745407920}, 256, 0, 0x7f7d8f7f9c30) = 0
538167119592754:538167119603945 882619:882622 hsa_amd_memory_pool_allocate({handle=94596745407920}, 256, 0, 0x7f7d8f7f9c30) = 0
@@ -0,0 +1,68 @@
ROCTracer (pid=566828):
0x55e1b9d507c0 agent cpu
0x55e1b9d4eeb0 agent gpu
0x55e1b9d8b540 agent gpu
975779239309496
HSA-trace()
HSA-activity-trace()
975779240024464:975779240024815 566828:566828 hsa_agent_get_info(, 0, 0x7ffdd25cc9a8) = 0 :6
975779240029173:975779240029274 566828:566828 hsa_agent_get_info(, 17, 0x7ffdd25cc990) = 0 :7
975779240035816:975779240036187 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc6bc) = 0 :9
975779240036667:975779240036768 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc6c0) = 0 :10
975779240037219:975779240037319 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc6e0) = 0 :11
975779240037760:975779240037860 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc6e8) = 0 :12
975779240039823:975779240039914 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc6bc) = 0 :13
975779240040455:975779240040555 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc6c0) = 0 :14
975779240041076:975779240041156 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc6e0) = 0 :15
975779240041697:975779240041777 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc6e8) = 0 :16
975779240042619:975779240042709 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc6bc) = 0 :17
975779240043250:975779240043350 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc6c0) = 0 :18
975779240043871:975779240043961 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc6e0) = 0 :19
975779240044482:975779240044562 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc6e8) = 0 :20
975779240044482:975779240045264 566828:566828 hsa_amd_agent_iterate_memory_pools(, 1, 0x7ffdd25cc9e8) = 0 :8
975779240048430:975779240049341 566828:566828 hsa_agent_get_info(, 0, 0x7ffdd25cc9a8) = 0 :21
975779240049822:975779240049932 566828:566828 hsa_agent_get_info(, 17, 0x7ffdd25cc990) = 0 :22
975779240050654:975779240050744 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :24
975779240051275:975779240051365 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc630) = 0 :25
975779240051786:975779240051866 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc650) = 0 :26
975779240052287:975779240052377 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc658) = 0 :27
975779240053048:975779240053159 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :28
975779240053048:975779240053609 566828:566828 hsa_amd_agent_iterate_memory_pools(, 1, 0x7ffdd25cc9e8) = 0 :23
975779240055373:975779240055663 566828:566828 hsa_agent_get_info(, 0, 0x7ffdd25cc9a8) = 0 :29
975779240056144:975779240056234 566828:566828 hsa_agent_get_info(, 17, 0x7ffdd25cc990) = 0 :30
975779240056986:975779240057076 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :32
975779240057507:975779240057587 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc630) = 0 :33
975779240058008:975779240058088 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc650) = 0 :34
975779240058509:975779240058589 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc658) = 0 :35
975779240061504:975779240061605 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :36
975779240061504:975779240062035 566828:566828 hsa_amd_agent_iterate_memory_pools(, 1, 0x7ffdd25cc9e8) = 0 :31
975779240061504:975779240063528 566828:566828 hsa_iterate_agents(1, 0) = 0 :5
975779240167184:975779249865797 566828:566834 hsa_queue_create(, 1024, 1, 0, 0, 0, 0, 0x7f70535fdbc8) = 0 :37
975779249901595:975779249990022 566828:566834 hsa_code_object_reader_create_from_file(8, 0x7f70535fdbf8) = 0 :38
975779249990984:975779250001383 566828:566834 hsa_executable_create_alt(1, 0, 0x0, 0x7f70535fdc00) = 0 :27
975779250002345:975779250430202 566828:566834 hsa_executable_load_agent_code_object(, , , 0x0, 0) = 0 :28
975779250432296:975779250926909 566828:566834 hsa_executable_freeze(, 0x0) = 0 :29
975779250929755:975779250931207 566828:566834 hsa_executable_get_symbol_by_name(, 0x7f7054002950, 0x7f70535fdb28, 0x7f70535fdb48) = 1013 :2a
975779250931979:975779250932690 566828:566834 hsa_executable_get_symbol_by_name(, 0x7f7054003850, 0x7f70535fdb28, 0x7f70535fdb48) = 0 :2b
975779250934403:975779250934854 566828:566834 hsa_executable_symbol_get_info(, 22, 0x7f70535fdc10) = 0 :44
975779250935425:975779250935536 566828:566834 hsa_executable_symbol_get_info(, 14, 0x7f70535fdc18) = 0 :45
975779250936097:975779250936177 566828:566834 hsa_executable_symbol_get_info(, 13, 0x7f70535fdc1c) = 0 :46
975779250936728:975779250936798 566828:566834 hsa_executable_symbol_get_info(, 11, 0x7f70535fdc20) = 0 :47
975779250937349:975779250937419 566828:566834 hsa_executable_symbol_get_info(, 12, 0x7f70535fdc24) = 0 :48
975779250938321:975779250956876 566828:566834 hsa_amd_memory_pool_allocate(, 72, 0, 0x7f70535fdb70) = 0 :49
975779250958098:975779251048298 566828:566834 hsa_amd_agents_allow_access(3, 0x55e1b9df4c30, 0, 0x7f7261070000) = 0 :50
975779251049150:975779251065531 566828:566834 hsa_amd_memory_pool_allocate(, 256, 0, 0x7f70535fdb70) = 0 :51
975779251066232:975779251149319 566828:566834 hsa_amd_agents_allow_access(3, 0x55e1b9df4c30, 0, 0x7f726106e000) = 0 :52
975779251150000:975779251165960 566828:566834 hsa_amd_memory_pool_allocate(, 256, 0, 0x7f70535fdb70) = 0 :53
975779251166531:975779251256912 566828:566834 hsa_amd_agents_allow_access(3, 0x55e1b9df4c30, 0, 0x7f726106c000) = 0 :54
975779251258114:975779251261601 566828:566834 hsa_amd_signal_create(1, 0, 0, 0, 0x7f70535fdbd0) = 0 :55
975779251262923:975779251263204 566828:566834 hsa_queue_load_write_index_relaxed(0x7f726109e000) = 0 :56
975779251264065:975779251264276 566828:566834 hsa_queue_load_read_index_relaxed(0x7f726109e000) = 0 :57
975779251264937:975779251265178 566828:566834 hsa_queue_store_write_index_screlease(0x7f726109e000, 1) = void :58
975779251265969:975779251266951 566828:566834 hsa_signal_store_screlease(, 0) = void :59
975779251267472:975779251283773 566828:566834 hsa_signal_wait_scacquire(, 0, 0, 18446744073709551615, 0) = 0 :60
975779251284654:975779251286848 566828:566834 hsa_signal_destroy() = 0 :61
975779251290806:975779251322035 566828:566834 hsa_memory_free(0x7f726106e000) = 0 :62
975779251322646:975779251341261 566828:566834 hsa_memory_free(0x7f726106c000) = 0 :63
975779251342043:975779251389061 566828:566834 hsa_executable_destroy() = 0 :64
975779251389843:975779251392488 566828:566834 hsa_code_object_reader_destroy() = 0 :65
@@ -0,0 +1,22 @@
ROCTracer (pid=566858):
0x55ae2fa607c0 agent cpu
0x55ae2fa5eeb0 agent gpu
0x55ae2fa9b540 agent gpu
975785718853775
HSA-trace()
HSA-activity-trace()
975785719398623:975785719398824 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :6
975785719403482:975785719403643 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :7
975785719404274:975785719404364 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :8
975785719404274:975785719404885 566858:566858 hsa_iterate_agents(1, 0) = 0 :5
ROCTracer (pid=566858):
0x55ae2fa607c0 agent cpu
0x55ae2fb02cc0 agent gpu
0x55ae2fa62970 agent gpu
975785742239830
HSA-trace()
HSA-activity-trace()
975785742436120:975785742436310 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :14
975785742437352:975785742437472 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :15
975785742437963:975785742438053 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :16
975785742437963:975785742438464 566858:566858 hsa_iterate_agents(1, 0) = 0 :13
@@ -0,0 +1,20 @@
:CopyHostToDevice : correlation_id(1) time_ns(109660008446578:109660008452178)
:hipMemcpy : correlation_id(1) time_ns(109659777462237:109660008474607)
:CopyHostToDevice : correlation_id(2) time_ns(109660011646881:109660011651041)
:hipMemcpy : correlation_id(2) time_ns(109660011115400:109660011817555)
:CopyHostToDevice : correlation_id(3) time_ns(109660011942080:109660011946240)
:hipMemcpy : correlation_id(3) time_ns(109660011846359:109660011951538)
:CopyHostToDevice : correlation_id(4) time_ns(109660011985759:109660011989919)
:hipMemcpy : correlation_id(4) time_ns(109660011961286:109660011994288)
:CopyHostToDevice : correlation_id(5) time_ns(109660012053439:109660012057599)
:hipMemcpy : correlation_id(5) time_ns(109660012029645:109660012062688)
:CopyHostToDevice : correlation_id(6) time_ns(109660012096639:109660012100799)
:hipMemcpy : correlation_id(6) time_ns(109660012073037:109660012105278)
:CopyHostToDevice : correlation_id(7) time_ns(109660012138239:109660012142879)
:hipMemcpy : correlation_id(7) time_ns(109660012114796:109660012147087)
:CopyHostToDevice : correlation_id(8) time_ns(109660012180158:109660012184478)
:hipMemcpy : correlation_id(8) time_ns(109660012156274:109660012188795)
:CopyHostToDevice : correlation_id(9) time_ns(109660012221438:109660012225758)
:hipMemcpy : correlation_id(9) time_ns(109660012198213:109660012230234)
:CopyHostToDevice : correlation_id(10) time_ns(109660012262398:109660012266878)
:hipMemcpy : correlation_id(10) time_ns(109660012239211:109660012271171)
@@ -0,0 +1,18 @@
ROCTracer (pid=993231):
rocTX-trace()
0xce5450 agent cpu
0xd1d520 agent gpu
0xd1fe80 agent gpu
628584618590744
628584859661999 993231:993231 1:0:"NestedRangeA"
628584859674021 993231:993231 2:0:""
628584859674693 993231:993231 1:0:"NestedRangeB"
628584859675344 993231:993231 1:0:"NestedRangeC"
628584859676115 993231:993231 3:1:"StartStopRangeA"
628584859678390 993231:993231 2:0:""
628584859678921 993231:993231 2:0:""
628584859755545 993231:993233 4:1:""
628584859819756 993231:993231 1:0:"NestedRangeD"
628584859820708 993231:993231 1:0:"NestedRangeE"
628584859821219 993231:993231 2:0:""
628584859824095 993231:993231 2:0:""
@@ -0,0 +1,24 @@
# dummy
MatrixTranspose_dryrun_trace --check-none
copy_dryrun_trace --check-none
MatrixTranspose_ctest_trace --check-count .*
MatrixTranspose_test_trace --check-count .* --ignore-count KernelExecution
MatrixTranspose_hipaact_test_trace --check-count .* --ignore-count KernelExecution|hipMemcpy|__hipPushCallConfiguration|hipLaunchKernel|__hipPopCallConfiguration
MatrixTranspose_mgpu_trace --check-events .*
MatrixTranspose_sys_trace --check-count .* --ignore-count matrixTranspose|hsa_.*
MatrixTranspose_sys_hsa_trace --check-count .* --ignore-count hsa_.*
MatrixTranspose_hip_period_trace --check-events .* --ignore-event hipMalloc|hipFree
MatrixTranspose_hip_flush_trace --check-count .* --ignore-count matrixTranspose
MatrixTranspose_kfd_trace --check-events .*
MatrixTranspose_hip_input_trace --check-events .*
copy_hsa_trace --check-events .*
copy_hsa_input_trace --check-events .*
load_unload_reload_trace --check-order .* --ignore-count hsa_agent_get_info
code_obj_trace --check-none
trace_buffer --check-none
memory_pool --check-none
activity_and_callback_trace --check-order .*
multi_pool_activities_trace --check-order .*
roctx_test_trace --check-count .*
backward_compat_test_trace --check-none
dlopen --check-none
+147
Féach ar an gComhad
@@ -0,0 +1,147 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <iostream>
// hip header file
#include <hip/hip_runtime.h>
#include "roctracer_ext.h"
// roctx header file
#include <roctx.h>
// Evaluates a HIP runtime call exactly once. On any status other than
// hipSuccess, prints the runtime's error string to stderr and aborts.
// Wrapped in do/while(0) so that it behaves as a single statement.
#define HIP_CALL(call)                                 \
  do {                                                 \
    const hipError_t err = (call);                     \
    if (err == hipSuccess) break;                      \
    fprintf(stderr, "%s\n", hipGetErrorString(err));   \
    abort();                                           \
  } while (0)
// Edge length of the square matrix that the test transposes.
#define WIDTH 1024
// Total number of elements in the WIDTH x WIDTH matrix.
#define NUM (WIDTH * WIDTH)
// Thread-block extents used to build the launch configuration in main().
#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
// NOTE(review): the launch in main() only passes the X and Y extents; this Z value is not
// referenced there.
#define THREADS_PER_BLOCK_Z 1
// GPU kernel (kernels must return void): each thread copies one element,
// reading in[col * width + row] and writing it to out[row * width + col].
// No bounds check is performed, so the launch grid must cover exactly
// width x width threads.
__global__ void matrixTranspose(float* out, float* in, const int width) {
  const int col = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
  const int row = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;

  const float value = in[col * width + row];
  out[row * width + col] = value;
}
// Host-side reference transpose of a square width x width matrix stored in
// row-major order: element (r, c) of `input` lands at (c, r) of `output`.
// The input is walked row by row.
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
  for (unsigned int src_row = 0; src_row != width; ++src_row) {
    const unsigned int row_base = src_row * width;
    for (unsigned int src_col = 0; src_col != width; ++src_col) {
      output[src_col * width + src_row] = input[row_base + src_col];
    }
  }
}
int main() {
float* Matrix;
float* TransposeMatrix;
float* cpuTransposeMatrix;
float* gpuMatrix;
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
std::cerr << "Device name " << devProp.name << std::endl;
int i;
int errors;
Matrix = (float*)malloc(NUM * sizeof(float));
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
// initialize the input data
for (i = 0; i < NUM; i++) {
Matrix[i] = (float)i * 10.0f;
}
// allocate the memory on the device side
HIP_CALL(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
HIP_CALL(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
uint32_t iterations = 100;
while (iterations-- > 0) {
std::cerr << "## Iteration (" << iterations << ") #################" << std::endl;
// Memory transfer from host to device
HIP_CALL(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
roctxMark("before hipLaunchKernel");
int rangeId = roctxRangeStart("hipLaunchKernel range");
roctxRangePush("hipLaunchKernel");
// Lauching kernel from host
hipLaunchKernelGGL(
matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
roctxMark("after hipLaunchKernel");
// Memory transfer from device to host
roctxRangePush("hipMemcpy");
HIP_CALL(
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
roctxRangePop(); // for "hipMemcpy"
roctxRangePop(); // for "hipLaunchKernel"
roctxRangeStop(rangeId);
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
// verify the results
errors = 0;
double eps = 1.0E-6;
for (i = 0; i < NUM; i++) {
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
errors++;
}
}
if (errors != 0) {
fprintf(stderr, "FAILED: %d errors\n", errors);
} else {
fprintf(stderr, "PASSED!\n");
}
}
// free the resources on device side
HIP_CALL(hipFree(gpuMatrix));
HIP_CALL(hipFree(gpuTransposeMatrix));
// free the resources on host side
free(Matrix);
free(TransposeMatrix);
free(cpuTransposeMatrix);
return errors;
}
+24
Féach ar an gComhad
@@ -0,0 +1,24 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
/* Each work-item copies one element from b into a (note the direction:
   b is the source, a is the destination), indexed by its global id in
   dimension 0. */
__kernel void copy(__global unsigned int* a, __global unsigned int* b) {
  const uint item = get_global_id(0);
  const unsigned int value = b[item];
  a[item] = value;
}
+454
Féach ar an gComhad
@@ -0,0 +1,454 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <hsa/hsa.h>
#include <hsa/hsa_ext_amd.h>
#include <hsa/hsa_ext_image.h>
#include <fcntl.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <string>
#include <vector>
#include <map>
#include <atomic>
#include <chrono>
#include <thread>
// Evaluates `x` once and terminates the process unless it equals
// HSA_STATUS_SUCCESS: assert(false) fires in debug builds, and abort() still
// stops the process when assertions are compiled out.
// NOTE: the expansion ends with its own ';', which existing call sites may
// rely on, so it is kept as-is.
#define CHECK(x)                                \
  do {                                          \
    if ((x) == HSA_STATUS_SUCCESS) break;       \
    assert(false);                              \
    abort();                                    \
  } while (false);
// One HSA agent together with the global-segment memory pools discovered on it.
// Instances are filled in by DeviceDiscovery().
struct Device {
// Description of a single global-segment memory pool.
struct Memory {
// Pool handle as reported by the runtime.
hsa_amd_memory_pool_t pool;
// True when the pool's global flags include HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED.
bool fine;
// True when the pool's global flags include HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT.
bool kernarg;
// Value of HSA_AMD_MEMORY_POOL_INFO_SIZE.
size_t size;
// Value of HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE.
size_t granule;
};
// Agent handle this entry describes.
hsa_agent_t agent;
// Receives HSA_AGENT_INFO_NAME for the agent.
char name[64];
// Global-segment pools of the agent, in enumeration order.
std::vector<Memory> pools;
// Index into `pools` of the chosen fine-grained pool, or -1u when none was found.
uint32_t fine;
// Index into `pools` of the chosen non-fine-grained pool, or -1u when none was found.
uint32_t coarse;
// Agent handles of every device that DeviceDiscovery() recorded (CPU and GPU alike).
static std::vector<hsa_agent_t> all_devices;
};
// Storage for the static member declared above.
std::vector<hsa_agent_t> Device::all_devices;
// Per-kernel launch metadata.
// NOTE(review): the code that fills these fields is outside this view; the field meanings below
// are inferred from their names and should be confirmed against that code.
struct Kernel {
// presumably the kernel object handle placed into dispatch packets
uint64_t handle;
// presumably the private (scratch) segment size
uint32_t scratch;
// presumably the group segment size
uint32_t group;
// presumably the kernarg segment size in bytes
uint32_t kernarg_size;
// presumably the required kernarg segment alignment
uint32_t kernarg_align;
};
// 16-bit packet header, accessible either field by field or as one raw word.
// The bitfields add up to exactly 16 bits (8 + 1 + 2 + 2 + 3), matching `raw`.
// Assumes bitfield layout is little endian.
// Assumes std::atomic<uint16_t> is binary compatible with uint16_t and uses HW atomics.
union AqlHeader {
struct {
// Packet type.
uint16_t type : 8;
// Barrier bit.
uint16_t barrier : 1;
// 2-bit acquire field (presumably the acquire fence scope - confirm against the HSA spec).
uint16_t acquire : 2;
// 2-bit release field (presumably the release fence scope - confirm against the HSA spec).
uint16_t release : 2;
// Unused bits.
uint16_t reserved : 3;
};
// The whole header as a single 16-bit value.
uint16_t raw;
};
// Packet layout carrying a signal, a value, a mask and a condition code, plus a completion signal.
// NOTE(review): this appears to mirror the AMD vendor-specific barrier-value AQL packet; the code
// that builds and submits it is outside this view, so confirm against the HSA AMD extension header.
struct BarrierValue {
// Common packet header.
AqlHeader header;
// Format selector byte following the header (presumably the AMD vendor packet format).
uint8_t AmdFormat;
uint8_t reserved;
uint32_t reserved1;
// Signal the packet refers to.
hsa_signal_t signal;
// Value associated with `signal`.
hsa_signal_value_t value;
// Mask associated with `signal`/`value`.
hsa_signal_value_t mask;
// Condition code.
uint32_t cond;
uint32_t reserved2;
uint64_t reserved3;
uint64_t reserved4;
// Signal associated with completion of this packet.
hsa_signal_t completion_signal;
};
// Overlay of the packet representations used by this test, so one storage slot can be viewed as
// any of them. `header` aliases the leading AqlHeader bytes of the other members.
union Aql {
AqlHeader header;
hsa_kernel_dispatch_packet_t dispatch;
hsa_barrier_and_packet_t barrier_and;
hsa_barrier_or_packet_t barrier_or;
BarrierValue barrier_value;
};
// Block of seven 8-byte "hidden" arguments: three offsets followed by four pointers.
// NOTE(review): presumably appended after the explicit arguments of an OpenCL-compiled kernel;
// the code that uses this struct is outside this view - confirm there.
struct OCLHiddenArgs {
// Offsets for the x, y and z dimensions.
uint64_t offset_x;
uint64_t offset_y;
uint64_t offset_z;
void* printf_buffer;
void* enqueue;
void* enqueue2;
void* multi_grid;
};
// Block of seven 8-byte "hidden" arguments with the same size and field positions as
// OCLHiddenArgs, but with the middle three slots left unnamed.
// NOTE(review): presumably the HIP counterpart of OCLHiddenArgs; usage is outside this view.
struct hip_hiddens {
// Offsets for the x, y and z dimensions.
uint64_t offset_x;
uint64_t offset_y;
uint64_t offset_z;
// Placeholder slots (unnamed/unused here).
uint64_t _;
uint64_t _2;
uint64_t _3;
uint64_t multi_grid_sync;
};
// Devices found by DeviceDiscovery(), split by agent type: agents reporting HSA_DEVICE_TYPE_CPU
// go to `cpu`, every other agent goes to `gpu`.
std::vector<Device> cpu, gpu;
// Fine-grained, kernarg-capable pool of a CPU device selected by DeviceDiscovery().
// Zero-initialized as a global, so pool.handle == 0 means "not found".
Device::Memory kernarg;
// Handles that together represent one loaded code object.
// NOTE(review): populated by code outside this view (see LoadCodeObject).
struct CodeObject {
// File handle of the code object file.
hsa_file_t file;
// Code object reader handle.
hsa_code_object_reader_t code_obj_rdr;
// Executable handle.
hsa_executable_t executable;
};
bool DeviceDiscovery() {
hsa_status_t err;
err = hsa_iterate_agents(
[](hsa_agent_t agent, void*) {
hsa_status_t err;
Device dev;
dev.agent = agent;
dev.fine = -1u;
dev.coarse = -1u;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, dev.name);
CHECK(err);
hsa_device_type_t type;
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
CHECK(err);
err = hsa_amd_agent_iterate_memory_pools(
agent,
[](hsa_amd_memory_pool_t pool, void* data) {
std::vector<Device::Memory>& pools =
*reinterpret_cast<std::vector<Device::Memory>*>(data);
hsa_status_t err;
hsa_amd_segment_t segment;
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
CHECK(err);
if (segment != HSA_AMD_SEGMENT_GLOBAL) return HSA_STATUS_SUCCESS;
uint32_t flags;
err =
hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags);
CHECK(err);
Device::Memory mem;
mem.pool = pool;
mem.fine = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED);
mem.kernarg = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT);
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &mem.size);
CHECK(err);
err = hsa_amd_memory_pool_get_info(
pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &mem.granule);
CHECK(err);
pools.push_back(mem);
return HSA_STATUS_SUCCESS;
},
(void*)&dev.pools);
if (!dev.pools.empty()) {
for (size_t i = 0; i < dev.pools.size(); i++) {
if (dev.pools[i].fine && dev.pools[i].kernarg && dev.fine == -1u) dev.fine = i;
if (dev.pools[i].fine && !dev.pools[i].kernarg) dev.fine = i;
if (!dev.pools[i].fine) dev.coarse = i;
}
if (type == HSA_DEVICE_TYPE_CPU)
cpu.push_back(dev);
else
gpu.push_back(dev);
Device::all_devices.push_back(dev.agent);
}
return HSA_STATUS_SUCCESS;
},
nullptr);
[]() {
for (auto& dev : cpu) {
for (auto& mem : dev.pools) {
if (mem.fine && mem.kernarg) {
kernarg = mem;
return;
}
}
}
}();
CHECK(err);
if (cpu.empty() || gpu.empty() || kernarg.pool.handle == 0) return false;
return true;
}
bool LoadCodeObject(std::string filename, hsa_agent_t agent, CodeObject& code_object) {
hsa_status_t err;
code_object.file = open(filename.c_str(), O_RDONLY);
if (code_object.file == -1) return false;
err = hsa_code_object_reader_create_from_file(code_object.file, &code_object.code_obj_rdr);
CHECK(err);
err = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
nullptr, &code_object.executable);
CHECK(err);
err = hsa_executable_load_agent_code_object(code_object.executable, agent,
code_object.code_obj_rdr, nullptr, nullptr);
if (err != HSA_STATUS_SUCCESS) return false;
err = hsa_executable_freeze(code_object.executable, nullptr);
CHECK(err);
return true;
}
bool GetKernel(const CodeObject& code_object, std::string kernel, hsa_agent_t agent, Kernel& kern) {
hsa_executable_symbol_t symbol;
hsa_status_t err =
hsa_executable_get_symbol_by_name(code_object.executable, kernel.c_str(), &agent, &symbol);
if (err != HSA_STATUS_SUCCESS) {
err = hsa_executable_get_symbol_by_name(code_object.executable, (kernel + ".kd").c_str(),
&agent, &symbol);
if (err != HSA_STATUS_SUCCESS) {
return false;
}
}
err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
&kern.handle);
CHECK(err);
err = hsa_executable_symbol_get_info(
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &kern.scratch);
CHECK(err);
// printf("Scratch: %d\n", kern.scratch);
err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
&kern.group);
CHECK(err);
// printf("LDS: %d\n", kern.group);
// Remaining needs code object v2 or comgr.
err = hsa_executable_symbol_get_info(
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kern.kernarg_size);
CHECK(err);
// printf("Kernarg Size: %d\n", kern.kernarg_size);
err = hsa_executable_symbol_get_info(
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &kern.kernarg_align);
CHECK(err);
// printf("Kernarg Align: %d\n", kern.kernarg_align);
return true;
}
// Copies `pkt` into the next free slot of `queue` and rings the doorbell.
// Returns false, without modifying the queue, when the ring has no free slot.
// `pkt` is temporarily modified but holds its original contents again on return.
// Not for parallel insertion: the write index is read, used and advanced without any
// reservation, so only one thread may submit to a given queue.
bool SubmitPacket(hsa_queue_t* queue, Aql& pkt) {
// Index wrap by masking; assumes queue->size is a power of two — TODO confirm.
size_t mask = queue->size - 1;
Aql* ring = (Aql*)queue->base_address;
uint64_t write = hsa_queue_load_write_index_relaxed(queue);
uint64_t read = hsa_queue_load_read_index_relaxed(queue);
// One more packet would exceed the ring capacity.
if (write - read + 1 > queue->size) return false;
Aql& dst = ring[write & mask];
// Copy the whole packet while keeping the slot's current header value in place, then
// store the real header last with release ordering so the packet body is written
// before the header changes.
uint16_t header = pkt.header.raw;
pkt.header.raw = dst.header.raw;
dst = pkt;
__atomic_store_n(&dst.header.raw, header, __ATOMIC_RELEASE);
// Restore the caller's packet.
pkt.header.raw = header;
hsa_queue_store_write_index_release(queue, write + 1);
// The doorbell carries the index of the packet just written.
hsa_signal_store_screlease(queue->doorbell_signal, write);
return true;
}
// Allocates `size` bytes from the pool described by `mem` and grants access to every
// agent recorded in Device::all_devices. Aborts via CHECK if either HSA call fails.
void* hsaMalloc(size_t size, const Device::Memory& mem) {
  void* buffer;
  hsa_status_t status = hsa_amd_memory_pool_allocate(mem.pool, size, 0, &buffer);
  CHECK(status);

  const std::vector<hsa_agent_t>& agents = Device::all_devices;
  status = hsa_amd_agents_allow_access(agents.size(), &agents[0], nullptr, buffer);
  CHECK(status);

  return buffer;
}
// Allocates `size` bytes from the fine-grained (`fine == true`) or coarse-grained pool of
// `dev`. Terminates the program when the device has no pool of the requested kind.
void* hsaMalloc(size_t size, const Device& dev, bool fine) {
  const uint32_t index = fine ? dev.fine : dev.coarse;
  // -1u marks "no such pool". The check must survive NDEBUG builds: with only an assert,
  // a release build would index dev.pools far out of bounds.
  if (index == -1u || index >= dev.pools.size()) {
    fprintf(stderr, "Memory type unavailable.\n");
    assert(false && "Memory type unavailable.");
    abort();
  }
  return hsaMalloc(size, dev.pools[index]);
}
// Per-thread test body. For each of the first `agents` GPUs it creates a queue, loads
// "<agent name>_copy.hsaco", and `kiter` times allocates two 64-element buffers,
// dispatches the "copy" kernel `diter` times and verifies that `a` equals `b`.
//
// kiter  - number of allocate/dispatch/verify rounds per GPU
// diter  - dispatches per round; only the last dispatch carries the completion signal,
//          so diter must be >= 1 or the signal wait below never returns
// agents - number of GPUs to use; must not exceed gpu.size()
//
// Any failure aborts the process.
//
// NOTE(review): neither `args` nor `queue` is ever released. They are left as they were
// because freeing them adds HSA API calls to the sequence this program issues, and this
// binary appears to be run under the tracer with its output compared against expected
// traces — confirm against those traces before adding hsa_memory_free(args) /
// hsa_queue_destroy(queue).
void test_func(int kiter, int diter, int agents) {
  for (int device_index = 0; device_index < agents; ++device_index) {
    hsa_status_t err;
    hsa_queue_t* queue;
    err = hsa_queue_create(gpu[device_index].agent, 1024, HSA_QUEUE_TYPE_SINGLE, nullptr, nullptr,
                           0, 0, &queue);
    CHECK(err);

    CodeObject code_object;
    if (!LoadCodeObject(std::string(gpu[device_index].name) + "_copy.hsaco",
                        gpu[device_index].agent, code_object)) {
      printf("Kernel file not found or not usable with given agent.\n");
      abort();
    }

    Kernel copy;
    if (!GetKernel(code_object, "copy", gpu[device_index].agent, copy)) {
      printf("Test kernel not found.\n");
      abort();
    }

    for (int i = 0; i < kiter; ++i) {
      struct args_t {
        uint32_t* a;
        uint32_t* b;
        OCLHiddenArgs hidden;
      };

      args_t* args = (args_t*)hsaMalloc(sizeof(args_t), kernarg);
      memset(args, 0, sizeof(args_t));

      uint32_t* a = (uint32_t*)hsaMalloc(64 * sizeof(uint32_t), kernarg);
      uint32_t* b = (uint32_t*)hsaMalloc(64 * sizeof(uint32_t), kernarg);
      memset(a, 0, 64 * sizeof(uint32_t));
      memset(b, 1, 64 * sizeof(uint32_t));

      // The kernel arguments are identical for every dispatch of this round.
      args->a = a;
      args->b = b;

      hsa_signal_t signal;
      // Use interrupts.
      err = hsa_amd_signal_create(1, 0, nullptr, 0, &signal);
      CHECK(err);

      for (int j = 1; j <= diter; ++j) {
        Aql packet{};
        packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
        packet.header.barrier = 1;
        packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
        packet.header.release = HSA_FENCE_SCOPE_SYSTEM;

        packet.dispatch.setup = 1;
        packet.dispatch.workgroup_size_x = 64;
        packet.dispatch.workgroup_size_y = 1;
        packet.dispatch.workgroup_size_z = 1;
        packet.dispatch.grid_size_x = 64;
        packet.dispatch.grid_size_y = 1;
        packet.dispatch.grid_size_z = 1;

        packet.dispatch.group_segment_size = copy.group;
        packet.dispatch.private_segment_size = copy.scratch;
        packet.dispatch.kernel_object = copy.handle;
        packet.dispatch.kernarg_address = args;

        // Only the final dispatch signals completion.
        if (j == diter) packet.dispatch.completion_signal = signal;

        // SubmitPacket returns false when the ring is full. Dropping the packet would
        // lose a dispatch and, for the last one, leave the wait below blocked forever,
        // so keep retrying until a slot frees up.
        while (!SubmitPacket(queue, packet)) std::this_thread::yield();
      }

      hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 0, -1, HSA_WAIT_STATE_BLOCKED);
      err = hsa_signal_destroy(signal);
      CHECK(err);

      // Distinct index name: the original inner loop shadowed the round counter `i`.
      for (int idx = 0; idx < 64; idx++) {
        if (a[idx] != b[idx]) {
          printf("error at %d: expected %d, got %d\n", idx, b[idx], a[idx]);
          abort();
        }
      }

      err = hsa_memory_free(a);
      CHECK(err);
      err = hsa_memory_free(b);
      CHECK(err);
    }

    err = hsa_executable_destroy(code_object.executable);
    CHECK(err);
    err = hsa_code_object_reader_destroy(code_object.code_obj_rdr);
    CHECK(err);
    close(code_object.file);
  }
}
// Entry point of the HSA copy test.
// Environment variables (each defaults to 1 when unset):
//   ROCP_KITER  - kernel load/verify rounds per GPU
//   ROCP_DITER  - dispatches per round
//   ROCP_AGENTS - number of GPU agents to use (capped at the number discovered)
//   ROCP_THRS   - number of host threads, each running test_func independently
// Returns 0 on success and -1 when no usable devices are found.
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;

  const auto env_int = [](const char* name, int fallback) {
    const char* value = getenv(name);
    return (value != nullptr) ? atoi(value) : fallback;
  };
  const int kiter = env_int("ROCP_KITER", 1);
  const int diter = env_int("ROCP_DITER", 1);
  int agents = env_int("ROCP_AGENTS", 1);
  int threads = env_int("ROCP_THRS", 1);

  hsa_status_t err;
  err = hsa_init();
  CHECK(err);

  if (!DeviceDiscovery()) {
    printf("Usable devices not found.\n");
    // Balance the successful hsa_init above before leaving.
    (void)hsa_shut_down();
    return -1;
  }

  // Never ask for more GPUs than were discovered.
  const int gpu_count = static_cast<int>(gpu.size());
  if (agents > gpu_count) agents = gpu_count;
  // A negative count would turn into an enormous vector size below.
  if (threads < 0) threads = 0;

  std::vector<std::thread> workers;
  workers.reserve(static_cast<size_t>(threads));
  for (int n = 0; n < threads; ++n) workers.emplace_back(test_func, kiter, diter, agents);
  for (auto& worker : workers) worker.join();

  err = hsa_shut_down();
  CHECK(err);
  return 0;
}
@@ -0,0 +1,50 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <hsa/hsa.h>
#include <cassert>
#include <cstdlib>
// Aborts the process when an HSA call does not return HSA_STATUS_SUCCESS.
// assert(false) reports the failing line in debug builds; abort() still stops the
// program when assertions are compiled out.
// The macro deliberately has no trailing semicolon: the caller's own `;` completes
// the statement, so `CHECK(x);` behaves as a single statement inside if/else.
#define CHECK(x)                     \
  do {                               \
    if ((x) != HSA_STATUS_SUCCESS) { \
      assert(false);                 \
      abort();                       \
    }                                \
  } while (false)
// Runs 2 loops of {hsa_init(); hsa_iterate_agents(); hsa_shut_down()} to test that the
// tracer tool is correctly unloaded after the 1st iteration and then reloaded for the 2nd.
// Every HSA call is checked: a failing init or shutdown is exactly the kind of problem
// this test exists to catch, so it must not pass silently.
int main() {
  for (int i = 0; i < 2; ++i) {
    CHECK(hsa_init());
    CHECK(hsa_iterate_agents(
        [](hsa_agent_t agent, void*) {
          hsa_device_type_t type;
          return hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
        },
        nullptr));
    CHECK(hsa_shut_down());
  }
  return 0;
}
+206
Féach ar an gComhad
@@ -0,0 +1,206 @@
#!/bin/sh
################################################################################
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# cd to build directory
# BIN_DIR is resolved to an absolute path first: it is reused after the cd below, where a
# relative value would point somewhere else.
BIN_NAME=$(basename "$0")
BIN_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1

# To enable symbol lookup in the .dynsym section after llvm-strip
export LOADER_USE_DYNSYM=1

cd "$BIN_DIR" || exit 1

# Default the ROCtracer library location to the install tree next to this script.
if [ -z "$ROCTRACER_LIB_PATH" ] ; then
  if test -f "${BIN_DIR}/../../lib/libroctracer64.so" ; then
    ROCTRACER_LIB_PATH="${BIN_DIR}/../../lib"
  fi
fi

# enable tools load failure reporting
export HSA_TOOLS_REPORT_LOAD_FAILURE=1

# paths to ROC profiler and other libraries
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD"
if [ -n "$ROCTRACER_LIB_PATH" ] ; then
  export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$ROCTRACER_LIB_PATH"
fi
if [ -z "$ROCTRACER_LIB_PATH" ] ; then
  ROCTRACER_LIB_PATH="."
fi

# Default the tool library location the same way, falling back to the current directory.
if [ -z "$ROCTRACER_TOOL_PATH" ] ; then
  if test -f "${BIN_DIR}/../../lib/roctracer/libroctracer_tool.so" ; then
    ROCTRACER_TOOL_PATH="${BIN_DIR}/../../lib/roctracer"
  else
    ROCTRACER_TOOL_PATH="."
  fi
fi

# test filter input
# usage: <script> [test number] [-n]
#   test number  run only the test with this number (default -1: run all)
#   -n           pass "-ck 0" to the trace checker instead of "-ck 1"
test_filter=-1
check_trace_flag=1
if [ -n "$1" ] ; then
  test_filter=$1
  shift
fi
# After the shift, "-n" directly following the filter is $1. The previous position ($2)
# is still accepted so existing invocations keep working.
if [ "$1" = "-n" ] || [ "$2" = "-n" ] ; then
  check_trace_flag=0
fi
# test check routine
# test_status: number of failed tests; also used as the script's exit status
test_status=0
# test_runnum: number of tests actually executed (those that passed the filter)
test_runnum=0
# test_number: index of the next test; advanced by every eval_test call
test_number=0
# failed_tests: report text, extended by eval_test for each failing test
failed_tests="Failed tests:"
# No-op stand-in for eval_test: runs nothing and leaves test_number unchanged.
# NOTE(review): presumably used to disable a test by renaming its eval_test call — confirm.
xeval_test() {
test_number=$test_number
}
# Colour escape sequences for the PASSED/FAILED lines. They are only defined when the
# terminal reports at least 8 colours; otherwise the variables stay unset.
ncolors=$(tput colors || echo 0)
if [ -n "$ncolors" ] ; then
  if [ "$ncolors" -ge 8 ] ; then
    bright="$(tput bold || echo)"
    red="$(tput setaf 1 || echo)"
    green="$(tput setaf 2 || echo)"
    blue="$(tput setaf 4 || echo)"
    normal="$(tput sgr0 || echo)"
  fi
fi
# Runs one test and records the result.
#   $1  human readable label
#   $2  command line, executed with eval
#   $3  test name; used for the output files and passed to the trace checker
# The test is skipped unless test_filter is -1 or equals the current test number.
# stdout/stderr of the command go to /tmp/test/out/<name>.out and .err. A test fails if
# the command exits non-zero or if check_trace.py rejects its output; on a checker
# failure the checker is run a second time with -v to show details.
# Updates test_runnum, test_status, failed_tests and always advances test_number.
eval_test() {
  label=$1
  cmdline=$2
  test_name=$3

  # Expansions are quoted and the two conditions are joined with || rather than the
  # obsolescent "-o", so unusual filter values cannot break the test expression.
  if [ "$test_filter" = -1 ] || [ "$test_filter" = "$test_number" ] ; then
    echo "test $test_number: $test_name \"$label\""
    echo "CMD: \"$cmdline\""
    mkdir -p /tmp/test/out
    test_runnum=$((test_runnum + 1))
    eval "$cmdline" 1>"/tmp/test/out/$test_name.out" 2>"/tmp/test/out/$test_name.err"
    is_failed=$?

    if [ "$is_failed" != 0 ] ; then
      echo "--- stdout ---"
      cat "/tmp/test/out/$test_name.out"
      echo "--- stderr ---"
      cat "/tmp/test/out/$test_name.err"
    else
      python3 ./test/check_trace.py -in "$test_name" -ck "$check_trace_flag"
      is_failed=$?
      if [ "$is_failed" != 0 ] ; then
        echo "Trace checker error:"
        python3 ./test/check_trace.py -v -in "$test_name" -ck "$check_trace_flag"
      fi
    fi

    if [ "$is_failed" = 0 ] ; then
      echo "${bright:-}${blue:-}$test_name: ${green:-}PASSED${normal:-}"
    else
      echo "${bright:-}${blue:-}$test_name: ${red:-}FAILED${normal:-}"
      failed_tests="$failed_tests\n  $test_number: $test_name - \"$label\""
      test_status=$((test_status + 1))
    fi
  fi

  test_number=$((test_number + 1))
}
# Tests dry run
# No tracer is preloaded at this point.
eval_test "MatrixTranspose dry run" ./test/MatrixTranspose MatrixTranspose_dryrun_trace
eval_test "copy dry run" ./test/copy copy_dryrun_trace
# Standalone test
# ROCtracer is used explicitly by test
eval_test "standalone C test" "./test/MatrixTranspose_ctest" MatrixTranspose_ctest_trace
eval_test "standalone HIP test" "./test/MatrixTranspose_test" MatrixTranspose_test_trace
eval_test "standalone HIP hipaact test" "./test/MatrixTranspose_hipaact_test" MatrixTranspose_hipaact_test_trace
eval_test "standalone HIP MGPU test" "./test/MatrixTranspose_mgpu" MatrixTranspose_mgpu_trace
# Tool test
# ROCtracer/tool is loaded by HSA runtime
# LD_PRELOAD stays exported for every test up to the "unset LD_PRELOAD" below.
export LD_PRELOAD="$ROCTRACER_TOOL_PATH/libroctracer_tool.so"
# ROCTX test
export ROCTRACER_DOMAIN="roctx"
eval_test "roctx test" ./test/roctx_test roctx_test_trace
# SYS test
export ROCTRACER_DOMAIN="sys:roctx"
eval_test "tool SYS test" ./test/MatrixTranspose MatrixTranspose_sys_trace
export ROCTRACER_DOMAIN="sys:hsa:roctx"
eval_test "tool SYS/HSA test" ./test/MatrixTranspose MatrixTranspose_sys_hsa_trace
# Tracing control <delay:length:rate>
export ROCTRACER_DOMAIN="hip"
eval_test "tool period test" "ROCP_CTRL_RATE=10:50000:500000 ./test/MatrixTranspose" MatrixTranspose_hip_period_trace
eval_test "tool flushing test" "ROCP_FLUSH_RATE=100000 ./test/MatrixTranspose" MatrixTranspose_hip_flush_trace
# API records filtering
# The filter is written to a fixed file, /tmp/input.xml, and selected through ROCP_INPUT.
echo "<trace name=\"HIP\"><parameters api=\"hipFree, hipMalloc, hipMemcpy\"></parameters></trace>" > /tmp/input.xml
export ROCP_INPUT=/tmp/input.xml
eval_test "tool HIP test input" ./test/MatrixTranspose MatrixTranspose_hip_input_trace
unset ROCP_INPUT
# HSA test
export ROCTRACER_DOMAIN="hsa"
# test trace
export ROC_TEST_TRACE=1
# kernels loading iterations
export ROCP_KITER=1
# kernels dispatching iterations per kernel load
# dispatching to the same queue
export ROCP_DITER=1
# GPU agents number
export ROCP_AGENTS=1
# host threads number
# each thread creates a queue per GPU agent
export ROCP_THRS=1
eval_test "tool HSA test" ./test/copy copy_hsa_trace
# Same test again with the HSA records restricted through /tmp/input.xml.
echo "<trace name=\"HSA\"><parameters api=\"hsa_agent_get_info, hsa_amd_memory_pool_allocate\"></parameters></trace>" > /tmp/input.xml
export ROCP_INPUT=/tmp/input.xml
eval_test "tool HSA test input" ./test/copy copy_hsa_input_trace
unset ROCP_INPUT
# Check that the tracer tool can be unloaded and then reloaded.
eval_test "Load/Unload/Reload the tracer tool" ./test/load_unload_reload_test load_unload_reload_trace
# Replace the preloaded tracer tool with the code object test library for this one test.
export LD_PRELOAD=${BIN_DIR}/test/libcodeobj_test.so
eval_test "tool tracer codeobj" ./test/MatrixTranspose code_obj_trace
unset LD_PRELOAD
#valgrind --leak-check=full $tbin
#valgrind --tool=massif $tbin
#ms_print massif.out.<N>
# Remaining tests run without any preloaded library.
eval_test "directed TraceBuffer test" ./test/trace_buffer trace_buffer
eval_test "directed MemoryPool test" ./test/memory_pool memory_pool
eval_test "enable/disable callbacks and activities test" ./test/activity_and_callback activity_and_callback_trace
eval_test "use multiple memory pools in HIP activities test" ./test/multi_pool_activities multi_pool_activities_trace
eval_test "Dynamically load the tracer library test" ./test/dlopen dlopen
eval_test "backward compatibility tests" ./test/backward_compat_test backward_compat_test_trace
# Summary: total number of tests, how many were run, how many failed.
echo "$test_number tests total / $test_runnum tests run / $test_status tests failed"
if [ "$test_status" != 0 ] ; then
  # failed_tests separates its entries with a literal "\n". Whether echo expands that is
  # shell dependent, so let printf's %b do it; quoting also keeps the text free from
  # word splitting and globbing.
  printf '%b\n' "$failed_tests"
fi
# The exit status is the number of failed tests (0 when everything passed).
exit "$test_status"
@@ -0,0 +1,81 @@
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#include <roctracer_hip.h>

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iterator>
#include <string>
#include <thread>
#include <utility>
#include <vector>
// Create as many threads as there are cores, half changing the hipSetDevice roctracer API callback
// and argument, and the other half calling hipSetDevice, all running concurrently. If there is a
// race when setting the API callback and argument, the test aborts.

// Iterations performed by every thread.
constexpr int N_ITER = 1000000;

namespace {

// Number of "processor" tokens in /proc/cpuinfo; 0 when the file cannot be read.
std::ifstream cpuinfo("/proc/cpuinfo");
const int num_cpu_cores =
    std::count(std::istream_iterator<std::string>(cpuinfo), std::istream_iterator<std::string>(),
               std::string("processor"));

// Each instantiation is a distinct callback that expects its own address as the user
// argument and aborts when it receives anything else.
template <std::size_t N> void callback(uint32_t, uint32_t, const void*, void* arg) {
  // The callback argument must match the callback function.
  // The function pointer is converted explicitly, as main() does when registering it;
  // comparing void* with a function pointer directly is only accepted as an extension.
  if (arg != reinterpret_cast<void*>(&callback<N>)) abort();
}

// Builds an array holding &callback<0> ... &callback<N-1>.
template <std::size_t... Is> constexpr auto create_callbacks(std::index_sequence<Is...>) {
  return std::array{&callback<Is>...};
}

template <std::size_t N> constexpr auto create_callbacks() {
  return create_callbacks(std::make_index_sequence<N>{});
}

// Pool of 128 distinct callbacks; main() assigns them to threads round-robin.
constexpr auto callbacks = create_callbacks<128>();

}  // namespace
int main() {
if (hipSetDevice(0) != hipSuccess) abort();
std::vector<std::thread> threads;
for (int i = 0; i < std::max(2, num_cpu_cores / 2); ++i) {
threads.emplace_back(
[](auto callback) {
for (int n = 0; n < N_ITER; ++n)
roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipSetDevice, callback,
reinterpret_cast<void*>(callback));
},
callbacks[i % callbacks.size()]);
threads.emplace_back([]() {
for (int n = 0; n < N_ITER; ++n)
if (hipSetDevice(0) != hipSuccess) abort();
});
}
for (auto&& thread : threads) thread.join();
roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API);
return 0;
}