Add 'projects/roctracer/' from commit 'dd745ed9c731cf1c67a182a4ce41ce30afbfb8ca'
git-subtree-dir: projects/roctracer git-subtree-mainline:d8cba83d42git-subtree-split:dd745ed9c7
Tá an tiomantas seo le fáil i:
@@ -0,0 +1,40 @@
|
||||
resources:
|
||||
repositories:
|
||||
- repository: pipelines_repo
|
||||
type: github
|
||||
endpoint: ROCm
|
||||
name: ROCm/ROCm
|
||||
|
||||
variables:
|
||||
- group: common
|
||||
- template: /.azuredevops/variables-global.yml@pipelines_repo
|
||||
|
||||
trigger:
|
||||
batch: true
|
||||
branches:
|
||||
include:
|
||||
- amd-mainline
|
||||
- amd-staging
|
||||
paths:
|
||||
exclude:
|
||||
- .github
|
||||
- doc
|
||||
- LICENSE
|
||||
- README.md
|
||||
|
||||
pr:
|
||||
autoCancel: true
|
||||
branches:
|
||||
include:
|
||||
- amd-mainline
|
||||
- amd-staging
|
||||
paths:
|
||||
exclude:
|
||||
- .github
|
||||
- doc
|
||||
- LICENSE
|
||||
- README.md
|
||||
drafts: false
|
||||
|
||||
jobs:
|
||||
- template: ${{ variables.CI_COMPONENT_PATH }}/roctracer.yml@pipelines_repo
|
||||
@@ -0,0 +1,60 @@
|
||||
---
|
||||
Language: Cpp
|
||||
# BasedOnStyle: Google
|
||||
AccessModifierOffset: -1
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
AlignEscapedNewlinesLeft: false
|
||||
AlignTrailingComments: true
|
||||
AlignConsecutiveAssignments: false
|
||||
AlignOperands: false
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: false
|
||||
AllowShortIfStatementsOnASingleLine: true
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AlwaysBreakAfterDefinitionReturnType: false
|
||||
AlwaysBreakTemplateDeclarations: false
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
BreakBeforeBinaryOperators: false
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakConstructorInitializersBeforeComma: false
|
||||
BinPackParameters: true
|
||||
ColumnLimit: 100
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
IndentCaseLabels: true
|
||||
IndentWrappedFunctionNames: false
|
||||
IndentFunctionDeclarationAfterType: false
|
||||
MaxEmptyLinesToKeep: 2
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
NamespaceIndentation: None
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: false
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
DerivePointerAlignment: false
|
||||
PointerAlignment: Left
|
||||
SpacesBeforeTrailingComments: 2
|
||||
Cpp11BracedListStyle: true
|
||||
Standard: Auto
|
||||
IndentWidth: 2
|
||||
TabWidth: 8
|
||||
UseTab: Never
|
||||
BreakBeforeBraces: Attach
|
||||
SpacesInParentheses: false
|
||||
SpacesInAngles: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInContainerLiterals: true
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
ContinuationIndentWidth: 4
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
|
||||
SpaceBeforeParens: ControlStatements
|
||||
DisableFormat: false
|
||||
SortIncludes: false
|
||||
...
|
||||
@@ -0,0 +1,5 @@
|
||||
disabled: false
|
||||
scmId: gh-emu-rocm
|
||||
branchesToScan:
|
||||
- amd-staging
|
||||
- amd-mainline
|
||||
@@ -0,0 +1,15 @@
|
||||
name: Rocm Validation Suite KWS
|
||||
on:
|
||||
push:
|
||||
branches: [amd-staging]
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened]
|
||||
workflow_dispatch:
|
||||
jobs:
|
||||
kws:
|
||||
if: ${{ github.event_name == 'pull_request' }}
|
||||
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/kws.yml@mainline
|
||||
secrets: inherit
|
||||
with:
|
||||
pr_number: ${{github.event.pull_request.number}}
|
||||
base_branch: ${{github.base_ref}}
|
||||
@@ -0,0 +1,25 @@
|
||||
name: ROCm CI Caller
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [amd-staging, amd-npi, release/rocm-rel-*, amd-mainline]
|
||||
types: [opened, reopened, synchronize]
|
||||
push:
|
||||
branches: [amd-mainline]
|
||||
workflow_dispatch:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
|
||||
jobs:
|
||||
call-workflow:
|
||||
if: ${{ github.event_name != 'issue_comment' || github.event.comment.body == '!verify' }}
|
||||
uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/rocm_ci.yml@mainline
|
||||
secrets: inherit
|
||||
with:
|
||||
input_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
|
||||
input_pr_num: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 0 }}
|
||||
input_pr_url: ${{ github.event_name == 'pull_request' && github.event.pull_request.html_url || '' }}
|
||||
input_pr_title: ${{ github.event_name == 'pull_request' && github.event.pull_request.title || '' }}
|
||||
repository_name: ${{ github.repository }}
|
||||
base_ref: ${{ github.event_name == 'pull_request' && github.base_ref || github.ref }}
|
||||
trigger_event_type: ${{ github.event_name }}
|
||||
@@ -0,0 +1,17 @@
|
||||
name: Sync amd-mainline to public repository
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-mainline ]
|
||||
|
||||
jobs:
|
||||
git-mirror:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: git-sync
|
||||
uses: AMD-ROCm-Internal/rocprofiler-github-actions@git-sync-v3
|
||||
with:
|
||||
source_repo: "https://${{ secrets.TOKEN }}@github.com/AMD-ROCm-Internal/roctracer.git"
|
||||
source_branch: "amd-mainline"
|
||||
destination_repo: "https://${{ secrets.EXT_TOKEN }}@github.com/ROCm/roctracer.git"
|
||||
destination_branch: "amd-mainline"
|
||||
@@ -0,0 +1,17 @@
|
||||
name: Sync amd-staging to public repository
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ amd-staging ]
|
||||
|
||||
jobs:
|
||||
git-mirror:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: git-sync
|
||||
uses: AMD-ROCm-Internal/rocprofiler-github-actions@git-sync-v3
|
||||
with:
|
||||
source_repo: "https://${{ secrets.TOKEN }}@github.com/AMD-ROCm-Internal/roctracer.git"
|
||||
source_branch: "amd-staging"
|
||||
destination_repo: "https://${{ secrets.EXT_TOKEN }}@github.com/ROCm/roctracer.git"
|
||||
destination_branch: "amd-staging"
|
||||
@@ -0,0 +1,7 @@
|
||||
.*
|
||||
!.gitignore
|
||||
*.o
|
||||
*.exe
|
||||
*.swp
|
||||
*.Po
|
||||
build
|
||||
@@ -0,0 +1,240 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
cmake_minimum_required(VERSION 3.18.0)
|
||||
|
||||
project(roctracer VERSION 4.1.0)
|
||||
|
||||
if(${ROCM_PATCH_VERSION})
|
||||
set(PROJECT_VERSION_PATCH ${ROCM_PATCH_VERSION})
|
||||
set(PROJECT_VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}")
|
||||
endif()
|
||||
|
||||
include(GNUInstallDirs)
|
||||
# set default ROCM_PATH
|
||||
if(NOT DEFINED ROCM_PATH)
|
||||
set(ROCM_PATH "/opt/rocm" CACHE STRING "Default ROCM installation directory")
|
||||
endif()
|
||||
|
||||
## Build is not supported on Windows platform
|
||||
if(WIN32)
|
||||
message(FATAL_ERROR "Windows build is not supported.")
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
add_compile_options(-Wall -Wno-error=ignored-attributes -Werror)
|
||||
# To set additional RUNPATH in libraries
|
||||
# installed in /opt/rocm-ver/lib/roctracer
|
||||
set(ROCM_APPEND_PRIVLIB_RPATH "$ORIGIN/..")
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
find_package(hsa-runtime64 REQUIRED CONFIG PATHS ${ROCM_PATH})
|
||||
find_package(HIP REQUIRED CONFIG PATHS ${ROCM_PATH})
|
||||
|
||||
if(NOT DEFINED LIBRARY_TYPE)
|
||||
set(LIBRARY_TYPE SHARED)
|
||||
endif()
|
||||
|
||||
## Build libraries
|
||||
add_subdirectory(src)
|
||||
|
||||
## Build tests
|
||||
if(${LIBRARY_TYPE} STREQUAL SHARED)
|
||||
add_subdirectory(test)
|
||||
endif()
|
||||
|
||||
## Build Plugins
|
||||
add_subdirectory(plugin)
|
||||
|
||||
if(${LIBRARY_TYPE} STREQUAL SHARED)
|
||||
|
||||
## Installation and packaging
|
||||
if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX)
|
||||
get_filename_component(DEST_NAME ${CPACK_PACKAGING_INSTALL_PREFIX} NAME)
|
||||
get_filename_component(DEST_DIR ${CPACK_PACKAGING_INSTALL_PREFIX} DIRECTORY)
|
||||
set(CPACK_PACKAGING_INSTALL_PREFIX ${DEST_DIR})
|
||||
endif()
|
||||
message("-----------Dest-name: ${DEST_NAME}")
|
||||
message("------Install-prefix: ${CMAKE_INSTALL_PREFIX}")
|
||||
message("-----------CPACK-dir: ${CPACK_PACKAGING_INSTALL_PREFIX}")
|
||||
|
||||
## Packaging directives
|
||||
set(CPACK_GENERATOR "DEB" "RPM" "TGZ" CACHE STRING "CPACK GENERATOR DEB;RPM")
|
||||
set(ENABLE_LDCONFIG ON CACHE BOOL "Set library links and caches using ldconfig.")
|
||||
set(CPACK_PACKAGE_NAME "${PROJECT_NAME}")
|
||||
set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.")
|
||||
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
|
||||
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
|
||||
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
|
||||
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}")
|
||||
set(CPACK_PACKAGE_CONTACT "ROCm Profiler Support <dl.ROCm-Profiler.support@amd.com>")
|
||||
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD ROCTRACER library")
|
||||
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
|
||||
|
||||
if(DEFINED ENV{ROCM_LIBPATCH_VERSION})
|
||||
set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION}.$ENV{ROCM_LIBPATCH_VERSION}")
|
||||
message("Using CPACK_PACKAGE_VERSION ${CPACK_PACKAGE_VERSION}")
|
||||
endif()
|
||||
|
||||
## Install license file
|
||||
install(FILES ${CPACK_RESOURCE_FILE_LICENSE}
|
||||
DESTINATION ${CMAKE_INSTALL_DOCDIR}
|
||||
COMPONENT runtime)
|
||||
|
||||
install(FILES ${CPACK_RESOURCE_FILE_LICENSE}
|
||||
DESTINATION ${CMAKE_INSTALL_DOCDIR}-asan
|
||||
COMPONENT asan)
|
||||
|
||||
## Debian package specific variables
|
||||
if(DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
|
||||
set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE})
|
||||
else()
|
||||
set(CPACK_DEBIAN_PACKAGE_RELEASE "local")
|
||||
endif()
|
||||
message("Using CPACK_DEBIAN_PACKAGE_RELEASE ${CPACK_DEBIAN_PACKAGE_RELEASE}")
|
||||
set(CPACK_DEB_COMPONENT_INSTALL ON)
|
||||
set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT")
|
||||
set(CPACK_DEBIAN_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
|
||||
set(CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS "rocm-core")
|
||||
set(CPACK_DEBIAN_DEV_PACKAGE_NAME "${PROJECT_NAME}-dev")
|
||||
set(CPACK_DEBIAN_DEV_PACKAGE_DEPENDS "${PROJECT_NAME}, hsa-rocr-dev, rocm-core")
|
||||
set(CPACK_DEBIAN_TESTS_PACKAGE_NAME "${PROJECT_NAME}-tests")
|
||||
set(CPACK_DEBIAN_TESTS_PACKAGE_DEPENDS "${PROJECT_NAME}-dev")
|
||||
# Debian package specific variable for ASAN
|
||||
set(CPACK_DEBIAN_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" )
|
||||
set(CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS "rocm-core-asan" )
|
||||
|
||||
## RPM package specific variables
|
||||
if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE})
|
||||
set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE})
|
||||
else()
|
||||
set(CPACK_RPM_PACKAGE_RELEASE "local")
|
||||
endif()
|
||||
message("Using CPACK_RPM_PACKAGE_RELEASE ${CPACK_RPM_PACKAGE_RELEASE}")
|
||||
|
||||
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
|
||||
|
||||
## 'dist' breaks manual builds on debian systems due to empty Provides
|
||||
execute_process(COMMAND rpm --eval %{?dist}
|
||||
RESULT_VARIABLE PROC_RESULT
|
||||
OUTPUT_VARIABLE EVAL_RESULT
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
message("RESULT_VARIABLE ${PROC_RESULT} OUTPUT_VARIABLE: ${EVAL_RESULT}")
|
||||
|
||||
if(PROC_RESULT EQUAL "0" AND NOT EVAL_RESULT STREQUAL "")
|
||||
string(APPEND CPACK_RPM_PACKAGE_RELEASE "%{?dist}")
|
||||
endif()
|
||||
set(CPACK_RPM_COMPONENT_INSTALL ON)
|
||||
set(CPACK_RPM_FILE_NAME "RPM-DEFAULT")
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_NAME "${PROJECT_NAME}")
|
||||
set(CPACK_RPM_RUNTIME_PACKAGE_REQUIRES "rocm-core")
|
||||
set(CPACK_RPM_DEV_PACKAGE_NAME "${PROJECT_NAME}-devel")
|
||||
set(CPACK_RPM_DEV_PACKAGE_REQUIRES "${PROJECT_NAME}, rocm-core")
|
||||
set(CPACK_RPM_DEV_PACKAGE_PROVIDES "${PROJECT_NAME}-dev")
|
||||
set(CPACK_RPM_DEV_PACKAGE_OBSOLETES "${PROJECT_NAME}-dev")
|
||||
set(CPACK_RPM_TESTS_PACKAGE_NAME "${PROJECT_NAME}-tests")
|
||||
set(CPACK_RPM_TESTS_PACKAGE_REQUIRES "${PROJECT_NAME}-devel, rocm-llvm-devel")
|
||||
message("CPACK_RPM_PACKAGE_RELEASE: ${CPACK_RPM_PACKAGE_RELEASE}")
|
||||
# RPM package specific variable for ASAN
|
||||
set(CPACK_RPM_ASAN_PACKAGE_NAME "${PROJECT_NAME}-asan" )
|
||||
set(CPACK_RPM_ASAN_PACKAGE_REQUIRES "rocm-core-asan" )
|
||||
|
||||
#Disable build id for rocprofiler as it's creating a transaction error
|
||||
set ( CPACK_RPM_SPEC_MORE_DEFINE "%define _build_id_links none
|
||||
%global __strip ${CPACK_STRIP_EXECUTABLE}
|
||||
%global __objdump ${CPACK_OBJDUMP_EXECUTABLE}
|
||||
%global __objcopy ${CPACK_OBJCOPY_EXECUTABLE}
|
||||
%global __readelf ${CPACK_READELF_EXECUTABLE}")
|
||||
|
||||
if(NOT ROCM_DEP_ROCMCORE)
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_RUNTIME_PACKAGE_REQUIRES ${CPACK_RPM_RUNTIME_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_RPM_DEV_PACKAGE_REQUIRES ${CPACK_RPM_DEV_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core-asan" "" CPACK_RPM_ASAN_PACKAGE_REQUIRES ${CPACK_RPM_ASAN_PACKAGE_REQUIRES})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS ${CPACK_DEBIAN_RUNTIME_PACKAGE_DEPENDS})
|
||||
string(REGEX REPLACE ",? ?rocm-core" "" CPACK_DEBIAN_DEV_PACKAGE_DEPENDS ${CPACK_DEBIAN_DEV_PACKAGE_DEPENDS})
|
||||
string(REGEX REPLACE ",? ?rocm-core-asan" "" CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS ${CPACK_DEBIAN_ASAN_PACKAGE_DEPENDS})
|
||||
endif()
|
||||
|
||||
if(ENABLE_ASAN_PACKAGING)
|
||||
# ASAN Package requires asan component with only libraries and license file
|
||||
set(CPACK_COMPONENTS_ALL asan)
|
||||
else()
|
||||
set(CPACK_COMPONENTS_ALL runtime dev tests)
|
||||
endif()
|
||||
|
||||
include(CPack)
|
||||
|
||||
cpack_add_component(runtime
|
||||
DISPLAY_NAME "Runtime"
|
||||
DESCRIPTION "Dynamic libraries for the ROCtracer")
|
||||
|
||||
cpack_add_component(dev
|
||||
DISPLAY_NAME "Devel"
|
||||
DESCRIPTION "Header files and documentation for ROCtracer")
|
||||
|
||||
cpack_add_component(tests
|
||||
DISPLAY_NAME "Tests"
|
||||
DESCRIPTION "Tests for the ROCtracer"
|
||||
DEPENDS runtime)
|
||||
|
||||
cpack_add_component(asan
|
||||
DISPLAY_NAME "ASAN"
|
||||
DESCRIPTION "ASAN libraries for the ROCtracer")
|
||||
|
||||
endif()
|
||||
|
||||
find_package(Doxygen)
|
||||
if(DOXYGEN_FOUND)
|
||||
## Set input and output files
|
||||
set(DOXYGEN_IN ${CMAKE_CURRENT_SOURCE_DIR}/doc/Doxyfile.in)
|
||||
set(DOXYGEN_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile)
|
||||
|
||||
## Request to configure the file
|
||||
configure_file(${DOXYGEN_IN} ${DOXYGEN_OUT} @ONLY)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/doc/html/index.html ${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf
|
||||
COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYGEN_OUT}
|
||||
COMMAND make -C ${CMAKE_CURRENT_BINARY_DIR}/doc/latex pdf
|
||||
MAIN_DEPENDENCY ${DOXYGEN_OUT} ${DOXYGEN_IN}
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer.h ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_plugin.h
|
||||
COMMENT "Generating documentation")
|
||||
|
||||
add_custom_target(doc DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/doc/html/index.html
|
||||
${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf)
|
||||
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/doc/latex/refman.pdf"
|
||||
DESTINATION ${CMAKE_INSTALL_DOCDIR}
|
||||
RENAME "roctracer.pdf"
|
||||
OPTIONAL
|
||||
COMPONENT dev)
|
||||
|
||||
install(DIRECTORY
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/doc/html/"
|
||||
DESTINATION ${CMAKE_INSTALL_DATADIR}/html/${PROJECT_NAME}
|
||||
OPTIONAL
|
||||
COMPONENT dev)
|
||||
endif()
|
||||
@@ -0,0 +1 @@
|
||||
* @ammarwa @bgopesh
|
||||
@@ -0,0 +1,20 @@
|
||||
Copyright (c) 2018-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
[MITx11 License]
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
@@ -0,0 +1,171 @@
|
||||
# ROC-tracer
|
||||
|
||||
> [!IMPORTANT]
|
||||
> We are phasing out development and support for roctracer/rocprofiler/rocprof/rocprofv2 in favor of rocprofiler-sdk/rocprofv3 in upcoming ROCm releases. Going forward, only critical defect fixes will be addressed for older versions of profiling tools and libraries. We encourage all users to upgrade to the latest version, rocprofiler-sdk library and rocprofv3 tool, to ensure continued support and access to new features.
|
||||
|
||||
> [!NOTE]
|
||||
> The published documentation is available at [ROCTracer](https://rocm.docs.amd.com/projects/roctracer/en/latest/index.html) in an organized, easy-to-read format, with search and a table of contents.
|
||||
|
||||
- **ROC-tracer library: Runtimes Generic Callback/Activity APIs**
|
||||
|
||||
The goal of the implementation is to provide a generic profiler, independent of any specific runtime, to trace API calls and asynchronous activity.
|
||||
|
||||
The API provides functionality for registering runtime API callbacks and supports a pool of asynchronous activity records.
|
||||
|
||||
- **ROC-TX library: Code Annotation Events API**
|
||||
|
||||
Includes API for:
|
||||
|
||||
- `roctxMark`
|
||||
- `roctxRangePush`
|
||||
- `roctxRangePop`
|
||||
|
||||
## Usage
|
||||
|
||||
### `rocTracer` API
|
||||
|
||||
To use the rocTracer API you need the API header and to link your application with the `roctracer` .so library:
|
||||
|
||||
- `/opt/rocm/include/roctracer/roctracer.h`
|
||||
|
||||
API header.
|
||||
|
||||
- `/opt/rocm/lib/libroctracer64.so`
|
||||
|
||||
.so library.
|
||||
|
||||
### `rocTX` API
|
||||
|
||||
To use the rocTX API you need the API header and to link your application with the `roctx` .so library:
|
||||
|
||||
- `/opt/rocm/include/roctracer/roctx.h`
|
||||
|
||||
API header.
|
||||
|
||||
- `/opt/rocm/lib/libroctx64.so`
|
||||
|
||||
.so library.
|
||||
|
||||
## Library source tree
|
||||
|
||||
- `doc`
|
||||
|
||||
Documentation.
|
||||
|
||||
- `inc`
|
||||
|
||||
Include header files.
|
||||
|
||||
- `roctracer.h`
|
||||
|
||||
`rocTracer` library public API header.
|
||||
|
||||
- `roctx.h`
|
||||
|
||||
`rocTX` library public API header.
|
||||
|
||||
- `src`
|
||||
|
||||
Library sources.
|
||||
|
||||
- `core`
|
||||
|
||||
`rocTracer` library API sources.
|
||||
|
||||
- `roctx`
|
||||
|
||||
`rocTX` library API sources.
|
||||
|
||||
- `util`
|
||||
|
||||
Library utils sources.
|
||||
|
||||
- `test`
|
||||
|
||||
Test suite.
|
||||
|
||||
- `MatrixTranspose`
|
||||
|
||||
Test based on HIP MatrixTranspose sample.
|
||||
|
||||
## Documentation
|
||||
|
||||
- API description:
|
||||
- ['roctracer' / 'rocTX' profiling C API specification](doc/roctracer_spec.md)
|
||||
- Code examples:
|
||||
- [HIP API ops, GPU Activity Tracing](doc/roctracer_spec.md#41-hip-api-ops-gpu-activity-tracing)
|
||||
- [MatrixTranspose HIP sample with all APIs/activity tracing enabled](doc/roctracer_spec.md#42-matrixtranspose-hip-sample-with-all-apisactivity-tracing-enabled)
|
||||
|
||||
## Build and run tests
|
||||
|
||||
- ROCm is required
|
||||
|
||||
- Packages required:
|
||||
|
||||
1. For Ubuntu 18.04 and Ubuntu 20.04 the following adds the needed packages:
|
||||
|
||||
````shell
|
||||
apt install python3 python3-pip gcc g++ libatomic1 make rocm-llvm-dev \
|
||||
cmake doxygen graphviz texlive-full
|
||||
````
|
||||
|
||||
2. For CentOS 8.1 and RHEL 8.1 the following adds the needed packages:
|
||||
|
||||
````shell
|
||||
yum install -y python3 python3-pip gcc gcc-g++ make rocm-llvm-devel \
|
||||
cmake libatomic doxygen graphviz texlive \
|
||||
texlive-xtab texlive-multirow texlive-sectsty \
|
||||
texlive-tocloft texlive-tabu texlive-adjustbox
|
||||
````
|
||||
|
||||
3. For SLES 15 Service Pack 15 the following adds the needed packages:
|
||||
|
||||
````shell
|
||||
zypper in python3 python3-pip gcc gcc-g++ make rocm-llvm-devel \
|
||||
cmake libatomic doxygen graphviz \
|
||||
texlive-scheme-medium texlive-hanging texlive-stackengine \
|
||||
texlive-tocloft texlive-etoc texlive-tabu
|
||||
````
|
||||
|
||||
- Python modules requirements: `CppHeaderParser`, `argparse`.
|
||||
|
||||
To install:
|
||||
|
||||
```sh
|
||||
pip3 install CppHeaderParser argparse
|
||||
```
|
||||
|
||||
- Clone development branch of `roctracer`:
|
||||
|
||||
```sh
|
||||
git clone -b amd-master https://github.com/ROCm-Developer-Tools/roctracer
|
||||
```
|
||||
|
||||
- To build `roctracer` library:
|
||||
|
||||
```sh
|
||||
cd <your path>/roctracer
|
||||
./build.sh
|
||||
```
|
||||
|
||||
- To build and run test:
|
||||
|
||||
```sh
|
||||
cd <your path>/roctracer/build
|
||||
make mytest
|
||||
run.sh
|
||||
```
|
||||
|
||||
## Installation
|
||||
|
||||
Install by:
|
||||
|
||||
```sh
|
||||
make install
|
||||
```
|
||||
|
||||
or:
|
||||
|
||||
```sh
|
||||
make package && dpkg -i *.deb
|
||||
```
|
||||
Inrite
@@ -0,0 +1,75 @@
|
||||
#!/bin/bash -e
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
SRC_DIR=`dirname $0`
|
||||
COMPONENT="roctracer"
|
||||
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
|
||||
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
|
||||
DEFAULTS=defaults.sh
|
||||
|
||||
# Print the message given as the first argument to stdout, then terminate
# the script with exit status 1.
fatal() {
|
||||
echo "$1"
|
||||
exit 1
|
||||
}
|
||||
|
||||
umask 022
|
||||
|
||||
if [ -e "$DEFAULTS" ] ; then source "$DEFAULTS"; fi
|
||||
|
||||
if [ -z "$ROCTRACER_ROOT" ]; then ROCTRACER_ROOT=$SRC_DIR; fi
|
||||
if [ -z "$BUILD_DIR" ] ; then BUILD_DIR=$PWD; fi
|
||||
if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="release"; fi
|
||||
if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi
|
||||
if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi
|
||||
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
|
||||
if [ -z "$HIP_VDI" ] ; then HIP_VDI=0; fi
|
||||
if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
|
||||
if [ -z "$GPU_LIST" ] ; then GPU_LIST="gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030 gfx1100 gfx1101 gfx1102"; fi
|
||||
|
||||
ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD)
|
||||
|
||||
if [ "$TO_CLEAN" = "yes" ] ; then rm -rf $BUILD_DIR; fi
|
||||
mkdir -p $BUILD_DIR
|
||||
pushd $BUILD_DIR
|
||||
|
||||
cmake \
|
||||
-DCMAKE_MODULE_PATH=$ROCM_PATH/hip/cmake \
|
||||
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
|
||||
-DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
|
||||
-DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \
|
||||
-DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \
|
||||
-DCPACK_GENERATOR="${CPACKGEN:-"DEB;RPM"}" \
|
||||
-DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
|
||||
-DGPU_TARGETS="$GPU_LIST" \
|
||||
-DCPACK_OBJCOPY_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objcopy" \
|
||||
-DCPACK_READELF_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-readelf" \
|
||||
-DCPACK_STRIP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-strip" \
|
||||
-DCPACK_OBJDUMP_EXECUTABLE="${PACKAGE_ROOT}/llvm/bin/llvm-objdump" \
|
||||
$ROCTRACER_ROOT
|
||||
|
||||
make
|
||||
make mytest
|
||||
make package
|
||||
|
||||
exit 0
|
||||
@@ -0,0 +1,66 @@
|
||||
#!/bin/bash -x
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
SRC_DIR=`dirname $0`
|
||||
COMPONENT="roctracer"
|
||||
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
|
||||
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
|
||||
DEFAULTS=defaults.sh
|
||||
|
||||
# Print the message given as the first argument to stdout, then terminate
# the script with exit status 1.
fatal() {
|
||||
echo "$1"
|
||||
exit 1
|
||||
}
|
||||
|
||||
umask 022
|
||||
|
||||
if [ -e "$DEFAULTS" ] ; then source "$DEFAULTS"; fi
|
||||
|
||||
if [ -z "$ROCTRACER_ROOT" ]; then ROCTRACER_ROOT=$SRC_DIR; fi
|
||||
if [ -z "$BUILD_DIR" ] ; then BUILD_DIR=$PWD; fi
|
||||
if [ -z "$BUILD_TYPE" ] ; then BUILD_TYPE="release"; fi
|
||||
if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi
|
||||
if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi
|
||||
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
|
||||
if [ -z "$HIP_VDI" ] ; then HIP_VDI=0; fi
|
||||
if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
|
||||
|
||||
ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD)
|
||||
|
||||
if [ "$TO_CLEAN" = "yes" ] ; then rm -rf $BUILD_DIR; fi
|
||||
mkdir -p $BUILD_DIR
|
||||
pushd $BUILD_DIR
|
||||
|
||||
cmake \
|
||||
-DCMAKE_MODULE_PATH=$ROCM_PATH/hip/cmake \
|
||||
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
|
||||
-DCMAKE_PREFIX_PATH="$PREFIX_PATH" \
|
||||
-DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \
|
||||
-DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
|
||||
-DLIBRARY_TYPE=STATIC \
|
||||
$ROCTRACER_ROOT
|
||||
|
||||
make
|
||||
|
||||
exit 0
|
||||
@@ -0,0 +1,24 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
BUILD_DIR=build
|
||||
TO_CLEAN=yes
|
||||
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór
Difríocht Luchtaigh
@@ -0,0 +1,754 @@
|
||||
# ROC Tracer / ROC-TX Libraries Specification
|
||||
```
|
||||
ROC Tracer API version 2
|
||||
ROC-TX API version 1
|
||||
|
||||
- The rocTracer API is agnostic to specific runtime and may trace
|
||||
the runtime API calls and asynchronous GPU activity.
|
||||
- The rocTX API is provided for application code annotation.
|
||||
```
|
||||
## 1. High level overview
|
||||
```
|
||||
The goal of the implementation is to provide a runtime independent API
|
||||
for tracing of runtime calls and asynchronous activity, like GPU kernel
|
||||
dispatches and memory moves. The tracing includes callback API for
|
||||
runtime API tracing and activity API for asynchronous activity records
|
||||
logging.
|
||||
|
||||
Depending on particular runtime intercepting mechanism, the rocTracer
|
||||
library can be dynamically linked, dynamically loaded by the runtime as
|
||||
a plugin or some API wrapper can be loaded using LD_PRELOAD.
|
||||
The library has a C API.
|
||||
|
||||
The rocTracer library is an API that intercepts runtime API calls and
|
||||
traces asynchronous activity. The activity tracing results are recorded
|
||||
in a ring buffer.
|
||||
|
||||
The rocTX contains application code instrumentation API to support high
|
||||
level correlation of runtime API/activity events. The API includes mark
|
||||
and nested ranges.
|
||||
```
|
||||
## 2. General API
|
||||
### 2.1. Description
|
||||
```
|
||||
The library supports methods for getting the error number and error string
|
||||
of the last failed library API call. To check the conformance
|
||||
of the library API header in use against the library binary, the version macros and
|
||||
API methods can be used.
|
||||
|
||||
Returning the error and error string methods:
|
||||
• roctracer_status_t – error code enumeration
|
||||
• roctracer_error_string – method for returning the error string
|
||||
|
||||
Library version:
|
||||
• ROCTRACER_VERSION_MAJOR – API major version macro
|
||||
• ROCTRACER_VERSION_MINOR – API minor version macro
|
||||
• roctracer_version_major – library major version
|
||||
• roctracer_version_minor – library minor version
|
||||
```
|
||||
### 2.2. Error codes and error string methods
|
||||
```
|
||||
Error code enumeration:
|
||||
typedef enum {
|
||||
ROCTRACER_STATUS_SUCCESS = 0,
|
||||
ROCTRACER_STATUS_ERROR = 1,
|
||||
ROCTRACER_STATUS_UNINIT = 2,
|
||||
ROCTRACER_STATUS_BREAK = 3,
|
||||
ROCTRACER_STATUS_BAD_DOMAIN = 4,
|
||||
ROCTRACER_STATUS_BAD_PARAMETER = 5,
|
||||
ROCTRACER_STATUS_HIP_API_ERR = 6,
|
||||
ROCTRACER_STATUS_HCC_OPS_ERR = 7,
|
||||
ROCTRACER_STATUS_ROCTX_ERR = 8,
|
||||
} roctracer_status_t;
|
||||
|
||||
Return error string:
|
||||
const char* roctracer_error_string();
|
||||
```
|
||||
### 2.3. Library version
|
||||
```
|
||||
The library provides major and minor versions. Major version is for
|
||||
incompatible API changes and minor version for bug fixes.
|
||||
|
||||
API version macros defined in the library API header ‘roctracer.h’:
|
||||
ROCTRACER_VERSION_MAJOR
|
||||
ROCTRACER_VERSION_MINOR
|
||||
|
||||
Methods to check library major and minor version:
|
||||
uint32_t roctracer_version_major();
|
||||
uint32_t roctracer_version_minor();
|
||||
```
|
||||
## 3. Frontend API
|
||||
### 3.1. Description
|
||||
```
|
||||
The rocTracer provides support for runtime API callbacks and activity
|
||||
records logging. The APIs of different runtimes at different levels
|
||||
are considered as different API domains with assigned domain IDs. For
|
||||
example, language level and driver level. The API callbacks provide
|
||||
the API call arguments and are called in two phases: on “enter” and
|
||||
on “exit”. The activity records are logged to the ring buffer and can
|
||||
be associated with the respective API calls using the correlation ID.
|
||||
Activity API can be used to enable collecting of the records with
|
||||
timestamping data for API calls and asynchronous activity like the
|
||||
kernel submits, memory copies and barriers.
|
||||
|
||||
Tracing domains:
|
||||
• roctracer_domain_t – runtime API domains, HIP, HSA, etc…
|
||||
• roctracer_op_string – Return Op string by given domain and
|
||||
activity Op code
|
||||
• roctracer_op_code – Return Op code and kind by given string
|
||||
|
||||
Callback API:
|
||||
• roctracer_rtapi_callback_t – runtime API callback type
|
||||
• roctracer_enable_op_callback – enable runtime API callback
|
||||
by domain and Op code
|
||||
• roctracer_enable_domain_callback – enable runtime API callback
|
||||
by domain for all Ops
|
||||
• roctracer_enable_callback – enable runtime API callback for
|
||||
all domains, all Ops
|
||||
• roctracer_disable_op_callback – disable runtime API callback
|
||||
by domain and Op code
|
||||
• roctracer_disable_domain_callback – disable runtime API callback
|
||||
by domain for all Ops
|
||||
• roctracer_disable_callback – disable runtime API callback for
|
||||
all domains, all Ops
|
||||
|
||||
Activity API:
|
||||
• roctracer_record_t – activity record
|
||||
• roctracer_pool_t – records pool type
|
||||
• roctracer_allocator_t – tracer allocator type
|
||||
• roctracer_buffer_callback_t – pool callback type
|
||||
• roctracer_open_pool[_expl] – create records pool
|
||||
• roctracer_close_pool[_expl] – close records pool
|
||||
• roctracer_default_pool[_expl] – get/set default pool
|
||||
• roctracer_properties_t – tracer properties
|
||||
• roctracer_enable_op_activity[_expl] – enable activity records logging
|
||||
• roctracer_enable_domain_activity[_expl] – enable activity records logging
|
||||
• roctracer_enable_activity[_expl] – enable activity records logging
|
||||
• roctracer_disable_op_activity – disable activity records logging
|
||||
• roctracer_disable_domain_activity – disable activity records logging
|
||||
• roctracer_disable_activity – disable activity records logging
|
||||
• roctracer_flush_activity[_expl] – flush available activity records
|
||||
• roctracer_next_record – return next record
|
||||
• roctracer_get_timestamp – return correlated GPU/CPU system timestamp
|
||||
|
||||
External correlation ID API:
|
||||
• roctracer_activity_push_external_correlation_id - push an external
|
||||
correlation id for the calling thread
|
||||
• roctracer_activity_pop_external_correlation_id - pop an external
|
||||
correlation id for the calling thread
|
||||
|
||||
Tracing control API:
|
||||
• roctracer_start – tracing start
|
||||
• roctracer_stop – tracing stop
|
||||
|
||||
```
|
||||
### 3.2. Tracing Domains
|
||||
```
|
||||
Various tracing domains are supported. Each domain is assigned with
|
||||
a domain ID. The domains include HSA, HIP runtime levels.
|
||||
|
||||
Traced API domains:
|
||||
typedef enum {
|
||||
ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain
|
||||
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_OPS = 2, // HIP async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain
|
||||
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
|
||||
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
|
||||
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
|
||||
ACTIVITY_DOMAIN_NUMBER = 7
|
||||
} activity_domain_t;
|
||||
|
||||
Return name by given domain and Op code:
|
||||
const char* roctracer_op_string( // NULL returned on error and error number is set
|
||||
uint32_t domain, // tracing domain
|
||||
uint32_t op, // activity op code
|
||||
uint32_t kind); // activity kind
|
||||
Return Op code and kind by given string:
|
||||
roctracer_status_t roctracer_op_code(
|
||||
uint32_t domain, // tracing domain
|
||||
const char* str, // [in] op string
|
||||
uint32_t* op, // [out] op code
|
||||
uint32_t* kind); // [out] op kind code if not NULL
|
||||
```
|
||||
### 3.3. Callback API
|
||||
```
|
||||
The tracer provides support for runtime API callbacks and activity records
|
||||
logging. The API callbacks provide the API calls arguments and are called
|
||||
in two phases: on “enter” and on “exit”.
|
||||
|
||||
API phase passed to the callbacks:
|
||||
typedef enum {
|
||||
ROCTRACER_API_PHASE_ENTER,
|
||||
ROCTRACER_API_PHASE_EXIT,
|
||||
} roctracer_api_phase_t;
|
||||
|
||||
Runtime API callback type:
|
||||
typedef void (*roctracer_rtapi_callback_t)(
|
||||
uint32_t domain, // runtime API domain
|
||||
uint32_t cid, // API call ID
|
||||
const void* data, // [in] callback data with correlation id and the call
|
||||
// arguments
|
||||
void* arg); // [in/out] user passed data
|
||||
|
||||
Enable runtime API callbacks:
|
||||
roctracer_status_t roctracer_enable_op_callback(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op, // API call ID
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
|
||||
roctracer_status_t roctracer_enable_domain_callback(
|
||||
activity_domain_t domain, // tracing domain
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
|
||||
|
||||
roctracer_status_t roctracer_enable_callback(
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
|
||||
Disable runtime API callbacks:
|
||||
roctracer_status_t roctracer_disable_op_callback(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op); // API call ID
|
||||
|
||||
roctracer_status_t roctracer_disable_domain_callback(
|
||||
activity_domain_t domain); // tracing domain
|
||||
|
||||
roctracer_status_t roctracer_disable_callback();
|
||||
```
|
||||
|
||||
### 3.4 Activity API
|
||||
|
||||
The activity records are asynchronously logged to the pool and can be
|
||||
associated with the respective API callbacks using the correlation ID.
|
||||
Activity API can be used to enable collecting the records with
|
||||
timestamp data for API calls and GPU activity like kernel submits,
|
||||
memory copies, and barriers.
|
||||
|
||||
```
|
||||
// Correlation id
|
||||
typedef uint64_t activity_correlation_id_t;
|
||||
|
||||
Activity record type:
|
||||
|
||||
// Activity record type
|
||||
struct activity_record_t {
|
||||
uint32_t domain; // activity domain id
|
||||
activity_kind_t kind; // activity kind
|
||||
activity_op_t op; // activity op
|
||||
activity_correlation_id_t correlation_id; // activity ID
|
||||
uint64_t begin_ns; // host begin timestamp
|
||||
uint64_t end_ns; // host end timestamp
|
||||
union {
|
||||
struct {
|
||||
int device_id; // device id
|
||||
uint64_t queue_id; // queue id
|
||||
};
|
||||
struct {
|
||||
uint32_t process_id; // process id
|
||||
uint32_t thread_id; // thread id
|
||||
};
|
||||
struct {
|
||||
activity_correlation_id_t external_id; // external correlation id
|
||||
};
|
||||
};
|
||||
size_t bytes; // data size bytes
|
||||
};
|
||||
```
|
||||
|
||||
> [!NOTE]
|
||||
> rocprofiler reported device ids are in sync with node-ids reported by KFD(kernel). This can easily be verified by `rocm-smi` under `Node`.
|
||||
> Please also note that this device id might not be in sync with the ones provided by `hipGetDeviceProperties` which includes CPU agents and starts from 0.
|
||||
|
||||
```
|
||||
Return next record:
|
||||
static inline int roctracer_next_record(
|
||||
const activity_record_t* record, // [in] record ptr
|
||||
const activity_record_t** next); // [out] next record ptr
|
||||
|
||||
Tracer allocator type:
|
||||
typedef void (*roctracer_allocator_t)(
|
||||
char** ptr, // memory pointer
|
||||
size_t size, // memory size
|
||||
void* arg); // allocator arg
|
||||
|
||||
Pool callback type:
|
||||
typedef void (*roctracer_buffer_callback_t)(
|
||||
const char* begin, // [in] available buffered trace records
|
||||
const char* end, // [in] end of buffered trace records
|
||||
void* arg); // [in/out] callback arg
|
||||
|
||||
Tracer properties:
|
||||
typedef struct {
|
||||
uint32_t mode; // roctracer mode
|
||||
size_t buffer_size; // buffer size
|
||||
// power of 2
|
||||
roctracer_allocator_t alloc_fun; // memory allocator
|
||||
// function pointer
|
||||
void* alloc_arg; // memory allocator
|
||||
// function argument
|
||||
roctracer_buffer_callback_t buffer_callback_fun; // tracer record
|
||||
// callback function
|
||||
void* buffer_callback_arg; // tracer record
|
||||
// callback arg
|
||||
} roctracer_properties_t;
|
||||
|
||||
Tracer memory pool handle type:
|
||||
typedef void roctracer_pool_t;
|
||||
|
||||
Create tracer memory pool:
|
||||
roctracer_status_t roctracer_open_pool(
|
||||
const roctracer_properties_t* properties); // tracer pool properties
|
||||
|
||||
roctracer_status_t roctracer_open_pool_expl(
|
||||
const roctracer_properties_t* properties, // tracer pool properties
|
||||
roctracer_pool_t** pool); // [out] returns tracer pool if
|
||||
// not NULL, otherwise sets the
|
||||
// default one if it is not set
|
||||
// yet; otherwise the error is
|
||||
// generated
|
||||
|
||||
Close tracer memory pool:
|
||||
roctracer_status_t roctracer_close_pool();
|
||||
|
||||
roctracer_status_t roctracer_close_pool_expl(
|
||||
roctracer_pool_t* pool); // memory pool, NULL means default pool
|
||||
|
||||
Return current default pool. Set new default pool if the argument is not NULL:
|
||||
roctracer_pool_t* roctracer_default_pool();
|
||||
|
||||
roctracer_pool_t* roctracer_default_pool_expl(
|
||||
roctracer_pool_t* pool); // new default pool if not NULL
|
||||
```
|
||||
Enable activity records logging:
|
||||
```
|
||||
roctracer_status_t roctracer_enable_op_activity(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op); // activity op ID
|
||||
|
||||
roctracer_status_t roctracer_enable_op_activity_expl(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op, // activity op ID
|
||||
roctracer_pool_t* pool); // memory pool, NULL means default pool
|
||||
|
||||
roctracer_status_t roctracer_enable_domain_activity(
|
||||
activity_domain_t domain); // tracing domain
|
||||
|
||||
roctracer_status_t roctracer_enable_domain_activity_expl(
|
||||
activity_domain_t domain, // tracing domain
|
||||
roctracer_pool_t* pool); // memory pool, NULL means default pool
|
||||
|
||||
roctracer_status_t roctracer_enable_activity();
|
||||
|
||||
roctracer_status_t roctracer_enable_activity_expl(
|
||||
roctracer_pool_t* pool); // memory pool, NULL means default pool
|
||||
|
||||
Disable activity records logging:
|
||||
roctracer_status_t roctracer_disable_op_activity(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op); // activity op ID
|
||||
|
||||
roctracer_status_t roctracer_disable_domain_activity(
|
||||
activity_domain_t domain); // tracing domain
|
||||
|
||||
roctracer_status_t roctracer_disable_activity();
|
||||
|
||||
Flush available activity records:
|
||||
roctracer_status_t roctracer_flush_activity();
|
||||
|
||||
roctracer_status_t roctracer_flush_activity_expl(
|
||||
roctracer_pool_t* pool); // memory pool, NULL means default pool
|
||||
|
||||
Return correlated GPU/CPU system timestamp:
|
||||
roctracer_status_t roctracer_get_timestamp(
|
||||
uint64_t* timestamp); // [out] return timestamp
|
||||
```
|
||||
External correlation ID API
|
||||
```
|
||||
The API provides activity records to associate rocTracer correlation IDs with
|
||||
IDs provided by external APIs. The external ID records are identified by
|
||||
ACTIVITY_DOMAIN_EXT_API domain value.
|
||||
Using the ‘push’ method an external ID is pushed to a per CPU thread stack and
|
||||
the ‘pop’ method can be used to remove the last pushed ID.
|
||||
An external ID record is inserted before any generated rocTracer activity record
|
||||
if the external ID stack of the same CPU thread is non-empty.
|
||||
|
||||
Notifies that the calling thread is entering an external API region.
|
||||
Push an external correlation id for the calling thread.
|
||||
roctracer_status_t roctracer_activity_push_external_correlation_id(
|
||||
activity_correlation_id_t id); // external correlation id
|
||||
|
||||
Notifies that the calling thread is leaving an external API region.
|
||||
Pop an external correlation id for the calling thread.
|
||||
roctracer_status_t roctracer_activity_pop_external_correlation_id(
|
||||
activity_correlation_id_t* last_id); // returns the last external correlation id
|
||||
// if not NULL
|
||||
```
|
||||
Tracing control API
|
||||
```
|
||||
Tracing start:
|
||||
void roctracer_start();
|
||||
|
||||
Tracing stop:
|
||||
void roctracer_stop();
|
||||
```
|
||||
## 4. rocTracer Usage Code Examples
|
||||
### 4.1. HIP API ops, GPU Activity Tracing
|
||||
```
|
||||
#include <roctracer/roctracer_hip.h>
|
||||
|
||||
// HIP API callback function
|
||||
void hip_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
(void)arg;
|
||||
const hip_api_data_t* data = reinterpret_cast <const hip_api_data_t*>
|
||||
(callback_data);
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0),
|
||||
cid,
|
||||
data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
|
||||
<some code . . .>
|
||||
}
|
||||
|
||||
// Activity tracing callback
|
||||
void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
const roctracer_record_t* record = reinterpret_cast<const
|
||||
roctracer_record_t*>(begin);
|
||||
const roctracer_record_t* end_record = reinterpret_cast<const
|
||||
roctracer_record_t*>(end);
|
||||
fprintf(stdout, "\tActivity records:\n");
|
||||
while (record < end_record) {
|
||||
const char * name = roctracer_op_string(record->domain,
|
||||
record->activity_id, 0);
|
||||
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)
|
||||
device_id(%d) stream_id(%lu)\n",
|
||||
name,
|
||||
record->correlation_id,
|
||||
record->begin_ns,
|
||||
record->end_ns,
|
||||
record->device_id,
|
||||
record->stream_id
|
||||
);
|
||||
<some code . . .>
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &record));
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 12;
|
||||
properties.buffer_callback_fun = activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
|
||||
// Enable HIP API callbacks. HIP_API_ID_ANY can be used to trace all HIP
|
||||
// API calls.
|
||||
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API,
|
||||
HIP_API_ID_hipModuleLaunchKernel,
|
||||
hip_api_callback, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_API,
|
||||
HIP_API_ID_hipModuleLaunchKernel));
|
||||
// Enable HIP kernel dispatch activity tracing
|
||||
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS,
|
||||
HIP_OP_ID_DISPATCH));
|
||||
|
||||
<test code>
|
||||
|
||||
// Disable tracing and closing the pool
|
||||
ROCTRACER_CALL(roctracer_disable_callback());
|
||||
ROCTRACER_CALL(roctracer_disable_activity());
|
||||
ROCTRACER_CALL(roctracer_close_pool());
|
||||
}
|
||||
```
|
||||
### 4.2. MatrixTranspose HIP sample with all APIs/activity tracing enabled
|
||||
```
|
||||
This shows a MatrixTranspose HIP sample with enabled tracing of
|
||||
all HIP API and all GPU asynchronous activity.
|
||||
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
// hip header file
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#ifndef ITERATIONS
|
||||
# define ITERATIONS 100
|
||||
#endif
|
||||
#define WIDTH 1024
|
||||
|
||||
|
||||
#define NUM (WIDTH * WIDTH)
|
||||
|
||||
#define THREADS_PER_BLOCK_X 4
|
||||
#define THREADS_PER_BLOCK_Y 4
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
// hipLaunchParm provides the execution configuration
|
||||
__global__ void matrixTranspose(hipLaunchParm lp, float* out, float* in,
|
||||
const int width) {
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
out[y * width + x] = in[x * width + y];
|
||||
}
|
||||
|
||||
// CPU implementation of matrix transpose
|
||||
void matrixTransposeCPUReference(float* output, float* input, const unsigned
|
||||
int width) {
|
||||
for (unsigned int j = 0; j < width; j++) {
|
||||
for (unsigned int i = 0; i < width; i++) {
|
||||
output[i * width + j] = input[j * width + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int iterations = ITERATIONS;
|
||||
void start_tracing();
|
||||
void stop_tracing();
|
||||
|
||||
int main() {
|
||||
float* Matrix;
|
||||
float* TransposeMatrix;
|
||||
float* cpuTransposeMatrix;
|
||||
|
||||
float* gpuMatrix;
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
int i;
|
||||
int errors;
|
||||
|
||||
while (iterations-- > 0) {
|
||||
start_tracing();
|
||||
|
||||
Matrix = (float*)malloc(NUM * sizeof(float));
|
||||
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float),
|
||||
hipMemcpyHostToDevice);
|
||||
|
||||
// Launching kernel from host
|
||||
hipLaunchKernel(matrixTranspose,
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH /
|
||||
THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
|
||||
gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float),
|
||||
hipMemcpyDeviceToHost);
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
|
||||
// verify the results
|
||||
errors = 0;
|
||||
double eps = 1.0E-6;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
printf("FAILED: %d errors\n", errors);
|
||||
} else {
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
|
||||
stop_tracing();
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// HIP Callbacks/Activity tracing
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#include <roctracer/roctracer_hip.h>
|
||||
|
||||
// Macro to check ROC-tracer calls status
|
||||
#define ROCTRACER_CALL(call) \
|
||||
do { \
|
||||
int err = call; \
|
||||
if (err != 0) { \
|
||||
std::cerr << roctracer_error_string() << std::endl << std::flush; \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// HIP API callback function
|
||||
void hip_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
(void)arg;
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>
|
||||
(callback_data);
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0),
|
||||
cid,
|
||||
data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
switch (cid) {
|
||||
case HIP_API_ID_hipMemcpy:
|
||||
fprintf(stdout, "dst(%p) src(%p) size(0x%x) kind(%u)",
|
||||
data->args.hipMemcpy.dst,
|
||||
data->args.hipMemcpy.src,
|
||||
(uint32_t)(data->args.hipMemcpy.sizeBytes),
|
||||
(uint32_t)(data->args.hipMemcpy.kind));
|
||||
break;
|
||||
case HIP_API_ID_hipMalloc:
|
||||
fprintf(stdout, "ptr(%p) size(0x%x)",
|
||||
data->args.hipMalloc.ptr,
|
||||
(uint32_t)(data->args.hipMalloc.size));
|
||||
break;
|
||||
case HIP_API_ID_hipFree:
|
||||
fprintf(stdout, "ptr(%p)",
|
||||
data->args.hipFree.ptr);
|
||||
break;
|
||||
case HIP_API_ID_hipModuleLaunchKernel:
|
||||
fprintf(stdout, "kernel(\"%s\") stream(%p)",
|
||||
hipKernelNameRef(data->args.hipModuleLaunchKernel.f),
|
||||
data->args.hipModuleLaunchKernel.stream);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (cid) {
|
||||
case HIP_API_ID_hipMalloc:
|
||||
fprintf(stdout, "*ptr(0x%p)",
|
||||
*(data->args.hipMalloc.ptr));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
fprintf(stdout, "\n"); fflush(stdout);
|
||||
}
|
||||
|
||||
// Activity tracing callback
|
||||
// hipMalloc id(3) correlation_id(1):
|
||||
// begin_ns(1525888652762640464) end_ns(1525888652762877067)
|
||||
void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
const roctracer_record_t* record = reinterpret_cast
|
||||
<const roctracer_record_t*>(begin);
|
||||
const roctracer_record_t* end_record = reinterpret_cast
|
||||
<const roctracer_record_t*>(end);
|
||||
fprintf(stdout, "\tActivity records:\n"); fflush(stdout);
|
||||
while (record < end_record) {
|
||||
const char * name = roctracer_op_string(record->domain,
|
||||
record->activity_id, 0);
|
||||
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) \
|
||||
device_id(%d) stream_id(%lu)",
|
||||
name,
|
||||
record->correlation_id,
|
||||
record->begin_ns,
|
||||
record->end_ns,
|
||||
record->device_id,
|
||||
record->stream_id
|
||||
);
|
||||
if (record->kind == hc::HSA_OP_ID_COPY)
|
||||
fprintf(stdout, " bytes(0x%zx)", record->bytes);
|
||||
fprintf(stdout, "\n");
|
||||
fflush(stdout);
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &record));
|
||||
}
|
||||
}
|
||||
|
||||
// Start tracing routine
|
||||
void start_tracing() {
|
||||
std::cout << "# START #############################" << std::endl
|
||||
<< std::flush;
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 0x1000;
|
||||
properties.buffer_callback_fun = activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
// Enable API callbacks, all domains
|
||||
ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, NULL));
|
||||
// Enable activity tracing, all domains
|
||||
ROCTRACER_CALL(roctracer_enable_activity());
|
||||
}
|
||||
|
||||
// Stop tracing routine
|
||||
void stop_tracing() {
|
||||
ROCTRACER_CALL(roctracer_disable_callback());
|
||||
ROCTRACER_CALL(roctracer_disable_activity());
|
||||
ROCTRACER_CALL(roctracer_close_pool());
|
||||
std::cout << "# STOP #############################" << std::endl
|
||||
<< std::flush;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
```
|
||||
## 5. rocTX application code annotation API
|
||||
```
|
||||
Basic annotation API: markers and nested ranges.
|
||||
// A marker created by given ASCII message
|
||||
void roctxMark(const char* message);
|
||||
|
||||
// Returns the 0 based level of a nested range being started by given message associated to this range.
|
||||
// A negative value is returned on the error.
|
||||
int roctxRangePush(const char* message);
|
||||
|
||||
// Marks the end of a nested range.
|
||||
// Returns the 0 based level of the range.
|
||||
// A negative value is returned on the error.
|
||||
int roctxRangePop();
|
||||
```
|
||||
@@ -0,0 +1,4 @@
|
||||
---
|
||||
BasedOnStyle: InheritParentConfig
|
||||
ColumnLimit: 79
|
||||
...
|
||||
@@ -0,0 +1,107 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef EXT_PROF_PROTOCOL_H_
|
||||
#define EXT_PROF_PROTOCOL_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Traced API domains. Each enumerator is the numeric domain id; HCC_OPS and HIP_VDI are aliases that share the value of HIP_OPS. */
|
||||
typedef enum {
|
||||
ACTIVITY_DOMAIN_HSA_API = 0, /* HSA API domain */
|
||||
ACTIVITY_DOMAIN_HSA_OPS = 1, /* HSA async activity domain */
|
||||
ACTIVITY_DOMAIN_HIP_OPS = 2, /* HIP async activity domain */
|
||||
ACTIVITY_DOMAIN_HCC_OPS =
|
||||
ACTIVITY_DOMAIN_HIP_OPS, /* HCC async activity domain (alias of HIP_OPS) */
|
||||
ACTIVITY_DOMAIN_HIP_VDI =
|
||||
ACTIVITY_DOMAIN_HIP_OPS, /* HIP VDI async activity domain (alias of HIP_OPS) */
|
||||
ACTIVITY_DOMAIN_HIP_API = 3, /* HIP API domain */
|
||||
ACTIVITY_DOMAIN_KFD_API = 4, /* KFD API domain */
|
||||
ACTIVITY_DOMAIN_EXT_API = 5, /* External ID domain */
|
||||
ACTIVITY_DOMAIN_ROCTX = 6, /* ROCTX domain */
|
||||
ACTIVITY_DOMAIN_HSA_EVT = 7, /* HSA events */
|
||||
ACTIVITY_DOMAIN_NUMBER /* implicit value: one past the last domain id above (8) */
|
||||
} activity_domain_t;
|
||||
|
||||
/* API callback type */
|
||||
typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid,
|
||||
const void* data, void* arg);
|
||||
typedef uint32_t activity_kind_t;
|
||||
typedef uint32_t activity_op_t;
|
||||
|
||||
/* API callback phase */
|
||||
typedef enum {
|
||||
ACTIVITY_API_PHASE_ENTER = 0,
|
||||
ACTIVITY_API_PHASE_EXIT = 1
|
||||
} activity_api_phase_t;
|
||||
|
||||
/* Trace record types */
|
||||
|
||||
/* Correlation id */
|
||||
typedef uint64_t activity_correlation_id_t;
|
||||
|
||||
/* Timestamp in nanoseconds */
|
||||
typedef uint64_t roctracer_timestamp_t;
|
||||
|
||||
/* Activity record type: a domain/kind/op header followed by three anonymous unions (timing or PC-sample data; device/queue, process/thread or external id; payload size or a string pointer). */
|
||||
typedef struct activity_record_s {
|
||||
uint32_t domain; /* activity domain id */
|
||||
activity_kind_t kind; /* activity kind */
|
||||
activity_op_t op; /* activity op */
|
||||
union {
|
||||
struct {
|
||||
activity_correlation_id_t correlation_id; /* activity ID */
|
||||
roctracer_timestamp_t begin_ns; /* host begin timestamp */
|
||||
roctracer_timestamp_t end_ns; /* host end timestamp */
|
||||
};
|
||||
struct {
|
||||
uint32_t se; /* sampled SE */
|
||||
uint64_t cycle; /* sample cycle */
|
||||
uint64_t pc; /* sample PC */
|
||||
} pc_sample;
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
int device_id; /* device id */
|
||||
uint64_t queue_id; /* queue id */
|
||||
};
|
||||
struct {
|
||||
uint32_t process_id; /* process id */
|
||||
uint32_t thread_id; /* thread id */
|
||||
};
|
||||
struct {
|
||||
activity_correlation_id_t external_id; /* external correlation id */
|
||||
};
|
||||
};
|
||||
union {
|
||||
size_t bytes; /* data size bytes */
|
||||
const char* kernel_name; /* kernel name */
|
||||
const char* mark_message; /* mark message */
|
||||
};
|
||||
} activity_record_t;
|
||||
|
||||
/* Activity sync callback type */
|
||||
typedef void (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data,
|
||||
void* arg);
|
||||
/* Activity async callback type */
|
||||
typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg);
|
||||
|
||||
#endif /* EXT_PROF_PROTOCOL_H_ */
|
||||
@@ -0,0 +1,779 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
/** \mainpage ROC Tracer API Specification
|
||||
*
|
||||
* \section introduction Introduction
|
||||
*
|
||||
* ROCtracer library, Runtimes Generic Callback/Activity APIs.
|
||||
*
|
||||
* The goal of the implementation is to provide a generic profiler, independent
|
||||
* of any specific runtime, to trace API calls and asynchronous activity.
|
||||
*
|
||||
* The API provides functionality for registering the runtimes API callbacks
|
||||
* and asynchronous activity records pool support.
|
||||
*
|
||||
* \section known_limitations Known Limitations and Restrictions
|
||||
*
|
||||
* The ROCtracer API library implementation currently has the following
|
||||
* restrictions. Future releases aim to address these restrictions.
|
||||
*
|
||||
* 1. The ACTIVITY_DOMAIN_HSA_OPS operations HSA_OP_ID_DISPATCH,
|
||||
* HSA_OP_ID_BARRIER, and HSA_OP_ID_RESERVED1 are not currently implemented.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* ROCtracer API interface.
|
||||
*/
|
||||
|
||||
#ifndef ROCTRACER_H_
|
||||
#define ROCTRACER_H_
|
||||
|
||||
/* Placeholder for calling convention and import/export macros */
|
||||
#if !defined(ROCTRACER_CALL)
|
||||
#define ROCTRACER_CALL
|
||||
#endif /* !defined (ROCTRACER_CALL) */
|
||||
|
||||
#if !defined(ROCTRACER_EXPORT_DECORATOR)
|
||||
#if defined(__GNUC__)
|
||||
#define ROCTRACER_EXPORT_DECORATOR __attribute__((visibility("default")))
|
||||
#elif defined(_MSC_VER)
|
||||
#define ROCTRACER_EXPORT_DECORATOR __declspec(dllexport)
|
||||
#endif /* defined (_MSC_VER) */
|
||||
#endif /* !defined (ROCTRACER_EXPORT_DECORATOR) */
|
||||
|
||||
#if !defined(ROCTRACER_IMPORT_DECORATOR)
|
||||
#if defined(__GNUC__)
|
||||
#define ROCTRACER_IMPORT_DECORATOR
|
||||
#elif defined(_MSC_VER)
|
||||
#define ROCTRACER_IMPORT_DECORATOR __declspec(dllimport)
|
||||
#endif /* defined (_MSC_VER) */
|
||||
#endif /* !defined (ROCTRACER_IMPORT_DECORATOR) */
|
||||
|
||||
#define ROCTRACER_EXPORT ROCTRACER_EXPORT_DECORATOR ROCTRACER_CALL
|
||||
#define ROCTRACER_IMPORT ROCTRACER_IMPORT_DECORATOR ROCTRACER_CALL
|
||||
|
||||
#if !defined(ROCTRACER)
|
||||
#if defined(ROCTRACER_EXPORTS)
|
||||
#define ROCTRACER_API ROCTRACER_EXPORT
|
||||
#else /* !defined (ROCTRACER_EXPORTS) */
|
||||
#define ROCTRACER_API ROCTRACER_IMPORT
|
||||
#endif /* !defined (ROCTRACER_EXPORTS) */
|
||||
#endif /* !defined (ROCTRACER) */
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "ext/prof_protocol.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/** \defgroup symbol_versions_group Symbol Versions
|
||||
*
|
||||
* The names used for the shared library versioned symbols.
|
||||
*
|
||||
* Every function is annotated with one of the version macros defined in this
|
||||
* section. Each macro specifies a corresponding symbol version string. After
|
||||
* dynamically loading the shared library with \p dlopen, the address of each
|
||||
* function can be obtained using \p dlvsym with the name of the function and
|
||||
* its corresponding symbol version string. An error will be reported by \p
|
||||
* dlvsym if the installed library does not support the version for the
|
||||
* function specified in this version of the interface.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* The function was introduced in version 4.1 of the interface and has the
|
||||
* symbol version string of ``"ROCTRACER_4.1"``.
|
||||
*/
|
||||
#define ROCTRACER_VERSION_4_1
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup versioning_group Versioning
|
||||
*
|
||||
* Version information about the interface and the associated installed
|
||||
* library.
|
||||
*
|
||||
* The semantic version of the interface following semver.org rules. A client
|
||||
* that uses this interface is only compatible with the installed library if
|
||||
* the major version numbers match and the interface minor version number is
|
||||
* less than or equal to the installed library minor version number.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* The major version of the interface as a macro so it can be used by the
|
||||
* preprocessor.
|
||||
*/
|
||||
#define ROCTRACER_VERSION_MAJOR 4
|
||||
|
||||
/**
|
||||
* The minor version of the interface as a macro so it can be used by the
|
||||
* preprocessor.
|
||||
*/
|
||||
#define ROCTRACER_VERSION_MINOR 1
|
||||
|
||||
/**
|
||||
* Query the major version of the installed library.
|
||||
*
|
||||
* Return the major version of the installed library. This can be used to
|
||||
* check if it is compatible with this interface version. This function can be
|
||||
* used even when the library is not initialized.
|
||||
*/
|
||||
ROCTRACER_API uint32_t roctracer_version_major(void) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Query the minor version of the installed library.
|
||||
*
|
||||
* Return the minor version of the installed library. This can be used to
|
||||
* check if it is compatible with this interface version. This function can be
|
||||
* used even when the library is not initialized.
|
||||
*/
|
||||
ROCTRACER_API uint32_t roctracer_version_minor(void) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup status_codes_group Status Codes
|
||||
*
|
||||
* Most operations return a status code to indicate success or error.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* ROC Tracer API status codes.
|
||||
*/
|
||||
typedef enum {
|
||||
/**
|
||||
* The function has executed successfully.
|
||||
*/
|
||||
ROCTRACER_STATUS_SUCCESS = 0,
|
||||
/**
|
||||
* A generic error has occurred.
|
||||
*/
|
||||
ROCTRACER_STATUS_ERROR = -1,
|
||||
/**
|
||||
* The domain ID is invalid.
|
||||
*/
|
||||
ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID = -2,
|
||||
/**
|
||||
* An invalid argument was given to the function.
|
||||
*/
|
||||
ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT = -3,
|
||||
/**
|
||||
* No default pool is defined.
|
||||
*/
|
||||
ROCTRACER_STATUS_ERROR_DEFAULT_POOL_UNDEFINED = -4,
|
||||
/**
|
||||
* The default pool is already defined.
|
||||
*/
|
||||
ROCTRACER_STATUS_ERROR_DEFAULT_POOL_ALREADY_DEFINED = -5,
|
||||
/**
|
||||
* Memory allocation error.
|
||||
*/
|
||||
ROCTRACER_STATUS_ERROR_MEMORY_ALLOCATION = -6,
|
||||
/**
|
||||
* External correlation ID pop mismatch.
|
||||
*/
|
||||
ROCTRACER_STATUS_ERROR_MISMATCHED_EXTERNAL_CORRELATION_ID = -7,
|
||||
/**
|
||||
* The operation is not currently implemented. This error may be reported by
|
||||
* any function. Check the \ref known_limitations section to determine the
|
||||
* status of the library implementation of the interface.
|
||||
*/
|
||||
ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED = -8,
|
||||
/**
|
||||
* Deprecated error code.
|
||||
*/
|
||||
ROCTRACER_STATUS_UNINIT = 2,
|
||||
/**
|
||||
* Deprecated error code.
|
||||
*/
|
||||
ROCTRACER_STATUS_BREAK = 3,
|
||||
/**
|
||||
* Deprecated error code.
|
||||
*/
|
||||
ROCTRACER_STATUS_BAD_DOMAIN = ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID,
|
||||
/**
|
||||
* Deprecated error code.
|
||||
*/
|
||||
ROCTRACER_STATUS_BAD_PARAMETER = ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
|
||||
/**
|
||||
* Deprecated error code.
|
||||
*/
|
||||
ROCTRACER_STATUS_HIP_API_ERR = 6,
|
||||
/**
|
||||
* Deprecated error code.
|
||||
*/
|
||||
ROCTRACER_STATUS_HIP_OPS_ERR = 7,
|
||||
/**
|
||||
* Deprecated error code.
|
||||
*/
|
||||
ROCTRACER_STATUS_HCC_OPS_ERR = ROCTRACER_STATUS_HIP_OPS_ERR,
|
||||
/**
|
||||
* Deprecated error code. Has the same value (7) as ::ROCTRACER_STATUS_HIP_OPS_ERR.
|
||||
*/
|
||||
ROCTRACER_STATUS_HSA_ERR = 7,
|
||||
/**
|
||||
* Deprecated error code.
|
||||
*/
|
||||
ROCTRACER_STATUS_ROCTX_ERR = 8,
|
||||
} roctracer_status_t;
|
||||
|
||||
/**
|
||||
* Query the textual description of the last error for the current thread.
|
||||
*
|
||||
* Returns a NUL terminated string describing the error of the last ROC Tracer
|
||||
* API call by the calling thread that did not return success. The empty
|
||||
* string is returned if there is no previous error. The last error is not
|
||||
* cleared.
|
||||
*
|
||||
* \return Return the error string. The caller owns the returned string and
|
||||
* should use \p free() to deallocate it.
|
||||
*/
|
||||
ROCTRACER_API const char* roctracer_error_string(void) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup domain_group Traced Runtime Domains
|
||||
*
|
||||
* The ROC Tracer API can trace multiple runtime libraries. Each library can
|
||||
* have API operations and asynchronous operations that can be traced.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Enumeration of domains that can be traced.
|
||||
*/
|
||||
typedef activity_domain_t roctracer_domain_t;
|
||||
|
||||
/**
|
||||
* Query textual name of an operation of a domain.
|
||||
*
|
||||
* @param[in] domain Domain being queried.
|
||||
*
|
||||
* @param[in] op Operation within \p domain.
|
||||
*
|
||||
* @param[in] kind \todo Define kind.
|
||||
*
|
||||
* @return Returns the NUL terminated string for the operation name, or NULL if
|
||||
* the domain or operation are invalid. The string is owned by the ROC Tracer
|
||||
* library.
|
||||
*/
|
||||
ROCTRACER_API const char* roctracer_op_string(
|
||||
uint32_t domain, uint32_t op, uint32_t kind) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Query the operation code given a domain and the name of an operation.
|
||||
*
|
||||
* @param[in] domain The domain being queried.
|
||||
*
|
||||
* @param[in] str The NUL terminated name of the operation name being queried.
|
||||
*
|
||||
* @param[out] op The operation code.
|
||||
*
|
||||
* @param[out] kind If not NULL then the operation kind code.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully. \p op and \p kind have been updated.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT The \p op is invalid for
|
||||
* \p domain.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID The domain is invalid or
|
||||
* not supported.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t
|
||||
roctracer_op_code(uint32_t domain, const char* str, uint32_t* op,
|
||||
uint32_t* kind) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Set the properties of a domain.
|
||||
*
|
||||
* @param[in] domain The domain.
|
||||
*
|
||||
* @param[in] properties The properties. Each domain defines its own type for
|
||||
* the properties. Some domains require the properties to be set before they
|
||||
* can be enabled.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_set_properties(
|
||||
roctracer_domain_t domain, void* properties) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup callback_api_group Callback API
|
||||
*
|
||||
* ROC tracer provides support for runtime API callbacks and activity
|
||||
* records logging. The API callbacks provide the API calls arguments and are
|
||||
* called on different phases, on enter, on exit, on kernel completion.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Runtime API callback type.
|
||||
*
|
||||
* The callback that will be invoked when an enabled runtime API is called. The
|
||||
* callback is invoked on entry and on exit.
|
||||
*/
|
||||
typedef activity_rtapi_callback_t roctracer_rtapi_callback_t;
|
||||
|
||||
/**
|
||||
* Enable runtime API callback for a specific operation of a domain.
|
||||
*
|
||||
* @param domain The domain.
|
||||
*
|
||||
* @param op The operation ID in \p domain.
|
||||
*
|
||||
* @param callback The callback to invoke each time the operation is performed
|
||||
* on entry and exit.
|
||||
*
|
||||
* @param arg Value to pass as last argument of \p callback.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID \p domain is invalid.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT \p op is invalid for \p
|
||||
* domain.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_enable_op_callback(
|
||||
activity_domain_t domain, uint32_t op, activity_rtapi_callback_t callback,
|
||||
void* arg) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Enable runtime API callback for all operations of a domain.
|
||||
*
|
||||
* @param domain The domain
|
||||
*
|
||||
* @param callback The callback to invoke each time the operation is performed
|
||||
* on entry and exit.
|
||||
*
|
||||
* @param arg Value to pass as last argument of \p callback.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID \p domain is invalid.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_enable_domain_callback(
|
||||
activity_domain_t domain, activity_rtapi_callback_t callback,
|
||||
void* arg) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Disable runtime API callback for a specific operation of a domain.
|
||||
*
|
||||
* @param domain The domain
|
||||
*
|
||||
* @param op The operation in \p domain.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID \p domain is invalid.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT \p op is invalid for \p
|
||||
* domain.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_disable_op_callback(
|
||||
activity_domain_t domain, uint32_t op) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Disable runtime API callback for all operations of a domain.
|
||||
*
|
||||
* @param domain The domain
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID \p domain is invalid.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_disable_domain_callback(
|
||||
activity_domain_t domain) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup activity_api_group Activity API
|
||||
*
|
||||
* The activity records are asynchronously logged to the pool and can be
|
||||
* associated with the respective API callbacks using the correlation ID.
|
||||
* Activity API can be used to enable collecting of the records with
|
||||
* timestamping data for API calls and the kernel submits.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Activity record.
|
||||
*
|
||||
* Asynchronous activity events generate activity records.
|
||||
*/
|
||||
typedef activity_record_t roctracer_record_t;
|
||||
|
||||
/**
|
||||
* Get a pointer to the next activity record.
|
||||
*
|
||||
* A memory pool generates buffers that contain multiple activity records.
|
||||
* This function steps to the next activity record.
|
||||
*
|
||||
* @param[in] record Pointer to an activity record in a memory pool buffer.
|
||||
*
|
||||
* @param[out] next Pointer to the following activity record in the memory pool
|
||||
* buffer.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t
|
||||
roctracer_next_record(const activity_record_t* record,
|
||||
const activity_record_t** next) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Memory pool allocator callback.
|
||||
*
|
||||
* If \p *ptr is NULL, then allocate memory of \p size bytes and save address
|
||||
* in \p *ptr.
|
||||
*
|
||||
* If \p *ptr is non-NULL and size is non-0, then reallocate the memory at \p
|
||||
* *ptr with size \p size and save the address in \p *ptr. The memory will have
|
||||
* been allocated by the same callback.
|
||||
*
|
||||
* If \p *ptr is non-NULL and size is 0, then deallocate the memory at \p *ptr.
|
||||
* The memory will have been allocated by the same callback.
|
||||
*
|
||||
* \p size is the size of the memory allocation or reallocation, or 0 if
|
||||
* deallocating.
|
||||
*
|
||||
* \p arg Argument provided in the ::roctracer_properties_t passed to the
|
||||
* ::roctracer_open_pool function.
|
||||
*/
|
||||
typedef void (*roctracer_allocator_t)(char** ptr, size_t size, void* arg);
|
||||
|
||||
/**
|
||||
* Memory pool buffer callback.
|
||||
*
|
||||
* The callback that will be invoked when a memory pool buffer becomes full or
|
||||
* is flushed.
|
||||
*
|
||||
* \p begin pointer to first entry in the buffer.
|
||||
*
|
||||
* \p end pointer to one past the end entry in the buffer.
|
||||
*
|
||||
* \p arg the argument specified when the callback was defined.
|
||||
*/
|
||||
typedef void (*roctracer_buffer_callback_t)(const char* begin, const char* end,
|
||||
void* arg);
|
||||
|
||||
/**
|
||||
* Memory pool properties.
|
||||
*
|
||||
* Defines the properties when a tracer memory pool is created.
|
||||
*/
|
||||
typedef struct {
|
||||
/**
|
||||
* ROC Tracer mode.
|
||||
*/
|
||||
uint32_t mode;
|
||||
|
||||
/**
|
||||
* Size of buffer in bytes.
|
||||
*/
|
||||
size_t buffer_size;
|
||||
|
||||
/**
|
||||
* The allocator function to use to allocate and deallocate the buffer. If
|
||||
* NULL then \p malloc, \p realloc, and \p free are used.
|
||||
*/
|
||||
roctracer_allocator_t alloc_fun;
|
||||
|
||||
/**
|
||||
* The argument to pass when invoking the \p alloc_fun allocator.
|
||||
*/
|
||||
void* alloc_arg;
|
||||
|
||||
/**
|
||||
* The function to call when a buffer becomes full or is flushed.
|
||||
*/
|
||||
roctracer_buffer_callback_t buffer_callback_fun;
|
||||
|
||||
/**
|
||||
* The argument to pass when invoking the \p buffer_callback_fun callback.
|
||||
*/
|
||||
void* buffer_callback_arg;
|
||||
} roctracer_properties_t;
|
||||
|
||||
/**
|
||||
* Tracer memory pool type.
|
||||
*/
|
||||
typedef void roctracer_pool_t;
|
||||
|
||||
/**
|
||||
* Create tracer memory pool.
|
||||
*
|
||||
* If \p pool is not NULL, returns the created memory pool. Does not change the
|
||||
* default memory pool.
|
||||
*
|
||||
* If \p pool is NULL, sets the default memory pool to the created pool if not
|
||||
* already defined. Otherwise, return an error.
|
||||
*
|
||||
* @param[in] properties Tracer memory pool properties.
|
||||
*
|
||||
* @param[out] pool Tracer memory pool created if not NULL.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_DEFAULT_POOL_ALREADY_DEFINED \p pool is NULL
|
||||
* and the default pool is already defined. Unable to create the pool.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_MEMORY_ALLOCATION Unable to allocate memory
|
||||
* for the \p pool. Unable to create the pool.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t
|
||||
roctracer_open_pool_expl(const roctracer_properties_t* properties,
|
||||
roctracer_pool_t** pool) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Create tracer memory pool.
|
||||
*
|
||||
* Sets the default memory pool to the created pool if not already defined.
|
||||
* Otherwise, return an error.
|
||||
*
|
||||
* @param[in] properties Tracer memory pool properties.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_DEFAULT_POOL_ALREADY_DEFINED The default pool
|
||||
* is already defined. Unable to create the pool.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR_MEMORY_ALLOCATION Unable to allocate memory
|
||||
* for the \p pool. Unable to create the pool.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_open_pool(
|
||||
const roctracer_properties_t* properties) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Close tracer memory pool.
|
||||
*
|
||||
* All enabled activities that use the pool must have completed writing to the
|
||||
* pool, before deleting the pool. Deleting a pool automatically disables any
|
||||
* activities that specify the pool, and flushes it.
|
||||
*
|
||||
* @param[in] pool Memory pool to close. If NULL, the default memory pool is
|
||||
* closed if defined. The default memory pool is set to undefined if closed.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully or pool was NULL and there is no default pool.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t
|
||||
roctracer_close_pool_expl(roctracer_pool_t* pool) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Close default tracer memory pool, if defined, and set to undefined.
|
||||
*
|
||||
* All enabled activities that use the pool must have completed writing to the
|
||||
* pool, before deleting the pool. Deleting a pool automatically disables any
|
||||
* activities that specify the pool, and flushes it.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully or there is no default pool.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_close_pool(void) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Query and set the default memory pool.
|
||||
*
|
||||
* @param[in] pool If not NULL, change the current default pool to \p pool. If
|
||||
* NULL, the default pool is not changed.
|
||||
*
|
||||
* @return Return the current default memory pool before any change, or NULL if
|
||||
* none is defined.
|
||||
*/
|
||||
ROCTRACER_API roctracer_pool_t* roctracer_default_pool_expl(
|
||||
roctracer_pool_t* pool) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Query the current default memory pool.
|
||||
*
|
||||
* @return Return the current default memory pool, or NULL if none is defined.
|
||||
*/
|
||||
ROCTRACER_API roctracer_pool_t* roctracer_default_pool(void) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Enable activity record logging for a specified operation of a domain
|
||||
* providing a memory pool.
|
||||
*
|
||||
* @param[in] domain The domain.
|
||||
*
|
||||
* @param[in] op The activity operation ID in \p domain.
|
||||
*
|
||||
* @param[in] pool The memory pool to write the activity record. If NULL, use
|
||||
* the default memory pool.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR \p pool is NULL and no default pool is
|
||||
* defined.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_enable_op_activity_expl(
|
||||
activity_domain_t domain, uint32_t op,
|
||||
roctracer_pool_t* pool) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Enable activity record logging for a specified operation of a domain using
|
||||
* the default memory pool.
|
||||
*
|
||||
* @param[in] domain The domain.
|
||||
*
|
||||
* @param[in] op The activity operation ID in \p domain.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR No default pool is defined.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_enable_op_activity(
|
||||
activity_domain_t domain, uint32_t op) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Enable activity record logging for all operations of a domain providing a
|
||||
* memory pool.
|
||||
*
|
||||
* @param[in] domain The domain.
|
||||
*
|
||||
* @param[in] pool The memory pool to write the activity record. If NULL, use
|
||||
* the default memory pool.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR \p pool is NULL and no default pool is
|
||||
* defined.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity_expl(
|
||||
activity_domain_t domain, roctracer_pool_t* pool) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Enable activity record logging for all operations of a domain using the
|
||||
* default memory pool.
|
||||
*
|
||||
* @param[in] domain The domain.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_ERROR No default pool is defined.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity(
|
||||
activity_domain_t domain) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Disable activity record logging for a specified operation of a domain.
|
||||
*
|
||||
* @param[in] domain The domain.
|
||||
*
|
||||
* @param[in] op The activity operation ID in \p domain.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_disable_op_activity(
|
||||
activity_domain_t domain, uint32_t op) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Disable activity record logging for all operations of a domain.
|
||||
*
|
||||
* @param[in] domain The domain.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_disable_domain_activity(
|
||||
activity_domain_t domain) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Flush available activity records for a memory pool.
|
||||
*
|
||||
* If flushing encounters an activity record still being written, flushing
|
||||
* stops. Use a subsequent flush when the record has completed being written to
|
||||
* resume the flush.
|
||||
*
|
||||
* @param[in] pool The memory pool to flush. If NULL, flushes the default
|
||||
* memory pool.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t
|
||||
roctracer_flush_activity_expl(roctracer_pool_t* pool) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Flush available activity records for the default memory pool.
|
||||
*
|
||||
* If flushing encounters an activity record still being written, flushing
|
||||
* stops. Use a subsequent flush when the record has completed being written to
|
||||
* resume the flush.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_flush_activity(void) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup timestamp_group Timestamp Operations
|
||||
*
|
||||
*
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Get the system clock timestamp.
|
||||
*
|
||||
* @param[out] timestamp The system clock timestamp in nanoseconds.
|
||||
*
|
||||
* @retval ::ROCTRACER_STATUS_SUCCESS The function has been executed
|
||||
* successfully.
|
||||
*/
|
||||
ROCTRACER_API roctracer_status_t roctracer_get_timestamp(
|
||||
roctracer_timestamp_t* timestamp) ROCTRACER_VERSION_4_1;
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" block */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* ROCTRACER_H_ */
|
||||
@@ -0,0 +1,81 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// ROC Tracer Extension API
|
||||
//
|
||||
// The API provides functionality for application annotation with event and
|
||||
// external ranges correlation
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef ROCTRACER_EXT_H_
|
||||
#define ROCTRACER_EXT_H_
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
/* Extension API opcodes */
|
||||
typedef enum {
|
||||
ACTIVITY_EXT_OP_MARK = 0,
|
||||
ACTIVITY_EXT_OP_EXTERN_ID = 1
|
||||
} activity_ext_op_t;
|
||||
|
||||
typedef void (*roctracer_start_cb_t)();
|
||||
typedef void (*roctracer_stop_cb_t)();
|
||||
typedef struct {
|
||||
roctracer_start_cb_t start_cb;
|
||||
roctracer_stop_cb_t stop_cb;
|
||||
} roctracer_ext_properties_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Application annotation API
|
||||
|
||||
// Tracing start API
|
||||
void ROCTRACER_API roctracer_start() ROCTRACER_VERSION_4_1;
|
||||
|
||||
// Tracing stop API
|
||||
void ROCTRACER_API roctracer_stop() ROCTRACER_VERSION_4_1;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// External correlation id API
|
||||
|
||||
// Notifies that the calling thread is entering an external API region.
|
||||
// Push an external correlation id for the calling thread.
|
||||
roctracer_status_t ROCTRACER_API
|
||||
roctracer_activity_push_external_correlation_id(activity_correlation_id_t id)
|
||||
ROCTRACER_VERSION_4_1;
|
||||
|
||||
// Notifies that the calling thread is leaving an external API region.
|
||||
// Pop an external correlation id for the calling thread.
|
||||
// 'last_id' receives the last external correlation id if it is not NULL
|
||||
roctracer_status_t ROCTRACER_API
|
||||
roctracer_activity_pop_external_correlation_id(
|
||||
activity_correlation_id_t* last_id) ROCTRACER_VERSION_4_1;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
#endif // ROCTRACER_EXT_H_
|
||||
@@ -0,0 +1,24 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma message( \
|
||||
"This file has been deprecated and marked for removal. Please use roctracer_hip.h instead.")
|
||||
|
||||
#include "roctracer_hip.h"
|
||||
@@ -0,0 +1,38 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef ROCTRACER_HIP_H_
|
||||
#define ROCTRACER_HIP_H_
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hip_deprecated.h>
|
||||
#include "hip_ostream_ops.h"
|
||||
#include <hip/amd_detail/hip_prof_str.h>
|
||||
|
||||
/* HIP operation IDs. HIP_OP_ID_NUMBER equals the number of IDs listed
   before it. */
typedef enum {
  HIP_OP_ID_DISPATCH = 0,
  HIP_OP_ID_COPY,    /* value 1 */
  HIP_OP_ID_BARRIER, /* value 2 */
  HIP_OP_ID_NUMBER   /* value 3 */
} hip_op_id_t;
|
||||
|
||||
#endif // ROCTRACER_HIP_H_
|
||||
@@ -0,0 +1,112 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef ROCTRACER_HSA_H_
|
||||
#define ROCTRACER_HSA_H_
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include "hsa_ostream_ops.h"
|
||||
#include "hsa_prof_str.h"
|
||||
|
||||
// HSA operation IDs. HSA_OP_ID_NUMBER equals the number of IDs listed
// before it (including the reserved slot).
enum hsa_op_id_t {
  HSA_OP_ID_DISPATCH = 0,
  HSA_OP_ID_COPY = 1,
  HSA_OP_ID_BARRIER = 2,
  HSA_OP_ID_RESERVED1 = 3,
  HSA_OP_ID_NUMBER = 4
};
|
||||
|
||||
// HSA event IDs. HSA_EVT_ID_NUMBER equals the number of IDs listed before it.
enum hsa_evt_id_t {
  // Memory allocate callback
  HSA_EVT_ID_ALLOCATE = 0,
  // Device assign callback
  HSA_EVT_ID_DEVICE = 1,
  // Memcopy callback
  HSA_EVT_ID_MEMCOPY = 2,
  // Packet submission callback
  HSA_EVT_ID_SUBMIT = 3,
  // Loading/unloading of kernel symbol
  HSA_EVT_ID_KSYMBOL = 4,
  // Loading/unloading of device code object
  HSA_EVT_ID_CODEOBJ = 5,
  HSA_EVT_ID_NUMBER = 6
};
|
||||
|
||||
// HSA ops properties: currently only four reserved pointer-sized slots.
struct hsa_ops_properties_t {
  void* reserved1[4];  // reserved
};
|
||||
|
||||
// HSA EVT data type
|
||||
typedef struct {
|
||||
union {
|
||||
struct {
|
||||
const void* ptr; // allocated area ptr
|
||||
size_t size; // allocated area size, zero size means 'free' callback
|
||||
hsa_amd_segment_t segment; // allocated area's memory segment type
|
||||
hsa_amd_memory_pool_global_flag_t
|
||||
global_flag; // allocated area's memory global flag
|
||||
int is_code; // equal to 1 if code is allocated
|
||||
} allocate;
|
||||
|
||||
struct {
|
||||
hsa_device_type_t type; // type of assigned device
|
||||
uint32_t id; // id of assigned device
|
||||
hsa_agent_t agent; // device HSA agent handle
|
||||
const void* ptr; // ptr the device is assigned to
|
||||
} device;
|
||||
|
||||
struct {
|
||||
const void* dst; // memcopy dst ptr
|
||||
const void* src; // memcopy src ptr
|
||||
size_t size; // memcopy size bytes
|
||||
} memcopy;
|
||||
|
||||
struct {
|
||||
const void* packet; // submitted to GPU packet
|
||||
const char*
|
||||
kernel_name; // kernel name, NULL if not a kernel dispatch packet
|
||||
hsa_queue_t* queue; // HSA queue the packet was submitted to
|
||||
uint32_t device_type; // type of device the packet is submitted to
|
||||
uint32_t device_id; // id of device the packet is submitted to
|
||||
} submit;
|
||||
|
||||
struct {
|
||||
uint64_t object; // kernel symbol object
|
||||
const char* name; // kernel symbol name
|
||||
uint32_t name_length; // kernel symbol name length
|
||||
int unload; // symbol executable destroy
|
||||
} ksymbol;
|
||||
|
||||
struct {
|
||||
uint32_t storage_type; // code object storage type
|
||||
int storage_file; // origin file descriptor
|
||||
uint64_t memory_base; // origin memory base
|
||||
uint64_t memory_size; // origin memory size
|
||||
uint64_t load_base; // code object load base
|
||||
uint64_t load_size; // code object load size
|
||||
uint64_t load_delta; // code object load size
|
||||
uint32_t uri_length; // URI string length (not including the terminating
|
||||
// NUL character)
|
||||
const char* uri; // URI string
|
||||
int unload; // unload flag
|
||||
} codeobj;
|
||||
};
|
||||
} hsa_evt_data_t;
|
||||
|
||||
#endif // ROCTRACER_HSA_H_
|
||||
@@ -0,0 +1,137 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
/** \section roctracer_plugin_api ROCtracer Plugin API
|
||||
*
|
||||
* The ROCtracer Plugin API is used by the ROCtracer Tool to output all tracing
|
||||
* information. Different implementations of the ROCtracer Plugin API can be
|
||||
* developed that output the tracing data in different formats.
|
||||
* The ROCtracer Tool can be configured to load a specific library that
|
||||
* supports the user desired format.
|
||||
*
|
||||
* The API is not thread safe. It is the responsibility of the ROCtracer Tool
|
||||
* to ensure the operations are synchronized and not called concurrently. There
|
||||
* is no requirement for the ROCtracer Tool to report trace data in any
|
||||
* specific order. If the format supported by plugin requires specific
|
||||
* ordering, it is the responsibility of the plugin implementation to perform
|
||||
* any necessary sorting.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* ROCtracer Tool Plugin API interface.
|
||||
*/
|
||||
|
||||
#ifndef ROCTRACER_PLUGIN_H_
|
||||
#define ROCTRACER_PLUGIN_H_
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/** \defgroup initialization_group Initialization and Finalization
|
||||
*
|
||||
* The ROCtracer Plugin API must be initialized before using any of the
|
||||
* operations to report trace data, and finalized after the last trace data has
|
||||
* been reported.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Initialize plugin.
|
||||
*
|
||||
* Must be called before any other operation.
|
||||
*
|
||||
* @param[in] roctracer_major_version The major version of the ROCtracer API
|
||||
* being used by the ROCtracer Tool. An error is reported if this does not
|
||||
* match the major version of the ROCtracer API used to build the plugin
|
||||
* library. This ensures compatibility of the trace data format.
|
||||
*
|
||||
* @param[in] roctracer_minor_version The minor version of the ROCtracer API
|
||||
* being used by the ROCtracer Tool. An error is reported if the
|
||||
* \p roctracer_major_version matches and this is greater than the minor
|
||||
* version of the ROCtracer API used to build the plugin library. This ensures
|
||||
* compatibility of the trace data format.
|
||||
*
|
||||
* @return Returns 0 on success and -1 on error.
|
||||
*/
|
||||
ROCTRACER_EXPORT int roctracer_plugin_initialize(
|
||||
uint32_t roctracer_major_version, uint32_t roctracer_minor_version);
|
||||
|
||||
/**
|
||||
* Finalize plugin.
|
||||
*
|
||||
* This must be called after ::roctracer_plugin_initialize and after all trace
|
||||
* data has been reported by ::roctracer_plugin_write_callback_record and
|
||||
* ::roctracer_plugin_write_activity_records.
|
||||
*/
|
||||
ROCTRACER_EXPORT void roctracer_plugin_finalize();
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup trace_record_write_functions Trace data reporting
|
||||
*
|
||||
* Operations to output trace data.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Report a single callback trace record.
|
||||
*
|
||||
* @param[in] record Primarily domain independent trace data.
|
||||
*
|
||||
* @param[in] callback_data Domain specific trace data. The type of this
|
||||
* argument depends on the values of \p record.domain.
|
||||
*
|
||||
* @return Returns 0 on success and -1 on error.
|
||||
*/
|
||||
ROCTRACER_EXPORT int roctracer_plugin_write_callback_record(
|
||||
const roctracer_record_t* record, const void* callback_data);
|
||||
|
||||
/**
|
||||
* Report a range of activity trace data.
|
||||
*
|
||||
* Reports a range of primarily domain independent trace data. The range is
|
||||
* specified by a pointer to the first record and a pointer to one past the
|
||||
* last record. ::roctracer_next_record is used to iterate the range in forward
|
||||
* order.
|
||||
*
|
||||
* @param[in] begin Pointer to the first record.
|
||||
*
|
||||
* @param[in] end Pointer to one past the last record.
|
||||
*
|
||||
* @return Returns 0 on success and -1 on error.
|
||||
*/
|
||||
ROCTRACER_EXPORT int roctracer_plugin_write_activity_records(
|
||||
const roctracer_record_t* begin, const roctracer_record_t* end);
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#endif /* ROCTRACER_PLUGIN_H_ */
|
||||
@@ -0,0 +1,67 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef ROCTRACER_ROCTX_H_
|
||||
#define ROCTRACER_ROCTX_H_
|
||||
|
||||
#include "roctx.h"
|
||||
|
||||
/**
 * ROCTX API ID enumeration.
 *
 * One ID per traced ROCTX entry point; ROCTX_API_ID_NUMBER equals the number
 * of IDs listed before it.
 */
enum roctx_api_id_t {
  ROCTX_API_ID_roctxMarkA = 0,
  ROCTX_API_ID_roctxRangePushA = 1,
  ROCTX_API_ID_roctxRangePop = 2,
  ROCTX_API_ID_roctxRangeStartA = 3,
  ROCTX_API_ID_roctxRangeStop = 4,
  ROCTX_API_ID_NUMBER = 5
};
|
||||
|
||||
/**
|
||||
* ROCTX callbacks data type
|
||||
*/
|
||||
typedef struct roctx_api_data_s {
|
||||
union {
|
||||
struct {
|
||||
const char* message;
|
||||
roctx_range_id_t id;
|
||||
};
|
||||
struct {
|
||||
const char* message;
|
||||
} roctxMarkA;
|
||||
struct {
|
||||
const char* message;
|
||||
} roctxRangePushA;
|
||||
struct {
|
||||
const char* message;
|
||||
} roctxRangePop;
|
||||
struct {
|
||||
const char* message;
|
||||
roctx_range_id_t id;
|
||||
} roctxRangeStartA;
|
||||
struct {
|
||||
const char* message;
|
||||
roctx_range_id_t id;
|
||||
} roctxRangeStop;
|
||||
} args;
|
||||
} roctx_api_data_t;
|
||||
|
||||
#endif /* ROCTRACER_ROCTX_H_ */
|
||||
@@ -0,0 +1,229 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
/** \mainpage ROCTX API Specification
|
||||
*
|
||||
* \section introduction Introduction
|
||||
* ROCTX is a library that implements the AMD code annotation API. It provides
|
||||
* the support necessary to annotate events and code ranges in applications.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* ROCTX API interface.
|
||||
*/
|
||||
|
||||
#ifndef ROCTX_H_
|
||||
#define ROCTX_H_ 1
|
||||
|
||||
/*
 * Calling convention and import/export macros.
 *
 * Every macro below can be pre-defined by the build to override the default:
 *   ROCTX_CALL              calling convention (empty by default)
 *   ROCTX_EXPORT_DECORATOR  decoration for symbols exported by the library
 *   ROCTX_IMPORT_DECORATOR  decoration for symbols imported by clients
 *   ROCTX_API               decoration applied to every API declaration;
 *                           resolves to the export form when ROCTX_EXPORTS is
 *                           defined and to the import form otherwise
 */
#if !defined(ROCTX_CALL)
#define ROCTX_CALL
#endif /* !defined (ROCTX_CALL) */

#if !defined(ROCTX_EXPORT_DECORATOR)
#if defined(__GNUC__)
#define ROCTX_EXPORT_DECORATOR __attribute__((visibility("default")))
#elif defined(_MSC_VER)
#define ROCTX_EXPORT_DECORATOR __declspec(dllexport)
#else
/* Unknown toolchain: fall back to no decoration instead of leaving the macro
   undefined, which would break every declaration that uses it. */
#define ROCTX_EXPORT_DECORATOR
#endif /* toolchain selection */
#endif /* !defined (ROCTX_EXPORT_DECORATOR) */

#if !defined(ROCTX_IMPORT_DECORATOR)
#if defined(__GNUC__)
#define ROCTX_IMPORT_DECORATOR
#elif defined(_MSC_VER)
#define ROCTX_IMPORT_DECORATOR __declspec(dllimport)
#else
/* Unknown toolchain: no decoration (see ROCTX_EXPORT_DECORATOR). */
#define ROCTX_IMPORT_DECORATOR
#endif /* toolchain selection */
#endif /* !defined (ROCTX_IMPORT_DECORATOR) */

#define ROCTX_EXPORT ROCTX_EXPORT_DECORATOR ROCTX_CALL
#define ROCTX_IMPORT ROCTX_IMPORT_DECORATOR ROCTX_CALL

/* The guard tests ROCTX_API itself, the macro being defined. It used to test
   ROCTX, so a pre-defined ROCTX_API was redefined, and defining ROCTX left
   ROCTX_API undefined. */
#if !defined(ROCTX_API)
#if defined(ROCTX_EXPORTS)
#define ROCTX_API ROCTX_EXPORT
#else /* !defined (ROCTX_EXPORTS) */
#define ROCTX_API ROCTX_IMPORT
#endif /* !defined (ROCTX_EXPORTS) */
#endif /* !defined (ROCTX_API) */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif /* defined(__cplusplus) */
|
||||
|
||||
/** \defgroup symbol_versions_group Symbol Versions
|
||||
*
|
||||
* The names used for the shared library versioned symbols.
|
||||
*
|
||||
* Every function is annotated with one of the version macros defined in this
|
||||
* section. Each macro specifies a corresponding symbol version string. After
|
||||
* dynamically loading the shared library with \p dlopen, the address of each
|
||||
* function can be obtained using \p dlvsym with the name of the function and
|
||||
* its corresponding symbol version string. An error will be reported by \p
|
||||
* dlvsym if the installed library does not support the version for the
|
||||
* function specified in this version of the interface.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* The function was introduced in version 4.1 of the interface and has the
|
||||
* symbol version string of ``"ROCTX_4.1"``.
|
||||
*/
|
||||
#define ROCTX_VERSION_4_1
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup versioning_group Versioning
|
||||
*
|
||||
* Version information about the interface and the associated installed
|
||||
* library.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* The semantic version of the interface following
|
||||
* [semver.org][semver] rules.
|
||||
*
|
||||
* A client that uses this interface is only compatible with the installed
|
||||
* library if the major version numbers match and the interface minor version
|
||||
* number is less than or equal to the installed library minor version number.
|
||||
*/
|
||||
|
||||
/**
|
||||
* The major version of the interface as a macro so it can be used by the
|
||||
* preprocessor.
|
||||
*/
|
||||
#define ROCTX_VERSION_MAJOR 4
|
||||
|
||||
/**
|
||||
* The minor version of the interface as a macro so it can be used by the
|
||||
* preprocessor.
|
||||
*/
|
||||
#define ROCTX_VERSION_MINOR 1
|
||||
|
||||
/**
|
||||
* Query the major version of the installed library.
|
||||
*
|
||||
* Return the major version of the installed library. This can be used to check
|
||||
* if it is compatible with this interface version.
|
||||
*
|
||||
* \return Returns the major version number.
|
||||
*/
|
||||
ROCTX_API uint32_t roctx_version_major() ROCTX_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* Query the minor version of the installed library.
|
||||
*
|
||||
* Return the minor version of the installed library. This can be used to check
|
||||
* if it is compatible with this interface version.
|
||||
*
|
||||
* \return Returns the minor version number.
|
||||
*/
|
||||
ROCTX_API uint32_t roctx_version_minor() ROCTX_VERSION_4_1;
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup marker_group ROCTX Markers
|
||||
*
|
||||
* Marker annotations are used to describe events in a ROCm application.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Mark an event.
|
||||
*
|
||||
* \param[in] message The message associated with the event.
|
||||
*/
|
||||
ROCTX_API void roctxMarkA(const char* message) ROCTX_VERSION_4_1;
|
||||
#define roctxMark(message) roctxMarkA(message)
|
||||
|
||||
/** @} */
|
||||
|
||||
/** \defgroup range_group ROCTX Ranges
|
||||
*
|
||||
* Range annotations are used to describe code ranges in a ROCm application.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* Start a new nested range.
|
||||
*
|
||||
* Nested ranges are stacked and local to the current CPU thread.
|
||||
*
|
||||
* \param[in] message The message associated with this range.
|
||||
*
|
||||
* \return Returns the level this nested range is started at. Nested range
|
||||
* levels are 0 based.
|
||||
*/
|
||||
ROCTX_API int roctxRangePushA(const char* message) ROCTX_VERSION_4_1;
|
||||
#define roctxRangePush(message) roctxRangePushA(message)
|
||||
|
||||
/**
|
||||
* Stop the current nested range.
|
||||
*
|
||||
* Stop the current nested range, and pop it from the stack. If a nested range
|
||||
* was active before the last one was started, it becomes again the current
|
||||
* nested range.
|
||||
*
|
||||
* \return Returns the level the stopped nested range was started at, or a
|
||||
* negative value if there was no nested range active.
|
||||
*/
|
||||
ROCTX_API int roctxRangePop() ROCTX_VERSION_4_1;
|
||||
|
||||
/**
|
||||
* ROCTX range ID.
|
||||
*
|
||||
* This is the range ID used to identify start/end ranges.
|
||||
*/
|
||||
typedef uint64_t roctx_range_id_t;
|
||||
|
||||
/**
|
||||
* Starts a process range.
|
||||
*
|
||||
* Start/stop ranges can be started and stopped in different threads. Each
|
||||
* timespan is assigned a unique range ID.
|
||||
*
|
||||
* \param[in] message The message associated with this range.
|
||||
*
|
||||
* \return Returns the ID of the new range.
|
||||
*/
|
||||
ROCTX_API roctx_range_id_t roctxRangeStartA(const char* message)
|
||||
ROCTX_VERSION_4_1;
|
||||
#define roctxRangeStart(message) roctxRangeStartA(message)
|
||||
|
||||
/**
|
||||
* Stop a process range.
|
||||
*/
|
||||
ROCTX_API void roctxRangeStop(roctx_range_id_t id) ROCTX_VERSION_4_1;
|
||||
|
||||
/** @} */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} /* extern "C" */
|
||||
#endif /* defined (__cplusplus) */
|
||||
|
||||
#endif /* ROCTX_H_ */
|
||||
@@ -0,0 +1,23 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
add_subdirectory(file)
|
||||
@@ -0,0 +1,7 @@
|
||||
{
|
||||
global: roctracer_plugin_initialize;
|
||||
roctracer_plugin_finalize;
|
||||
roctracer_plugin_write_callback_record;
|
||||
roctracer_plugin_write_activity_records;
|
||||
local: *;
|
||||
};
|
||||
@@ -0,0 +1,55 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
# --- Dependencies -------------------------------------------------------------
# The Code Object Manager (comgr) is required; /opt/rocm/ is given as an extra
# search path for its CMake package config.
find_package(amd_comgr REQUIRED CONFIG
  PATHS /opt/rocm/
  PATH_SUFFIXES lib/cmake/amd_comgr)
message(STATUS "Code Object Manager found at ${amd_comgr_DIR}.")

# --- Target -------------------------------------------------------------------
# Every .cpp file in this directory is compiled into the plugin library.
file(GLOB FILE_SOURCES "*.cpp")
add_library(file_plugin ${LIBRARY_TYPE} ${FILE_SOURCES})

# Symbols are hidden by default; the version script (../exportmap) passed to the
# linker below lists the symbols that are exported. LINK_DEPENDS makes the
# library relink when that script changes.
set_target_properties(file_plugin
  PROPERTIES
    CXX_VISIBILITY_PRESET hidden
    LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
    LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}
    INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")

target_compile_definitions(file_plugin
  PRIVATE
    HIP_PROF_HIP_API_STRING=1
    __HIP_PLATFORM_AMD__=1)

target_include_directories(file_plugin
  PRIVATE
    ${PROJECT_SOURCE_DIR}/inc)

target_link_options(file_plugin
  PRIVATE
    -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../exportmap
    -Wl,--no-undefined)

# Link line kept exactly as upstream (amd_comgr appears twice).
target_link_libraries(file_plugin
  PRIVATE
    util
    roctracer
    amd_comgr
    hsa-runtime64::hsa-runtime64
    stdc++fs
    amd_comgr)

# --- Installation -------------------------------------------------------------
# The library is installed to the same destination for two components.
install(TARGETS file_plugin
  LIBRARY
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
    COMPONENT runtime)

install(TARGETS file_plugin
  LIBRARY
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME}
    COMPONENT asan)
|
||||
@@ -0,0 +1,399 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "debug.h"
|
||||
|
||||
#include <roctracer_ext.h>
|
||||
#include <roctracer_hip.h>
|
||||
#include <roctracer_hsa.h>
|
||||
#include <roctracer_plugin.h>
|
||||
#include <roctracer_roctx.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <experimental/filesystem>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include <amd_comgr/amd_comgr.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <cassert>
|
||||
|
||||
// Status check for ROCtracer calls: evaluates `call` once and, when the result
// is non-zero, hands the text from roctracer_error_string() to fatal().
#define CHECK_ROCTRACER(call)                \
  do {                                       \
    if ((call) != 0) {                       \
      fatal("%s", roctracer_error_string()); \
    }                                        \
  } while (false)
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the process id obtained with the raw getpid system call. The value
// is read on the first call only and served from a function-local static
// afterwards, so later calls never re-query the kernel.
uint32_t GetPid() {
  static uint32_t cached_pid = syscall(__NR_getpid);
  return cached_pid;
}
|
||||
|
||||
/* Returns the bare kernel name contained in a (demangled) signature string.
   The string is scanned from its end in two steps:
     1. trailing blanks and balanced "(...)", "<...>" and "[...]" groups are
        skipped;
     2. the characters before them are collected until a blank, a ':' or the
        start of the string is reached.
   For example 'Foo<int, float>::foo(a[], int (int))' -> 'foo'.
   An unbalanced trailing group consumes the whole string and yields "". */
std::string truncate_name(const std::string& name) {
  auto cursor = name.rbegin();
  const auto stop = name.rend();

  // Step 1: skip blanks and bracketed groups while walking right to left.
  uint32_t depth = 0;  // nesting level inside the group being skipped
  char deeper = 0;     // closing bracket of that group: seeing it nests deeper
  char shallower = 0;  // matching opening bracket: seeing it unwinds a level
  for (; cursor != stop; ++cursor) {
    const char ch = *cursor;
    if (depth != 0) {
      if (ch == deeper) ++depth;
      if (ch == shallower) --depth;
      continue;
    }
    if (ch == ' ') continue;
    if (ch == ')') {
      shallower = '(';
    } else if (ch == '>') {
      shallower = '<';
    } else if (ch == ']') {
      shallower = '[';
    } else {
      break;  // reached the last character of the name itself
    }
    deeper = ch;
    depth = 1;
  }

  // Step 2: extend the window leftwards up to the nearest blank or ':'.
  const auto name_last = cursor;
  while (cursor != stop && *cursor != ' ' && *cursor != ':') {
    ++cursor;
  }

  // `stop - cursor` is the forward index of the first character of the name,
  // `cursor - name_last` is its length.
  return name.substr(stop - cursor, cursor - name_last);
}
|
||||
|
||||
// Invokes amd_comgr_<call> and, when the returned status is not
// AMD_COMGR_STATUS_SUCCESS, reports the failure through fatal() together with
// the description obtained from amd_comgr_status_string().
#define amd_comgr_(call)                                                                           \
  do {                                                                                             \
    const amd_comgr_status_t comgr_status = amd_comgr_##call;                                      \
    if (comgr_status != AMD_COMGR_STATUS_SUCCESS) {                                                \
      const char* description = "";                                                                \
      amd_comgr_status_string(comgr_status, &description);                                         \
      fatal(#call " failed: %s", description);                                                     \
    }                                                                                              \
  } while (false)
|
||||
|
||||
// C++ symbol demangle
|
||||
std::string cxx_demangle(const std::string& symbol) {
|
||||
amd_comgr_data_t mangled_data;
|
||||
amd_comgr_(create_data(AMD_COMGR_DATA_KIND_BYTES, &mangled_data));
|
||||
amd_comgr_(set_data(mangled_data, symbol.size(), symbol.data()));
|
||||
|
||||
amd_comgr_data_t demangled_data;
|
||||
amd_comgr_(demangle_symbol_name(mangled_data, &demangled_data));
|
||||
|
||||
size_t demangled_size = 0;
|
||||
amd_comgr_(get_data(demangled_data, &demangled_size, nullptr));
|
||||
|
||||
std::string demangled_str;
|
||||
demangled_str.resize(demangled_size);
|
||||
amd_comgr_(get_data(demangled_data, &demangled_size, demangled_str.data()));
|
||||
|
||||
amd_comgr_(release_data(mangled_data));
|
||||
amd_comgr_(release_data(demangled_data));
|
||||
return demangled_str;
|
||||
}
|
||||
|
||||
class file_plugin_t {
|
||||
private:
|
||||
class output_file_t {
|
||||
public:
|
||||
output_file_t(std::string name) : name_(std::move(name)) {}
|
||||
|
||||
std::string name() const { return name_; }
|
||||
|
||||
template <typename T> std::ostream& operator<<(T&& value) {
|
||||
if (!is_open()) open();
|
||||
return stream_ << std::forward<T>(value);
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& (*func)(std::ostream&)) {
|
||||
if (!is_open()) open();
|
||||
return stream_ << func;
|
||||
}
|
||||
|
||||
void open() {
|
||||
// If the stream is already in the failed state, there's no need to try to open the file.
|
||||
if (fail()) return;
|
||||
|
||||
const char* output_dir = getenv("ROCP_OUTPUT_DIR");
|
||||
|
||||
if (output_dir == nullptr) {
|
||||
stream_.copyfmt(std::cout);
|
||||
stream_.clear(std::cout.rdstate());
|
||||
stream_.basic_ios<char>::rdbuf(std::cout.rdbuf());
|
||||
return;
|
||||
}
|
||||
|
||||
fs::path output_prefix(output_dir);
|
||||
if (!fs::is_directory(fs::status(output_prefix))) {
|
||||
if (!stream_.fail()) warning("Cannot open output directory '%s'", output_dir);
|
||||
stream_.setstate(std::ios_base::failbit);
|
||||
return;
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
ss << GetPid() << "_" << name_;
|
||||
stream_.open(output_prefix / ss.str());
|
||||
}
|
||||
|
||||
bool is_open() const { return stream_.is_open(); }
|
||||
bool fail() const { return stream_.fail(); }
|
||||
|
||||
private:
|
||||
const std::string name_;
|
||||
std::ofstream stream_;
|
||||
};
|
||||
|
||||
output_file_t* get_output_file(uint32_t domain, uint32_t op = 0) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
return &roctx_file_;
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
return &hsa_api_file_;
|
||||
case ACTIVITY_DOMAIN_HIP_API:
|
||||
return &hip_api_file_;
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
return &hip_activity_file_;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
if (op == HSA_OP_ID_COPY) {
|
||||
return &hsa_async_copy_file_;
|
||||
} else if (op == HSA_OP_ID_RESERVED1) {
|
||||
return &pc_sample_file_;
|
||||
}
|
||||
default:
|
||||
assert(!"domain/op not supported!");
|
||||
break;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
public:
|
||||
file_plugin_t() {
|
||||
// Dumping HSA handles for agents
|
||||
output_file_t hsa_handles("hsa_handles.txt");
|
||||
|
||||
[[maybe_unused]] hsa_status_t status = hsa_iterate_agents(
|
||||
[](hsa_agent_t agent, void* user_data) {
|
||||
auto* file = static_cast<decltype(hsa_handles)*>(user_data);
|
||||
hsa_device_type_t type;
|
||||
|
||||
if (hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type) != HSA_STATUS_SUCCESS)
|
||||
return HSA_STATUS_ERROR;
|
||||
|
||||
*file << std::hex << std::showbase << agent.handle << " agent "
|
||||
<< ((type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu") << "\n";
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
&hsa_handles);
|
||||
assert(status == HSA_STATUS_SUCCESS && "failed to iterate HSA agents");
|
||||
if (hsa_handles.fail()) {
|
||||
warning("Cannot write to '%s'", hsa_handles.name().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
// App begin timestamp begin_ts_file.txt
|
||||
output_file_t begin_ts("begin_ts_file.txt");
|
||||
|
||||
roctracer_timestamp_t app_begin_timestamp;
|
||||
CHECK_ROCTRACER(roctracer_get_timestamp(&app_begin_timestamp));
|
||||
begin_ts << std::dec << app_begin_timestamp << "\n";
|
||||
if (begin_ts.fail()) {
|
||||
warning("Cannot write to '%s'", begin_ts.name().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
valid_ = true;
|
||||
}
|
||||
|
||||
int write_callback_record(const roctracer_record_t* record, const void* callback_data) {
|
||||
std::stringstream ss;
|
||||
output_file_t* output_file{nullptr};
|
||||
switch (record->domain) {
|
||||
case ACTIVITY_DOMAIN_ROCTX: {
|
||||
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
|
||||
output_file = get_output_file(ACTIVITY_DOMAIN_ROCTX);
|
||||
ss << std::dec << record->begin_ns << " " << record->process_id << ":" << record->thread_id
|
||||
<< " " << record->op << ":" << data->args.id << ":\""
|
||||
<< (data->args.message ? data->args.message : "") << "\""
|
||||
<< "\n";
|
||||
*output_file << ss.str();
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
|
||||
output_file = get_output_file(ACTIVITY_DOMAIN_HSA_API);
|
||||
ss << std::dec << record->begin_ns << ":"
|
||||
<< ((record->op == HSA_API_ID_hsa_shut_down) ? record->begin_ns : record->end_ns) << " "
|
||||
<< record->process_id << ":" << record->thread_id << " "
|
||||
<< hsa_api_data_pair_t(record->op, *data) << " :" << std::dec << data->correlation_id
|
||||
<< "\n";
|
||||
*output_file << ss.str();
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
|
||||
|
||||
std::string kernel_name;
|
||||
if (record->kernel_name) {
|
||||
static bool truncate = []() {
|
||||
const char* env_var = getenv("ROCP_TRUNCATE_NAMES");
|
||||
return env_var && std::atoi(env_var) != 0;
|
||||
}();
|
||||
kernel_name = cxx_demangle(record->kernel_name);
|
||||
if (truncate) kernel_name = truncate_name(kernel_name);
|
||||
kernel_name = " kernel=" + kernel_name;
|
||||
}
|
||||
|
||||
output_file = get_output_file(ACTIVITY_DOMAIN_HIP_API);
|
||||
ss << std::dec << record->begin_ns << ":" << record->end_ns << " " << record->process_id
|
||||
<< ":" << record->thread_id << " " << hipApiString((hip_api_id_t)record->op, data)
|
||||
<< kernel_name << " :" << std::dec << data->correlation_id << "\n";
|
||||
*output_file << ss.str();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
warning("write_callback_record: ignored record for domain %d", record->domain);
|
||||
break;
|
||||
}
|
||||
|
||||
return (output_file && output_file->fail()) ? -1 : 0;
|
||||
}
|
||||
|
||||
int write_activity_records(const roctracer_record_t* begin, const roctracer_record_t* end) {
|
||||
while (begin != end) {
|
||||
std::stringstream ss;
|
||||
output_file_t* output_file{nullptr};
|
||||
const char* name = roctracer_op_string(begin->domain, begin->op, begin->kind);
|
||||
|
||||
switch (begin->domain) {
|
||||
case ACTIVITY_DOMAIN_HIP_OPS: {
|
||||
// The post-processing script cannot handle HIP ops without a correlation ID. The
|
||||
// correlation ID is needed to connect the record to a HIP stream and originating thread.
|
||||
// The script could be modified to handle ops without correlation IDs, but for backward
|
||||
// compatibilty, we are simply dropping the records here.
|
||||
if (begin->correlation_id == 0) break;
|
||||
|
||||
output_file = get_output_file(ACTIVITY_DOMAIN_HIP_OPS);
|
||||
ss << std::dec << begin->begin_ns << ":" << begin->end_ns << " " << begin->device_id
|
||||
<< ":" << begin->queue_id << " "
|
||||
<< ((begin->op == HIP_OP_ID_DISPATCH && begin->kernel_name != nullptr)
|
||||
? truncate_name(cxx_demangle(begin->kernel_name))
|
||||
: name)
|
||||
<< ":" << begin->correlation_id << ":" << GetPid() << "\n";
|
||||
*output_file << ss.str();
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
output_file = get_output_file(ACTIVITY_DOMAIN_HSA_OPS, begin->op);
|
||||
if (begin->op == HSA_OP_ID_COPY) {
|
||||
ss << std::dec << begin->begin_ns << ":" << begin->end_ns
|
||||
<< " async-copy:" << begin->correlation_id << ":" << GetPid() << "\n";
|
||||
*output_file << ss.str();
|
||||
break;
|
||||
} else if (begin->op == HSA_OP_ID_RESERVED1) {
|
||||
ss << std::dec << begin->pc_sample.se << " " << begin->pc_sample.cycle << " "
|
||||
<< std::hex << std::showbase << begin->pc_sample.pc << " " << name << "\n";
|
||||
*output_file << ss.str();
|
||||
break;
|
||||
}
|
||||
[[fallthrough]];
|
||||
default: {
|
||||
warning("write_activity_records: ignored activity for domain %d", begin->domain);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (output_file && output_file->fail()) return -1;
|
||||
CHECK_ROCTRACER(roctracer_next_record(begin, &begin));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool is_valid() const { return valid_; }
|
||||
|
||||
private:
|
||||
bool valid_{false};
|
||||
|
||||
output_file_t roctx_file_{"roctx_trace.txt"}, hsa_api_file_{"hsa_api_trace.txt"},
|
||||
hip_api_file_{"hip_api_trace.txt"}, hip_activity_file_{"hcc_ops_trace.txt"},
|
||||
hsa_async_copy_file_{"async_copy_trace.txt"}, pc_sample_file_{"pcs_trace.txt"};
|
||||
};
|
||||
|
||||
// The plugin instance used by the exported roctracer_plugin_* entry points;
// null while no plugin is initialized.
file_plugin_t* file_plugin{nullptr};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Creates the file plugin instance.
// Returns 0 on success. Returns -1 when the ROCtracer major version differs from
// the one this plugin was built against, when its minor version is older, when a
// plugin instance already exists, or when the new instance is not valid.
ROCTRACER_EXPORT int roctracer_plugin_initialize(uint32_t roctracer_major_version,
                                                 uint32_t roctracer_minor_version) {
  const bool version_compatible = roctracer_major_version == ROCTRACER_VERSION_MAJOR &&
      roctracer_minor_version >= ROCTRACER_VERSION_MINOR;
  if (!version_compatible) return -1;

  // Refuse a second initialization while an instance is alive.
  if (file_plugin != nullptr) return -1;

  file_plugin = new file_plugin_t();
  if (!file_plugin->is_valid()) {
    // The plugin failed to initialize: destroy it and return an error.
    delete file_plugin;
    file_plugin = nullptr;
    return -1;
  }

  return 0;
}
|
||||
|
||||
// Destroys the plugin instance, if any, and resets the global pointer.
ROCTRACER_EXPORT void roctracer_plugin_finalize() {
  // Deleting a null pointer has no effect, so the "no instance" case needs no
  // separate branch.
  delete file_plugin;
  file_plugin = nullptr;
}
|
||||
|
||||
// Forwards an API callback record to the plugin instance.
// Returns -1 when there is no valid plugin instance, otherwise the result of
// file_plugin_t::write_callback_record().
ROCTRACER_EXPORT int roctracer_plugin_write_callback_record(const roctracer_record_t* record,
                                                            const void* callback_data) {
  const bool usable = file_plugin != nullptr && file_plugin->is_valid();
  return usable ? file_plugin->write_callback_record(record, callback_data) : -1;
}
|
||||
|
||||
// Forwards the activity records in [begin, end) to the plugin instance.
// Returns -1 when there is no valid plugin instance, otherwise the result of
// file_plugin_t::write_activity_records().
ROCTRACER_EXPORT int roctracer_plugin_write_activity_records(const roctracer_record_t* begin,
                                                             const roctracer_record_t* end) {
  const bool usable = file_plugin != nullptr && file_plugin->is_valid();
  return usable ? file_plugin->write_activity_records(begin, end) : -1;
}
|
||||
@@ -0,0 +1,37 @@
|
||||
#!/bin/bash
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
# ROCm installation prefix; keeps a value already present in the environment,
# otherwise defaults to /opt/rocm.
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"

# Prints the given message to stderr and terminates the script with status 1.
fatal() {
  echo "$1" >&2
  exit 1
}

# Default the build directory to the current working directory.
if [ -z "$BUILD_DIR" ] ; then export BUILD_DIR=$PWD; fi

# Quote the path (it may contain spaces) and stop if it cannot be entered, so
# that run.sh is never started from an unrelated directory.
cd "$BUILD_DIR" || fatal "Cannot change to build directory '$BUILD_DIR'"
./run.sh

# NOTE(review): the exit status of run.sh is discarded and the script always
# reports success - confirm that this is intentional.
exit 0
|
||||
@@ -0,0 +1,320 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
import sys, os, re
|
||||
import filecmp
|
||||
import argparse
|
||||
|
||||
# Module-level state of the trace checker.
events_count = dict()    # per-event counters
events_order = dict()    # per-thread event sequences of the trace parsed last
events_order_r = dict()  # per-thread event sequences kept for comparison
trace2info = dict()      # comparison settings per trace name
# Configuration file that lists the comparison level for each trace.
trace2info_filename = 'test/golden_traces/tests_trace_cmp_levels.txt'
|
||||
|
||||
# Parses trace comparison config file and stores the info in a dictionary
def parse_trace_levels(trace_config_filename, check_trace_flag):
    """Read the per-trace comparison settings from a config file.

    Each line is split on single spaces: the first token is the trace name,
    the second the comparison level, and the remaining tokens are
    ``--flag value`` pairs (--ignore-count, --ignore-event, --check-count,
    --check-order, --check-events). The value of --check-events is also stored
    as the --check-count and --ignore-count value. Tokens are stored verbatim,
    so the last token of a line keeps its trailing newline.

    Parameters:
        trace_config_filename: path of the config file.
        check_trace_flag: 0 disables parsing; None disables parsing from the
            first line that starts with '# dummy'; any other value parses
            every line.

    Returns:
        (trace2info, status): trace2info maps a trace name to the tuple
        (comp_level, no_events_cnt, events2ignore, events2chkcnt,
        events2chkord, events2ch); status is 1 once at least one line has
        been accepted for parsing, otherwise 0.
    """
    status = 0
    trace2info = {}
    # The context manager closes the file on every return path.
    with open(trace_config_filename) as f:
        for line in f:
            if check_trace_flag == 0:
                return (trace2info, status)
            if check_trace_flag is None and re.match('^# dummy', line):
                return (trace2info, status)
            status = 1
            lis = line.split(' ')
            if len(lis) < 2:
                # Blank or single-token line: there is no comparison level to read.
                continue
            trace_name = lis[0]
            comp_level = lis[1]
            no_events_cnt = ''
            events2ignore = ''
            events2chkcnt = ''
            events2chkord = ''
            events2ch = ''
            # A single space is used as a marker meaning "the next token is the
            # value of this option". Markers are resolved before flags are
            # detected so that a flag's value is taken from the following token.
            for l in lis:
                if no_events_cnt == ' ':
                    no_events_cnt = l
                if events2ignore == ' ':
                    events2ignore = l
                if events2chkcnt == ' ':
                    events2chkcnt = l
                if events2chkord == ' ':
                    events2chkord = l
                if events2ch == ' ':
                    events2ch = l
                    events2chkcnt = l
                    no_events_cnt = l
                if l == '--ignore-count':
                    no_events_cnt = ' '
                if l == '--ignore-event':
                    events2ignore = ' '
                if l == '--check-count':
                    events2chkcnt = ' '
                if l == '--check-order':
                    events2chkord = ' '
                if l == '--check-events':
                    events2ch = ' '

            trace2info[trace_name] = (comp_level, no_events_cnt, events2ignore,
                                      events2chkcnt, events2chkord, events2ch)

    return (trace2info, status)
|
||||
|
||||
# diff multi lines strings to show events differences
def diff_strings(cnt_r, cnt, metric):
    """Print a human-readable diff between two event summaries.

    Parameters:
        cnt_r: summary of the first trace (printed with '+' when an event is
            missing from ``cnt``, '>D<' when only its line differs).
        cnt: summary of the second trace (printed with '-' when an event is
            missing from ``cnt_r``).
        metric: 'cnt' compares the count summaries passed as arguments; 'or'
            compares the per-thread event sequences held in the module
            globals ``events_order_r`` and ``events_order``, matching threads
            by their rank in sorted thread-id order.

    Count summaries are accepted either as newline-separated text or as the
    ``str()`` of a list of lines, which is the form gen_events_info() returns.
    """
    global events_order_r
    global events_order
    import ast

    def split_events(text):
        # gen_events_info() returns str(sorted_list_of_lines) for counts; such a
        # string holds no newline, so it has to be parsed back into lines.
        if text.startswith('['):
            try:
                parsed = ast.literal_eval(text)
            except (ValueError, SyntaxError):
                parsed = None
            if isinstance(parsed, list):
                return [str(item) for item in parsed]
        return text.split('\n')

    print ("\nDiffs (if any):\n")
    if metric == 'cnt':
        evt_ptrn = re.compile(r'(\w+).*$')
        for evt in split_events(cnt_r):
            mevt_ptrn = evt_ptrn.match(evt)
            if not mevt_ptrn:
                continue
            # Plain substring tests: event text is data, not a regex pattern.
            if mevt_ptrn.group(1) not in cnt:
                print ('+ ' + evt)
            elif evt not in cnt:
                print ('>D< ' + evt)

        for evt in split_events(cnt):
            mevt_ptrn = evt_ptrn.match(evt)
            if mevt_ptrn and mevt_ptrn.group(1) not in cnt_r:
                print ('- ' + evt)

    if metric == 'or':
        tids_r = sorted(events_order_r.keys())
        tids = sorted(events_order.keys())
        for rank, tid_r in enumerate(tids_r):
            if len(events_order) == 0:
                print ("+ " + str(events_order_r[tid_r]) + "\n\n")
                continue
            if rank >= len(tids):
                # No thread of the same rank on the other side.
                continue
            tid = tids[rank]
            evts_r = events_order_r[tid_r]
            evts = events_order[tid]
            if evts_r == evts:
                continue
            # First position, within the common length, where the sequences differ.
            mismatch = None
            for index, (evt, evt2) in enumerate(zip(evts_r, evts)):
                if evt != evt2:
                    mismatch = (index, evt, evt2)
                    break
            if mismatch is not None:
                index, evt, evt2 = mismatch
                print (">I< Difference starts at tid rank: " + str(rank) + " event index: " + str(index) + ", tid_r " + str(tid_r) + ", tid " + str(tid) + ", with evts " + evt + " and " + evt2 + "\n")
            elif len(evts_r) != len(evts):
                print (">I< Difference starts at tid rank: " + str(rank) + " event index: " + str(min(len(evts_r), len(evts))) + ", with missing evts\n")
        if len(events_order_r) == 0:
            for tid in tids:
                print ("- " + str(events_order[tid]) + "\n")
|
||||
|
||||
# check trace against golden reference and returns 0 for pass, 1 for fail
def check_trace_status(tracename, verbose, check_trace_flag):
    """Compare a generated trace with its golden reference.

    The golden trace is read from 'test/golden_traces/<tracename>.txt' and the
    generated one from '/tmp/test/out/<tracename>.out'. The comparison level is
    looked up in the config file named by ``trace2info_filename`` under the
    base name of ``tracename``.

    Parameters:
        tracename: trace name, without directory prefix or extension.
        verbose: when true, print both event summaries and their differences.
        check_trace_flag: forwarded to parse_trace_levels().

    Returns:
        0 if the check passes (or checking is disabled), 1 otherwise. 'PASSED!'
        or 'FAILED!' is printed accordingly.
    """
    global events_order_r
    global events_order
    import subprocess

    (trace2info, status) = parse_trace_levels(trace2info_filename, check_trace_flag)

    if len(trace2info) == 0:
        if status == 1:
            print ("Error: no trace comparison info found in config file " + trace2info_filename + "\n")
            print('FAILED!')
            return 1
        if status == 0:
            # Checking is disabled.
            print('PASSED!')
            return 0

    trace = 'test/golden_traces/' + tracename + '.txt'
    rtrace = '/tmp/test/out/' + tracename + '.out'
    trace_key = os.path.basename(tracename)
    if trace_key not in trace2info:
        print('Trace ' + trace_key + ' not found in ' + trace2info_filename)
        print('FAILED!')
        return 1

    (trace_level, no_events_cnt, events2ignore, events2chkcnt, events2chkord, events2ch) = trace2info[trace_key]
    # The last token of a config line still carries the line terminator.
    trace_level = trace_level.rstrip('\n')
    no_events_cnt = no_events_cnt.rstrip('\n')
    events2ignore = events2ignore.rstrip('\n')
    events2chkcnt = events2chkcnt.rstrip('\n')
    events2chkord = events2chkord.rstrip('\n')
    events2ch = events2ch.rstrip('\n')

    # An empty pattern would match every event, so unset exclusion filters are
    # replaced by a pattern that is not expected to match any event name.
    if no_events_cnt == '':
        no_events_cnt = 'empty-regex'
    if events2ignore == '':
        events2ignore = 'empty-regex'

    if trace_level == '--check-none':
        print('PASSED!')
        return 0

    if trace_level == '--check-diff':
        if filecmp.cmp(trace, rtrace):
            print('PASSED!')
            return 0
        print('FAILED!')
        # Argument list instead of a shell string: the paths derive from a
        # command-line argument and must not be interpreted by a shell.
        subprocess.call(['/usr/bin/diff', '--brief', trace, rtrace])
        return 1

    metric = ''
    if trace_level == '--check-count' or trace_level == '--check-events':
        metric = 'cnt'
    if trace_level == '--check-order':
        metric = 'or'

    cnt_r = gen_events_info(rtrace, trace_level, no_events_cnt, events2ignore, events2chkcnt, events2chkord, verbose)
    # gen_events_info() rebinds the global events_order on every call; keep the
    # result of the first call before the second call replaces it.
    events_order_r = {}
    for tid in sorted(events_order.keys()):
        events_order_r[tid] = events_order[tid]
    cnt = gen_events_info(trace, trace_level, no_events_cnt, events2ignore, events2chkcnt, events2chkord, verbose)
    if verbose:
        print ('\n' + rtrace + ':\n')
        print (cnt_r)
        print ('\n' + trace + ':\n')
        print (cnt)
        diff_strings(cnt_r, cnt, metric)

    if cnt_r == cnt:
        print('PASSED!')
        return 0
    print('FAILED!')
    return 1
|
||||
|
||||
# Parses roctracer trace file for regression purpose
# and generates events count per event (when cnt is on) or events order per tid (when order is on)
def gen_events_info(tracefile, trace_level, no_events_cnt, events2ignore, events2chkcnt, events2chkord, verbose):
    """Summarise the events of a trace file.

    Parameters:
        tracefile: path of the trace to parse.
        trace_level: '--check-count' or '--check-events' selects counting,
            '--check-order' selects per-thread ordering; any other value
            yields an empty summary.
        no_events_cnt: regex of events whose count must be ignored. When
            counting, matching events are left out; when ordering, consecutive
            repetitions of a matching event are collapsed.
        events2ignore: regex of events to drop entirely.
        events2chkcnt: regex of events taken into account when counting.
        events2chkord: regex of events taken into account when ordering.
        verbose: unused here.

    Returns:
        For counting, str() of the sorted list of '<event> : count <n>' lines
        (the list also contains one empty string); for ordering, the
        concatenated str() of each thread's event list in ascending thread-id
        order.

    Side effect: the module global ``events_order`` is replaced by the
    per-thread event lists built during this call.
    """
    global events_order
    metric = ''
    if trace_level == '--check-count' or trace_level == '--check-events':
        metric = 'cnt'
    if trace_level == '--check-order':
        metric = 'or'

    events_count = {}
    events_order = {}
    res = ''
    # Filters are compiled once instead of being re-interpreted for every line.
    re_no_events_cnt = re.compile('{}'.format(no_events_cnt))
    re_events2ignore = re.compile('{}'.format(events2ignore))
    re_events2chkcnt = re.compile('{}'.format(events2chkcnt))
    re_events2chkord = re.compile('{}'.format(events2chkord))

    test_act_pattern = re.compile(r'\s*(\w+)\s+.*_id\((\d+)\)$')
    #' hipSetDevice correlation_id(1) time_ns(1548622357525055:1548622357542015) process_id(126283) thread_id(126283)'
    #' hcCommandKernel correlation_id(6) time_ns(1548622661443020:1548622662666935) device_id(0) queue_id(0)'
    test_api_cb_pattern = re.compile(r'.*<(\w+)\s+.*tid\((\d+)\)>')
    # <hsaKmtGetVersion id(2) correlation_id(0) on-enter pid(26224) tid(26224)>
    # below is roctx pattern
    # <hipLaunchKernel pid(123) tid(123)>
    tool_record = re.compile(r'\d+:\d+\s+\d+:(\d+)\s+(\w+)')
    # tool_api_record
    # 1822810364769411:1822810364771941 116477:116477 hsa_agent_get_info(<agent 0x8990e0>, 17, 0x7ffeac015fec) = 0
    # tool_gpu_act_record
    # 3632773658039902:3632773658046462 0:0 hcCommandMarker:273
    # Group 1 captures the whole thread id; a repeated one-digit group would
    # retain only its last digit and merge unrelated threads.
    roctx_record = re.compile(r'\d+\s\d+:(\d+)\s(\d):\d+:\".*\"')

    # (pattern, index of the event group, index of the thread-id group); when
    # several patterns match a line, the last one in this list wins.
    line_patterns = (
        (test_act_pattern, 1, 2),
        (test_api_cb_pattern, 1, 2),
        (tool_record, 2, 1),
        (roctx_record, 2, 1),
    )

    with open(tracefile) as f:
        for line in f:
            if 'before' in line or 'after' in line:  # roctx before/after not real events
                continue
            line = line.rstrip('\n')
            event = ''
            for pattern, event_group, tid_group in line_patterns:
                found = pattern.match(line)
                if found:
                    event = found.group(event_group)
                    tid = int(found.group(tid_group))
            if event == '' or event == '(null)':  # some traces have these null events
                continue

            if re_events2ignore.search(event):
                continue

            if metric == 'cnt' and re_events2chkcnt.search(event):
                if event in events_count:
                    events_count[event] = events_count[event] + 1
                elif not re_no_events_cnt.search(event):
                    events_count[event] = 1

            if metric == 'or' and re_events2chkord.search(event):
                if tid in events_order:
                    if re_no_events_cnt.search(event):
                        # Add event only if it is not last event in the list
                        if event != events_order[tid][-1]:
                            events_order[tid].append(event)
                    else:
                        events_order[tid].append(event)
                else:
                    events_order[tid] = [event]

    if metric == 'cnt':
        for event, count in events_count.items():
            if re_no_events_cnt.search(event):
                res = res + event + '\n'
            else:
                res = res + event + " : count " + str(count) + '\n'
        # Sort the lines so that the summary does not depend on event order.
        res = str(sorted(res.split('\n')))
    if metric == 'or':
        for tid in sorted(events_order.keys()):
            res = res + str(events_order[tid])
    return res
|
||||
|
||||
# Command-line interface. Only the parser is built at import time; the
# arguments are parsed when the file is executed as a script, so that importing
# the module neither reads sys.argv nor exits.
parser = argparse.ArgumentParser(description='check_trace.py: check a trace against golden ref. Returns 0 for success, 1 for failure')
requiredNamed = parser.add_argument_group('Required arguments')
requiredNamed.add_argument('-in', metavar='file', help='Name of trace to be checked', required=True)
requiredNamed.add_argument('-v', action='store_true', help='debug info', required=False)
requiredNamed.add_argument('-ck', metavar='N', type=int, help='check trace 0|1', required=False)

if __name__ == '__main__':
    args = vars(parser.parse_args())
    sys.exit(check_trace_status(args['in'], args['v'], args['ck']))
|
||||
|
||||
|
||||
@@ -0,0 +1,296 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
import os, sys, re
|
||||
import CppHeaderParser
|
||||
import argparse
|
||||
import string
|
||||
|
||||
# License banner, as a C block comment, kept as a constant for the generator.
LICENSE = (
    '/*\n'
    'Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.\n'
    '\n'
    'Permission is hereby granted, free of charge, to any person obtaining a copy\n'
    'of this software and associated documentation files (the "Software"), to deal\n'
    'in the Software without restriction, including without limitation the rights\n'
    'to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n'
    'copies of the Software, and to permit persons to whom the Software is\n'
    'furnished to do so, subject to the following conditions:\n'
    '\n'
    'The above copyright notice and this permission notice shall be included in\n'
    'all copies or substantial portions of the Software.\n'
    '\n'
    'THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n'
    'IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n'
    'FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n'
    'AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n'
    'LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n'
    'OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n'
    'THE SOFTWARE.\n'
    '*/\n'
)
|
||||
|
||||
|
||||
# C++ source text of the 'detail' helpers: print_escaped_string() plus the
# basic operator<< overloads. The text opens 'namespace detail {' and does not
# close it.
# Note on escaping: each backslash that must reach the generated C++ source is
# written as '\\' here. The control characters \b \f \n \r \t are emitted as a
# backslash followed by the letter (C++ literal "\\n"), so that the printed
# text stays on one line and contains no raw control characters.
header_basic = \
'namespace detail {\n' + \
' inline static void print_escaped_string(std::ostream& out, const char *v, size_t len) {\n' + \
' out << \'"\'; \n' + \
' for (size_t i = 0; i < len && v[i]; ++i) {\n' + \
' switch (v[i]) {\n' + \
' case \'\\"\': out << "\\\\\\""; break;\n' + \
' case \'\\\\\': out << "\\\\\\\\"; break;\n' + \
' case \'\\b\': out << "\\\\b"; break;\n' + \
' case \'\\f\': out << "\\\\f"; break;\n' + \
' case \'\\n\': out << "\\\\n"; break;\n' + \
' case \'\\r\': out << "\\\\r"; break;\n' + \
' case \'\\t\': out << "\\\\t"; break;\n' + \
' default:\n' + \
' if (std::isprint((unsigned char)v[i])) std::operator<<(out, v[i]);\n' + \
' else {\n' + \
' std::ios_base::fmtflags flags(out.flags());\n' + \
' out << "\\\\x" << std::setfill(\'0\') << std::setw(2) << std::hex << (unsigned int)(unsigned char)v[i];\n' + \
' out.flags(flags);\n' + \
' }\n' + \
' break;\n' + \
' }\n' + \
' }\n' + \
' out << \'"\'; \n' + \
' }\n' + \
'\n' + \
' template <typename T>\n' + \
' inline static std::ostream& operator<<(std::ostream& out, const T& v) {\n' + \
' using std::operator<<;\n' + \
' static bool recursion = false;\n' + \
' if (recursion == false) { recursion = true; out << v; recursion = false; }\n' + \
' return out;\n }\n' + \
'\n' + \
' inline static std::ostream &operator<<(std::ostream &out, const unsigned char &v) {\n' + \
' out << (unsigned int)v;\n' + \
' return out;\n }\n' + \
'\n' + \
' inline static std::ostream &operator<<(std::ostream &out, const char &v) {\n' + \
' out << (unsigned char)v;\n' + \
' return out;\n }\n' + \
'\n' + \
' template <size_t N>\n' + \
' inline static std::ostream &operator<<(std::ostream &out, const char (&v)[N]) {\n' + \
' print_escaped_string(out, v, N);\n' + \
' return out;\n }\n' + \
'\n' + \
' inline static std::ostream &operator<<(std::ostream &out, const char *v) {\n' + \
' print_escaped_string(out, v, strlen(v));\n' + \
' return out;\n }\n'
|
||||
|
||||
# Module-level state of the generator.
structs_analyzed = dict()  # names of the structs that were already processed
global_ops = str()
global_str = str()
output_filename_h = None
apiname = str()
|
||||
|
||||
# process_struct traverses recursively all structs to extract all fields
|
||||
# process_struct traverses recursively all structs to extract all fields
def process_struct(file_handle, cppHeader_struct, cppHeader, parent_hier_name, apiname):
    """Collect the C++ code that prints every public field of one struct.

    The generated statements are appended to the module-level ``global_str``;
    nothing is written to ``file_handle`` here (it is only forwarded to the
    recursive calls).

    file_handle: handle for output file {api_name}_ostream_ops.h to be generated
    cppHeader_struct: name (key in ``cppHeader.classes``) of the struct being processed
    cppHeader: cppHeader object created by CppHeaderParser.CppHeader(...)
    parent_hier_name: parent hierarchical name used for nested structs/enums
    apiname: for example hip.
    """
    global global_str

    if cppHeader_struct == 'max_align_t':  # function pointers not working in cppheaderparser
        return
    if cppHeader_struct not in cppHeader.classes:
        return
    if cppHeader_struct in structs_analyzed:
        return
    structs_analyzed[cppHeader_struct] = 1

    api_upper = apiname.upper()
    detail_ns = "roctracer::" + apiname.lower() + "_support::detail::"
    indent = " "

    # Fields are visited from last to first.
    for field in reversed(cppHeader.classes[cppHeader_struct]["properties"]["public"]):
        name = ""
        if 'name' in field:
            name = field['name']
            if parent_hier_name != '':
                name = parent_hier_name + '.' + name
        if name == '':
            continue

        mtype = field['type'] if 'type' in field else ""
        if mtype == '':
            continue

        prop = field['property_of_class'] if 'property_of_class' in field else ""

        if "union" not in mtype:
            snippet = " if (std::string(\"" + cppHeader_struct + "::" + name + "\").find(" + api_upper + "_structs_regex" + ") != std::string::npos) {\n"
            snippet += indent + " std::operator<<(out, \"" + name + "=\");\n"
            if name == 'reserved' and api_upper == 'HIP':
                # HIP 'reserved' fields are printed as a literal 0 instead of their value.
                snippet += indent + " " + detail_ns + "operator<<(out, 0);\n"
            else:
                snippet += indent + " " + detail_ns + "operator<<(out, v." + name + ");\n"
            snippet += indent + " std::operator<<(out, \", \");\n"
            snippet += " }\n"
            # Fields whose type mentions void are not printed.
            if "void" not in mtype:
                global_str += snippet
        else:
            # Union member: recurse into the candidate names under which the
            # parser may have registered the nested type.
            if prop != '':
                process_struct(file_handle, prop + "::", cppHeader, name, apiname)
                process_struct(file_handle, prop + "::" + mtype + " ", cppHeader, name, apiname)
            process_struct(file_handle, cppHeader_struct + "::", cppHeader, name, apiname)
|
||||
|
||||
# Parses API header file and generates ostream ops files ostream_ops.h
|
||||
# Parses API header file and generates ostream ops files ostream_ops.h
def gen_cppheader(infilepath, outfilepath, rank):
    """Parse one API header and emit ostream operators for its structs.

    infilepath: API Header file to be parsed
    outfilepath: Output file where ostream operators are written
    rank: position of this header in the input list:
          0 = first of several (opens the output and writes the prologue),
          1 = last of several (writes the footer and closes the output),
          2 = the only header (prologue and footer),
          any other value = a header in the middle (struct operators only).

    Exits with status 1 if CppHeaderParser cannot parse the header.
    """
    global global_ops
    global output_filename_h
    global apiname
    global global_str

    try:
        cppHeader = CppHeaderParser.CppHeader(infilepath)
    except CppHeaderParser.CppParseError as e:
        print(e)
        sys.exit(1)

    if rank == 0 or rank == 2:
        # The API name is the output file name without its "_ostream_ops.h" suffix.
        apiname = os.path.basename(outfilepath)
        output_filename_h = open(outfilepath, "w+")
        apiname = apiname.replace("_ostream_ops.h", "")
        apiname = apiname.upper()
        output_filename_h.write("// automatically generated\n")
        output_filename_h.write(LICENSE + '\n')
        header_s = \
            '#ifndef INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
            '#define INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
            '\n'
        if apiname.upper() == 'HIP':
            header_s = \
                header_s + \
                '#include <hip/hip_runtime.h>\n' + \
                '#include <hip/hip_deprecated.h>\n'
        header_s = \
            header_s + \
            '#include "roctracer.h"\n' + \
            '\n' + \
            '#ifdef __cplusplus\n' + \
            '#include <iostream>\n' + \
            '#include <iomanip>\n' + \
            '#include <string>\n' + \
            '#include <cstring>\n'

        output_filename_h.write(header_s)
        output_filename_h.write('\n')
        output_filename_h.write('namespace roctracer {\n')
        output_filename_h.write('namespace ' + apiname.lower() + '_support {\n')
        output_filename_h.write('static int ' + apiname.upper() + '_depth_max = 1;\n')
        output_filename_h.write('static int ' + apiname.upper() + '_depth_max_cnt = 0;\n')
        output_filename_h.write('static std::string ' + apiname.upper() + '_structs_regex = \"\";\n')
        output_filename_h.write('// begin ostream ops for '+ apiname + ' \n')
        output_filename_h.write("// basic ostream ops\n")
        output_filename_h.write(header_basic)
        output_filename_h.write("// End of basic ostream ops\n\n")

    # Iterate over a copy because entries are renamed while looping.
    for c in cppHeader.classes.copy():
        # Types defined inside of unions are incorrectly prepended with "union " after parsing by CppHeaderParser
        # Remove "union " from the beginning of the full class name to correct the eventual output
        if "union " in c[0:6] and "::union" not in c[-8:]:
            new_name = c[6:]
            cppHeader.classes[new_name] = cppHeader.classes[c]
            del cppHeader.classes[c]

    api_upper = apiname.upper()
    for c in cppHeader.classes:
        # ostream operator cannot be overloaded for anonymous struct therefore it is skipped.
        # endswith() is also safe for names shorter than two characters.
        if c.endswith('::'):
            continue
        if "::union" in c:
            continue
        if c in structs_analyzed:
            continue
        if c == 'max_align_t' or c == '__fsid_t':  # Skipping as it is defined in multiple domains
            continue
        if c.startswith("_") or c.startswith("pthread_") or c.startswith("__pthread_"):
            continue
        if len(cppHeader.classes[c]["properties"]["public"]) == 0:
            continue

        output_filename_h.write("inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n")
        output_filename_h.write("{\n")
        output_filename_h.write(" std::operator<<(out, '{');\n")
        output_filename_h.write(" " + api_upper + "_depth_max_cnt++;\n")
        output_filename_h.write(" if (" + api_upper + "_depth_max == -1 || " + api_upper + "_depth_max_cnt <= " + api_upper + "_depth_max" + ") {\n")
        process_struct(output_filename_h, c, cppHeader, "", apiname)
        # Drop the last three pieces of the collected text (the final ", "
        # separator statement, its closing brace and the empty tail), then
        # re-close the last field block.
        global_str = "\n".join(global_str.split("\n")[0:-3])
        if global_str != '':
            global_str += "\n }\n"
        output_filename_h.write(global_str)
        output_filename_h.write(" };\n")
        output_filename_h.write(" " + api_upper + "_depth_max_cnt--;\n")
        output_filename_h.write(" std::operator<<(out, '}');\n")
        output_filename_h.write(" return out;\n")
        output_filename_h.write("}\n")
        global_str = ''
        # Matching global-scope wrapper, emitted later together with the footer.
        global_ops += "inline static std::ostream& operator<<(std::ostream& out, const " + c + "& v)\n" + "{\n" + " roctracer::" + apiname.lower() + "_support::detail::operator<<(out, v);\n" + " return out;\n" + "}\n\n"

    if rank == 1 or rank == 2:
        footer = '// end ostream ops for '+ apiname + ' \n'
        footer += '};};};\n\n'
        output_filename_h.write(footer)
        output_filename_h.write(global_ops)
        footer = '#endif //__cplusplus\n' + \
                 '#endif // INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
                 ' \n'
        output_filename_h.write(footer)
        output_filename_h.close()
        print('File ' + outfilepath + ' generated')

    return
|
||||
|
||||
# Command line: -in <comma separated headers> -out <generated file>
parser = argparse.ArgumentParser(description='genOstreamOps.py: generates ostream operators for all typedefs in provided input file.')
requiredNamed = parser.add_argument_group('Required arguments')
requiredNamed.add_argument('-in', metavar='fileList', help='Comma separated list of header files to be parsed', required=True)
requiredNamed.add_argument('-out', metavar='file', help='Output file with ostream operators', required=True)

args = vars(parser.parse_args())

if __name__ == '__main__':
    flist = args['in'].split(',')
    final = len(flist) - 1
    for position, header in enumerate(flist):
        # rank tells gen_cppheader where this header sits in the list:
        # 2 = only one, 0 = first, 1 = last, -1 = in between.
        if final == 0:
            rank = 2
        elif position == 0:
            rank = 0
        elif position == final:
            rank = 1
        else:
            rank = -1
        gen_cppheader(header, args['out'], rank)
|
||||
@@ -0,0 +1,581 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to
|
||||
# deal in the Software without restriction, including without limitation the
|
||||
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
# sell copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
from __future__ import print_function
|
||||
import os, sys, re
|
||||
|
||||
# Names of the two generated files.
H_OUT = 'hsa_prof_str.h'
CPP_OUT = 'hsa_prof_str.inline.h'

# Header that declares the <Name>Table structs listing the API entry points.
API_TABLES_H = 'hsa_api_trace.h'

# (table name, header searched for the declarations) pairs.
# The last pair is handled differently by API_DescrParser: it is searched for
# the calls that were not found in the preceding headers.
API_HEADERS_H = (
    ('CoreApi', 'hsa.h'),
    ('AmdExt', 'hsa_ext_amd.h'),
    ('ImageExt', 'hsa_ext_image.h'),
    ('AmdExt', API_TABLES_H),
)
|
||||
|
||||
# License banner copied verbatim to the top of every generated file.
LICENSE = ''.join((
    '/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.\n',
    '\n',
    ' Permission is hereby granted, free of charge, to any person obtaining a copy\n',
    ' of this software and associated documentation files (the "Software"), to deal\n',
    ' in the Software without restriction, including without limitation the rights\n',
    ' to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n',
    ' copies of the Software, and to permit persons to whom the Software is\n',
    ' furnished to do so, subject to the following conditions:\n',
    '\n',
    ' The above copyright notice and this permission notice shall be included in\n',
    ' all copies or substantial portions of the Software.\n',
    '\n',
    ' THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n',
    ' IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n',
    ' FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n',
    ' AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n',
    ' LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n',
    ' OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n',
    ' THE SOFTWARE. */\n',
))
|
||||
|
||||
#############################################################
|
||||
# Error handler
|
||||
def fatal(module, msg):
    """Write '<module> Error: "<msg>"' to stderr and terminate with exit status 1."""
    report = module + ' Error: "' + msg + '"'
    sys.stderr.write(report + '\n')
    sys.exit(1)
|
||||
|
||||
# Get next text block
|
||||
def NextBlock(pos, record):
    """Return the index just past the text block that starts at record[pos].

    A block is a run of whitespace, a run of word characters and '*', or a
    balanced parenthesised group when record[pos] is '('. An empty record
    returns pos unchanged. Anything else is reported through fatal().
    """
    if not record:
        return pos

    if record[pos] == '(':
        # Walk forward until the parenthesis opened at pos is closed again.
        depth = 0
        cursor = pos
        while cursor < len(record):
            ch = record[cursor]
            cursor += 1
            if ch == '(':
                depth += 1
            elif ch == ')':
                depth -= 1
                if depth == 0:
                    break
        if depth != 0:
            fatal('NextBlock', "count is not zero (" + str(depth) + ")")
        if record[cursor - 1] != ')':
            fatal('NextBlock', "last char is not ')' '" + record[cursor - 1] + "'")
        return cursor

    # Whitespace is tried first, then a word (letters, digits, '_' and '*').
    token = re.compile(r'(\s+)').match(record, pos)
    if not token:
        token = re.compile(r'([\w\*]+)').match(record, pos)
    if token:
        return token.end(1)
    fatal('NextBlock', "bad record '" + record + "' pos(" + str(pos) + ")")
|
||||
|
||||
#############################################################
|
||||
# API table parser class
|
||||
class API_TableParser:
    """Collect the API names listed in one '<name>Table' struct of a header.

    After construction ``self.array`` holds, in file order, the text found
    inside each ``decltype(...)`` entry between the line that opens
    ``struct <name>Table {`` and the first following ``};`` line.
    """

    def fatal(self, msg):
        fatal('API_TableParser', msg)

    def __init__(self, header, name):
        """header: path of the header to scan; name: table name without the 'Table' suffix."""
        self.name = name

        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        # Raw strings keep '\s' a regex escape instead of an invalid string escape.
        self.beg_pattern = re.compile(r'^\s*struct\s+' + name + r'Table\s*{\s*$')
        self.end_pattern = re.compile(r'^\s*};\s*$')
        self.array = []

        self.inp = open(header, 'r')
        try:
            self.parse()
        finally:
            # The handle is only needed while parsing; do not leak it.
            self.inp.close()

    # normalizing a line
    def norm_line(self, line):
        # Strip only the line terminator (a final line without one keeps its
        # last character) and collapse leading whitespace to a single space.
        return re.sub(r'^\s+', r' ', line.rstrip('\n'))

    # check for start record
    def is_start(self, record):
        return self.beg_pattern.match(record)

    # check for end record
    def is_end(self, record):
        return self.end_pattern.match(record)

    # check for declaration entry record
    def is_entry(self, record):
        return re.match(r'^\s*decltype\(([^\)]*)\)', record)

    # parse method
    def parse(self):
        active = 0
        for line in self.inp.readlines():
            record = self.norm_line(line)
            if self.is_start(record):
                active = 1
            if active != 0:
                # Stop at the end of the table that was opened.
                if self.is_end(record):
                    return
                m = self.is_entry(record)
                if m:
                    self.array.append(m.group(1))
|
||||
|
||||
#############################################################
|
||||
# API declaration parser class
|
||||
class API_DeclParser:
    """Find the declarations of the given API calls in one header.

    For every call in ``array`` whose declaration is found, ``data[call]`` is
    set to the dict built by get_args(): 'ret' (return type), 'args' (argument
    list text), 'astr' (argument name -> declaration text), 'alst' (argument
    names in order) and 'tlst' (argument declarations in order).
    """

    def fatal(self, msg):
        fatal('API_DeclParser', msg)

    def __init__(self, header, array, data):
        """header: path to scan; array: call names to look for; data: dict filled in place."""
        if not os.path.isfile(header):
            self.fatal("file '" + header + "' not found")

        # Read the header once and release the handle; parse() works on the
        # cached lines instead of re-reading the file for every call.
        self.inp = open(header, 'r')
        try:
            self.lines = self.inp.readlines()
        finally:
            self.inp.close()

        self.end_pattern = re.compile(r'\);\s*$')
        self.data = data
        for call in array:
            if call in data:
                self.fatal(call + ' is already found')
            self.parse(call)

    # api record filter
    def api_filter(self, record):
        record = re.sub(r'\sHSA_API\s', r' ', record)
        record = re.sub(r'\sHSA_DEPRECATED\s', r' ', record)
        return record

    # check for start record
    def is_start(self, call, record):
        return re.search(r'\s' + call + r'\s*\(', record)

    # check for API method record
    def is_api(self, call, record):
        record = self.api_filter(record)
        return re.match(r'\s+\S+\s+' + call + r'\s*\(', record)

    # check for end record
    def is_end(self, record):
        return self.end_pattern.search(record)

    # parse method args
    def get_args(self, record):
        struct = {'ret': '', 'args': '', 'astr': {}, 'alst': [], 'tlst': []}
        record = re.sub(r'^\s+', r'', record)
        # Attach every run of '*' to the token on its left: "T *x" -> "T* x".
        record = re.sub(r'\s*(\*+)\s*', r'\1 ', record)
        rind = NextBlock(0, record)
        struct['ret'] = record[0:rind]
        pos = record.find('(')
        end = NextBlock(pos, record)
        args = record[pos:end]
        args = re.sub(r'^\(\s*', r'', args)
        args = re.sub(r'\s*\)$', r'', args)
        args = re.sub(r'\s*,\s*', r',', args)
        struct['args'] = re.sub(r',', r', ', args)
        if len(args) == 0:
            return struct

        pos = 0
        # Trailing comma so that every argument, including the last, ends with ','.
        args = args + ','
        while pos < len(args):
            ind1 = NextBlock(pos, args)  # type
            ind2 = NextBlock(ind1, args)  # space
            if args[ind2] != '(':
                # Plain argument: the name is the last block before the comma.
                while ind2 < len(args):
                    end = NextBlock(ind2, args)
                    if args[end] == ',':
                        break
                    else:
                        ind2 = end
                name = args[ind2:end]
            else:
                # Function-pointer argument: the name sits inside "(* name)".
                ind3 = NextBlock(ind2, args)  # field
                m = re.match(r'\(\s*\*\s*(\S+)\s*\)', args[ind2:ind3])
                if not m:
                    self.fatal("bad block3 '" + args + "' : '" + args[ind2:ind3] + "'")
                name = m.group(1)
                end = NextBlock(ind3, args)  # the rest
            item = args[pos:end]
            struct['astr'][name] = item
            struct['alst'].append(name)
            struct['tlst'].append(item)
            if args[end] != ',':
                self.fatal("no comma '" + args + "'")
            pos = end + 1

        return struct

    # parse given api
    def parse(self, call):
        record = ''
        active = 0
        prev_line = ''

        for line in self.lines:
            # Strip only the terminator so a final line without '\n' stays intact.
            record += ' ' + line.rstrip('\n')
            record = re.sub(r'^\s*', r' ', record)

            if active == 0:
                if self.is_start(call, record):
                    active = 1
                    m = self.is_api(call, record)
                    if not m:
                        # The return type may be on the previous line.
                        record = ' ' + prev_line + ' ' + record
                        m = self.is_api(call, record)
                        if not m:
                            self.fatal("bad api '" + line + "'")

            if active == 1:
                # Keep accumulating lines until the declaration's closing ');'.
                if self.is_end(record):
                    self.data[call] = self.get_args(record)
                    active = 0

            if active == 0:
                record = ''
            prev_line = line
|
||||
|
||||
#############################################################
|
||||
# API description parser class
|
||||
class API_DescrParser:
    """Build the text of the generated HSA tracing header and inline source.

    After construction ``self.h_content`` holds the header text (API id enum,
    argument structure, output stream operator) and ``self.cpp_content`` the
    inline source text (callbacks, intercept installers, name/code lookups).
    """

    def fatal(self, msg):
        fatal('API_DescrParser', msg)

    def __init__(self, out_h_file, hsa_dir, api_table_h, api_headers, license):
        """
        out_h_file: name of the generated header, used to derive the include guard
        hsa_dir: directory prefix (with trailing separator) of the HSA headers
        api_table_h: header containing the '<name>Table' structs
        api_headers: sequence of (table name, header) pairs; the last pair is
                     searched for the calls not found in the preceding headers
        license: license text placed at the top of both outputs
        """
        out_macro = re.sub(r'[\/\.]', r'_', out_h_file.upper()) + '_'

        self.h_content = ''
        self.cpp_content = ''
        self.api_names = []
        self.api_calls = {}
        self.api_rettypes = set()
        self.api_id = {}

        api_data = {}
        api_list = []
        ns_calls = []

        last_index = len(api_headers) - 1
        for i, (name, header) in enumerate(api_headers):
            if i < last_index:
                api = API_TableParser(hsa_dir + api_table_h, name)
                api_list = api.array
                self.api_names.append(name)
                self.api_calls[name] = api_list
            else:
                # Last header: retry the calls that no earlier header declared.
                api_list = ns_calls
                ns_calls = []

            for call in api_list:
                if call in api_data:
                    self.fatal("call '" + call + "' is already found")

            API_DeclParser(hsa_dir + header, api_list, api_data)

            for call in api_list:
                if call not in api_data:
                    # Not-supported functions
                    ns_calls.append(call)
                else:
                    # API ID map
                    self.api_id[call] = 'HSA_API_ID_' + call
                    # Return types
                    self.api_rettypes.add(api_data[call]['ret'])

        self.api_rettypes.discard('void')
        self.api_data = api_data
        self.ns_calls = ns_calls

        self.h_content += "/* Generated by " + os.path.basename(__file__) + " */\n" + license + "\n\n"

        self.h_content += "/* HSA API tracing primitives\n"
        for (name, header) in api_headers:
            self.h_content += " '" + name + "', header '" + header + "', " + str(len(self.api_calls[name])) + ' funcs\n'
        for call in self.ns_calls:
            self.h_content += ' ' + call + ' was not parsed\n'
        self.h_content += " */\n"
        self.h_content += '\n'
        self.h_content += '#ifndef ' + out_macro + '\n'
        self.h_content += '#define ' + out_macro + '\n'

        self.h_content += self.add_section('API ID enumeration', ' ', self.gen_id_enum)

        self.h_content += '/* Declarations of APIs intended for use only by tools. */\n'
        self.h_content += 'typedef void (*hsa_amd_queue_intercept_packet_writer)(const void*, uint64_t);\n'
        self.h_content += 'typedef void (*hsa_amd_queue_intercept_handler)(const void*, uint64_t, uint64_t, void*,\n'
        self.h_content += ' hsa_amd_queue_intercept_packet_writer);\n'
        self.h_content += 'typedef void (*hsa_amd_runtime_queue_notifier)(const hsa_queue_t*, hsa_agent_t, void*);\n'

        self.h_content += self.add_section('API arg structure', ' ', self.gen_arg_struct)
        self.h_content += self.add_section('API output stream', ' ', self.gen_out_stream)
        self.h_content += '#endif /* ' + out_macro + ' */\n'

        self.cpp_content += "/* Generated by " + os.path.basename(__file__) + " */\n" + license + "\n\n"

        self.cpp_content += '#include <hsa/hsa_api_trace.h>\n'
        self.cpp_content += '#include <atomic>\n'
        self.cpp_content += 'namespace roctracer::hsa_support::detail {\n'

        self.cpp_content += 'static CoreApiTable CoreApi_saved_before_cb;\n'
        self.cpp_content += 'static AmdExtTable AmdExt_saved_before_cb;\n'
        self.cpp_content += 'static ImageExtTable ImageExt_saved_before_cb;\n\n'

        self.cpp_content += self.add_section('API callback functions', '', self.gen_callbacks)
        self.cpp_content += self.add_section('API intercepting code', '', self.gen_intercept)
        self.cpp_content += self.add_section('API get_name function', ' ', self.gen_get_name)
        self.cpp_content += self.add_section('API get_code function', ' ', self.gen_get_code)
        self.cpp_content += '\n};\n'

    # add code section
    def add_section(self, title, gap, fun):
        """Assemble one section by driving the generator callback ``fun``.

        ``fun(n, name, call, data)`` is called with n == -1 once at the start,
        once per API call with a running index n, and once at the end with
        call == '-' and n == the number of calls. When ``gap`` is the empty
        string it is additionally called with call == '-' between two tables.
        """
        content = ''
        n = 0
        content += '\n/* section: ' + title + ' */\n\n'
        content += fun(-1, '-', '-', {})
        for name in self.api_names:
            if n != 0:
                if gap == '':
                    content += fun(n, name, '-', {})
                content += '\n'
            content += gap + '/* block: ' + name + ' API */\n'
            for call in self.api_calls[name]:
                content += fun(n, name, call, self.api_data[call])
                n += 1
        content += fun(n, '-', '-', {})
        return content

    # generate API ID enumeration
    def gen_id_enum(self, n, name, call, data):
        content = ''
        if n == -1:
            content += 'enum hsa_api_id_t {\n'
            return content
        if call != '-':
            content += ' ' + self.api_id[call] + ' = ' + str(n) + ',\n'
        else:
            content += '\n'
            content += ' HSA_API_ID_DISPATCH = ' + str(n) + ',\n'
            content += ' HSA_API_ID_NUMBER = ' + str(n + 1) + ',\n'
            content += '};\n'
        return content

    # generate API args structure
    def gen_arg_struct(self, n, name, call, struct):
        content = ''
        if n == -1:
            content += 'struct hsa_api_data_t {\n'
            content += ' uint64_t correlation_id;\n'
            content += ' uint32_t phase;\n'
            content += ' union {\n'
            # Sorted so the generated header is identical from run to run
            # (plain set iteration order is not stable for strings).
            for ret_type in sorted(self.api_rettypes):
                content += ' ' + ret_type + ' ' + ret_type + '_retval;\n'
            content += ' };\n'
            content += ' union {\n'
            return content
        if call != '-':
            content += ' struct {\n'
            for (var, item) in struct['astr'].items():
                content += ' ' + item + ';\n'
                # This call additionally stores a copy of the pointed-to range.
                if call == "hsa_amd_memory_async_copy_rect" and item == "const hsa_dim3_t* range":
                    content += ' hsa_dim3_t range__val;\n'
            content += ' } ' + call + ';\n'
        else:
            content += ' } args;\n'
            content += ' uint64_t *phase_data;\n'
            content += '};\n'
        return content

    # generate API callbacks
    def gen_callbacks(self, n, name, call, struct):
        content = ''
        if n == -1:
            content += '/* section: Static declarations */\n'
            content += '\n'
        if call != '-':
            call_id = self.api_id[call]
            ret_type = struct['ret']
            content += 'static ' + ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n'

            content += ' hsa_trace_data_t trace_data;\n'
            content += ' bool enabled{false};\n'
            content += '\n'
            content += ' if (auto function = report_activity.load(std::memory_order_relaxed); function &&\n'
            content += ' (enabled =\n'
            content += ' function(ACTIVITY_DOMAIN_HSA_API, ' + call_id + ', &trace_data) == 0)) {\n'
            content += ' if (trace_data.phase_enter != nullptr) {\n'

            for var in struct['alst']:
                item = struct['astr'][var]
                if re.search(r'char\* ', item):
                    # FIXME: we should not strdup the char* arguments here, as the callback will not outlive the scope of this function. Instead, we
                    # should generate a helper function to capture the content of the arguments similar to hipApiArgsInit for HIP. We also need a
                    # helper to free the memory that is allocated to capture the content.
                    content += ' trace_data.api_data.args.' + call + '.' + var + ' = ' + '(' + var + ' != NULL) ? strdup(' + var + ')' + ' : NULL;\n'
                else:
                    content += ' trace_data.api_data.args.' + call + '.' + var + ' = ' + var + ';\n'
                    if call == 'hsa_amd_memory_async_copy_rect' and var == 'range':
                        content += ' trace_data.api_data.args.' + call + '.' + var + '__val = ' + '*(' + var + ');\n'

            content += ' trace_data.phase_enter(' + call_id + ', &trace_data);\n'
            content += ' }\n'
            content += ' }\n'
            content += '\n'

            # Forward to the saved original entry, keeping its result if any.
            if ret_type != 'void':
                content += ' trace_data.api_data.' + ret_type + '_retval = '
            content += ' ' + name + '_saved_before_cb.' + call + '_fn(' + ', '.join(struct['alst']) + ');\n'

            content += '\n'
            content += ' if (enabled && trace_data.phase_exit != nullptr)\n'
            content += ' trace_data.phase_exit(' + call_id + ', &trace_data);\n'

            if ret_type != 'void':
                content += ' return trace_data.api_data.' + ret_type + '_retval;\n'
            content += '}\n'

        return content

    # generate API intercepting code
    def gen_intercept(self, n, name, call, struct):
        content = ''
        # Close the previous installer function.
        if n > 0 and call == '-':
            content += '};\n'
        # Open the installer function of a table.
        if n == 0 or (call == '-' and name != '-'):
            content += 'static void Install' + name + 'Wrappers(' + name + 'Table* table) {\n'
            content += ' ' + name + '_saved_before_cb = *table;\n'
        if call != '-':
            if call != 'hsa_shut_down':
                content += ' table->' + call + '_fn = ' + call + '_callback;\n'
            else:
                # hsa_shut_down is not redirected; its callback is only referenced.
                content += ' { void* p = (void*)' + call + '_callback; (void)p; }\n'
        return content

    # generate API name function
    def gen_get_name(self, n, name, call, struct):
        content = ''
        if n == -1:
            content += 'static const char* GetApiName(uint32_t id) {\n'
            content += ' switch (id) {\n'
            return content
        if call != '-':
            content += ' case ' + self.api_id[call] + ': return "' + call + '";\n'
        else:
            content += ' }\n'
            content += ' return "unknown";\n'
            content += '}\n'
        return content

    # generate API code function
    def gen_get_code(self, n, name, call, struct):
        content = ''
        if n == -1:
            content += 'static uint32_t GetApiCode(const char* str) {\n'
            return content
        if call != '-':
            content += ' if (strcmp("' + call + '", str) == 0) return ' + self.api_id[call] + ';\n'
        else:
            content += ' return HSA_API_ID_NUMBER;\n'
            content += '}\n'
        return content

    # generate stream operator
    def gen_out_stream(self, n, name, call, struct):
        content = ''
        if n == -1:
            content += '#ifdef __cplusplus\n'
            content += '#include "hsa_ostream_ops.h"\n'
            content += 'typedef std::pair<uint32_t, hsa_api_data_t> hsa_api_data_pair_t;\n'
            content += 'inline std::ostream& operator<< (std::ostream& out, const hsa_api_data_pair_t& data_pair) {\n'
            content += ' const uint32_t cid = data_pair.first;\n'
            content += ' const hsa_api_data_t& api_data = data_pair.second;\n'
            content += ' switch(cid) {\n'
            return content
        if call != '-':
            content += ' case ' + self.api_id[call] + ': {\n'
            content += ' out << "' + call + '(";\n'
            arg_list = struct['alst']
            final = len(arg_list) - 1
            for ind, arg_var in enumerate(arg_list):
                arg_val = 'api_data.args.' + call + '.' + arg_var
                if re.search(r'char\* ', struct['astr'][arg_var]):
                    # char* arguments are printed as a hexadecimal address.
                    content += ' out << "0x" << std::hex << (uint64_t)' + arg_val
                else:
                    content += ' out << ' + arg_val
                    if call == "hsa_amd_memory_async_copy_rect" and arg_var == "range":
                        content += ' << ", ";\n'
                        content += ' out << ' + arg_val + '__val'
                # (A disabled variant that dereferenced pointer arguments was
                # kept here as an unused string literal; it had no effect.)
                if ind < final:
                    content += ' << ", ";\n'
                else:
                    content += ';\n'
            if struct['ret'] != 'void':
                content += ' out << ") = " << api_data.' + struct['ret'] + '_retval;\n'
            else:
                content += ' out << ") = void";\n'
            content += ' break;\n'
            content += ' }\n'
        else:
            content += ' default:\n'
            content += ' out << "ERROR: unknown API";\n'
            content += ' abort();\n'
            content += ' }\n'
            content += ' return out;\n'
            content += '}\n'
            content += '#endif\n'
        return content
|
||||
|
||||
#############################################################
|
||||
# main
|
||||
# Usage
|
||||
# Exactly two arguments are required: the output directory and the HSA include directory.
if len(sys.argv) != 3:
    print ("Usage:", sys.argv[0], " <OUT prefix> <HSA runtime include path>", file=sys.stderr)
    sys.exit(1)
else:
    PREFIX = sys.argv[1] + '/'
    HSA_DIR = sys.argv[2] + '/'

descr = API_DescrParser(H_OUT, HSA_DIR, API_TABLES_H, API_HEADERS_H, LICENSE)

# Each output is written without the last character of the generated text.
# The context managers close the files even if a write fails.
out_file = PREFIX + H_OUT
print ('Generating "' + out_file + '"')
with open(out_file, 'w') as f:
    f.write(descr.h_content[:-1])

out_file = PREFIX + CPP_OUT
print ('Generating "' + out_file + '"')
with open(out_file, 'w') as f:
    f.write(descr.cpp_content[:-1])
|
||||
#############################################################
|
||||
@@ -0,0 +1,245 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
|
||||
|
||||
# Define STATIC_BUILD=1 when a static library is requested. Both operands are
# quoted so the condition stays well-formed when LIBRARY_TYPE is empty or unset.
if("${LIBRARY_TYPE}" STREQUAL "STATIC")
  add_compile_definitions(STATIC_BUILD=1)
endif()
|
||||
|
||||
option(DEBUG_TRACE "Enable debug tracing")
|
||||
if(DEBUG_TRACE)
|
||||
add_compile_definitions(DEBUG_TRACE_ON=1)
|
||||
endif()
|
||||
|
||||
find_package(Python3 COMPONENTS Interpreter REQUIRED)
|
||||
|
||||
execute_process(COMMAND ${Python3_EXECUTABLE} -c "import CppHeaderParser"
|
||||
RESULT_VARIABLE CPP_HEADER_PARSER
|
||||
OUTPUT_QUIET)
|
||||
|
||||
if(NOT ${CPP_HEADER_PARSER} EQUAL 0)
|
||||
message(FATAL_ERROR "\
|
||||
The \"CppHeaderParser\" Python3 package is not installed. \
|
||||
Please install it using the following command: \"pip3 install CppHeaderParser\".\
|
||||
")
|
||||
endif()
|
||||
|
||||
get_property(HSA_RUNTIME_INCLUDE_DIRECTORIES TARGET hsa-runtime64::hsa-runtime64 PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
find_file(HSA_H hsa.h
|
||||
PATHS ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
|
||||
PATH_SUFFIXES hsa
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
get_filename_component(HSA_RUNTIME_INC_PATH ${HSA_H} DIRECTORY)
|
||||
|
||||
## Generate the HSA wrapper functions header
|
||||
add_custom_command(
|
||||
OUTPUT hsa_prof_str.h hsa_prof_str.inline.h
|
||||
COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/hsaap.py ${CMAKE_CURRENT_BINARY_DIR} "${HSA_RUNTIME_INC_PATH}" > /dev/null
|
||||
DEPENDS ${PROJECT_SOURCE_DIR}/script/hsaap.py
|
||||
"${HSA_RUNTIME_INC_PATH}/hsa.h" "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
|
||||
"${HSA_RUNTIME_INC_PATH}/hsa_ext_image.h" "${HSA_RUNTIME_INC_PATH}/hsa_api_trace.h"
|
||||
COMMENT "Generating hsa_prof_str.h,hsa_prof_str.inline.h...")
|
||||
|
||||
## Generate the HSA pretty printers
|
||||
add_custom_command(
|
||||
OUTPUT hsa_ostream_ops.h
|
||||
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa.h" -o hsa.h.i
|
||||
COMMAND ${CMAKE_C_COMPILER} -E "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h" -o hsa_ext_amd.h.i
|
||||
BYPRODUCTS hsa.h.i hsa_ext_amd.h.i
|
||||
COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
|
||||
-in hsa.h.i,hsa_ext_amd.h.i -out hsa_ostream_ops.h > /dev/null
|
||||
DEPENDS ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
|
||||
"${HSA_RUNTIME_INC_PATH}/hsa.h" "${HSA_RUNTIME_INC_PATH}/hsa_ext_amd.h"
|
||||
COMMENT "Generating hsa_ostream_ops.h...")
|
||||
|
||||
get_property(HIP_INCLUDE_DIRECTORIES TARGET hip::amdhip64 PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
find_file(HIP_RUNTIME_API_H hip_runtime_api.h
|
||||
PATHS ${HIP_INCLUDE_DIRECTORIES}
|
||||
PATH_SUFFIXES hip
|
||||
NO_DEFAULT_PATH
|
||||
REQUIRED)
|
||||
|
||||
## Generate the HIP pretty printers
|
||||
add_custom_command(
|
||||
OUTPUT hip_ostream_ops.h
|
||||
COMMAND ${CMAKE_C_COMPILER} "$<$<BOOL:${HIP_INCLUDE_DIRECTORIES}>:-I$<JOIN:${HIP_INCLUDE_DIRECTORIES},$<SEMICOLON>-I>>"
|
||||
-E "${CMAKE_CURRENT_SOURCE_DIR}/roctracer/hip_full_api.h" -D__HIP_PLATFORM_AMD__=1 -D__HIP_ROCclr__=1 -o hip_runtime_api.h.i
|
||||
BYPRODUCTS hip_runtime_api.h.i
|
||||
COMMAND ${Python3_EXECUTABLE} ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py
|
||||
-in hip_runtime_api.h.i -out hip_ostream_ops.h > /dev/null
|
||||
DEPENDS ${PROJECT_SOURCE_DIR}/script/gen_ostream_ops.py "${HIP_RUNTIME_API_H}"
|
||||
COMMENT "Generating hip_ostream_ops.h..."
|
||||
COMMAND_EXPAND_LISTS)
|
||||
|
||||
set(PUBLIC_HEADERS
|
||||
roctx.h
|
||||
roctracer.h
|
||||
roctracer_ext.h
|
||||
roctracer_hip.h
|
||||
roctracer_hcc.h
|
||||
roctracer_hsa.h
|
||||
roctracer_roctx.h
|
||||
roctracer_plugin.h
|
||||
ext/prof_protocol.h)
|
||||
|
||||
foreach(header ${PUBLIC_HEADERS})
|
||||
get_filename_component(header_subdir ${header} DIRECTORY)
|
||||
install(FILES ${PROJECT_SOURCE_DIR}/inc/${header}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/${header_subdir}
|
||||
COMPONENT dev)
|
||||
endforeach()
|
||||
|
||||
set(GENERATED_HEADERS
|
||||
hip_ostream_ops.h
|
||||
hsa_prof_str.h
|
||||
hsa_ostream_ops.h)
|
||||
|
||||
foreach(header ${GENERATED_HEADERS})
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${header}
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
|
||||
COMPONENT dev)
|
||||
endforeach()
|
||||
|
||||
## Build the util library
|
||||
file(GLOB UTIL_SOURCES "util/*.cpp")
|
||||
add_library(util STATIC ${UTIL_SOURCES})
|
||||
|
||||
set_target_properties(util PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
target_include_directories(util
|
||||
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/util)
|
||||
|
||||
check_include_file(backtrace.h BACKTRACE_H)
|
||||
if(BACKTRACE_H)
|
||||
target_compile_definitions(util PRIVATE HAVE_BACKTRACE_H)
|
||||
find_library(BACKTRACE_LIB "backtrace" ${CMAKE_C_IMPLICIT_LINK_DIRECTORIES})
|
||||
endif()
|
||||
|
||||
if(BACKTRACE_LIB)
|
||||
target_compile_definitions(util PRIVATE ENABLE_BACKTRACE)
|
||||
target_link_libraries(util PRIVATE ${BACKTRACE_LIB})
|
||||
endif()
|
||||
|
||||
## Build the ROCtracer library
|
||||
file(GLOB ROCTRACER_SOURCES "roctracer/*.cpp")
|
||||
add_library(roctracer ${LIBRARY_TYPE} ${ROCTRACER_SOURCES} ${GENERATED_HEADERS} hsa_prof_str.inline.h)
|
||||
|
||||
set_target_properties(roctracer PROPERTIES
|
||||
CXX_VISIBILITY_PRESET hidden
|
||||
OUTPUT_NAME "roctracer64"
|
||||
DEFINE_SYMBOL "ROCTRACER_EXPORTS"
|
||||
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/roctracer/exportmap
|
||||
VERSION ${PROJECT_VERSION}
|
||||
SOVERSION ${PROJECT_VERSION_MAJOR})
|
||||
|
||||
target_compile_definitions(roctracer
|
||||
PUBLIC AMD_INTERNAL_BUILD
|
||||
PRIVATE PROF_API_IMPL HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
|
||||
|
||||
target_include_directories(roctracer
|
||||
PUBLIC
|
||||
${HIP_INCLUDE_DIRECTORIES} ${HSA_RUNTIME_INCLUDE_DIRECTORIES}
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>
|
||||
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/inc>
|
||||
PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/roctracer ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
target_link_options(roctracer PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/roctracer/exportmap -Wl,--no-undefined)
|
||||
target_link_libraries(roctracer PRIVATE util hsa-runtime64::hsa-runtime64 stdc++fs Threads::Threads dl)
|
||||
|
||||
install(TARGETS roctracer LIBRARY
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
COMPONENT runtime NAMELINK_SKIP)
|
||||
## Install name link library in dev component
|
||||
install(TARGETS roctracer LIBRARY
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
COMPONENT dev NAMELINK_ONLY)
|
||||
|
||||
install(TARGETS roctracer LIBRARY
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
COMPONENT asan)
|
||||
|
||||
## Build the ROCTX library
|
||||
file(GLOB ROCTX_SOURCES "roctx/*.cpp")
|
||||
add_library(roctx ${LIBRARY_TYPE} ${ROCTX_SOURCES})
|
||||
|
||||
set_target_properties(roctx PROPERTIES
|
||||
CXX_VISIBILITY_PRESET hidden
|
||||
OUTPUT_NAME "roctx64"
|
||||
DEFINE_SYMBOL "ROCTX_EXPORTS"
|
||||
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/roctx/exportmap
|
||||
VERSION ${PROJECT_VERSION}
|
||||
SOVERSION ${PROJECT_VERSION_MAJOR})
|
||||
|
||||
target_include_directories(roctx
|
||||
PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/inc>
|
||||
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
target_link_options(roctx PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/roctx/exportmap -Wl,--no-undefined)
|
||||
|
||||
install(TARGETS roctx LIBRARY
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
COMPONENT runtime NAMELINK_SKIP)
|
||||
## Install name link library in dev component
|
||||
install(TARGETS roctx LIBRARY
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
COMPONENT dev NAMELINK_ONLY)
|
||||
|
||||
install(TARGETS roctx LIBRARY
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
COMPONENT asan)
|
||||
|
||||
## Build the tracer_tool library
|
||||
if (${LIBRARY_TYPE} STREQUAL SHARED)
|
||||
|
||||
file(GLOB TRACER_TOOL_SOURCES "tracer_tool/*.cpp")
|
||||
add_library(roctracer_tool SHARED ${TRACER_TOOL_SOURCES})
|
||||
|
||||
set_target_properties(roctracer_tool PROPERTIES
|
||||
CXX_VISIBILITY_PRESET hidden
|
||||
LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/tracer_tool/exportmap
|
||||
INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")
|
||||
|
||||
target_compile_definitions(roctracer_tool
|
||||
PRIVATE HIP_PROF_HIP_API_STRING=1 __HIP_PLATFORM_AMD__=1)
|
||||
|
||||
target_include_directories(roctracer_tool
|
||||
PRIVATE
|
||||
${PROJECT_SOURCE_DIR}/inc ${CMAKE_CURRENT_SOURCE_DIR}/roctracer
|
||||
${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
target_link_libraries(roctracer_tool util roctracer hsa-runtime64::hsa-runtime64 stdc++fs Threads::Threads atomic dl)
|
||||
target_link_options(roctracer_tool PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/tracer_tool/exportmap -Wl,--no-undefined)
|
||||
|
||||
install(TARGETS roctracer_tool LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} COMPONENT runtime)
|
||||
install(TARGETS roctracer_tool LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} COMPONENT asan)
|
||||
|
||||
add_library(hip_stats SHARED hip_stats/hip_stats.cpp)
|
||||
set_target_properties(hip_stats PROPERTIES INSTALL_RPATH "${ROCM_APPEND_PRIVLIB_RPATH}")
|
||||
target_compile_definitions(hip_stats PRIVATE __HIP_PLATFORM_AMD__)
|
||||
target_link_libraries(hip_stats roctracer stdc++fs)
|
||||
install(TARGETS hip_stats LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} COMPONENT runtime)
|
||||
install(TARGETS hip_stats LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/${PROJECT_NAME} COMPONENT asan)
|
||||
|
||||
endif()
|
||||
@@ -0,0 +1,259 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "roctracer.h"
|
||||
#include "roctracer_hip.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <experimental/filesystem>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <numeric>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#define CHECK_ROCTRACER(call) \
|
||||
do { \
|
||||
roctracer_status_t status = call; \
|
||||
if (status != ROCTRACER_STATUS_SUCCESS) { \
|
||||
std::cerr << roctracer_error_string() << std::endl; \
|
||||
abort(); \
|
||||
} \
|
||||
} while (false)
|
||||
|
||||
namespace {
|
||||
|
||||
// Rounds v up to the nearest power of two. Powers of two map to themselves and 0 maps to 1.
// Inputs above 2^63 wrap around to 0 because the result does not fit in 64 bits.
constexpr uint64_t NextPowerOf2(uint64_t v) {
  if (v == 0) v = 1;
  --v;
  // Smear the highest set bit into every lower position, doubling the shift each round.
  for (unsigned shift = 1; shift < 64; shift <<= 1) v |= v >> shift;
  return v + 1;
}
|
||||
|
||||
constexpr size_t KiB = 1024;
constexpr size_t MiB = KiB * KiB;
constexpr size_t GiB = KiB * KiB * KiB;

// Formats a byte count for display. Sizes below 1 KiB are printed as a plain integer; larger
// sizes are scaled to KiB, MiB or GiB, printed in fixed notation with `precision` decimal
// places, and suffixed with "K", "M" or "G" respectively.
std::string HumanReadableSize(size_t size, int precision) {
  std::stringstream ss;
  if (size < KiB) {
    ss << size;
    return ss.str();
  }

  // Pick the largest unit that does not exceed the size.
  size_t unit = GiB;
  const char* suffix = "G";
  if (size < MiB) {
    unit = KiB;
    suffix = "K";
  } else if (size < GiB) {
    unit = MiB;
    suffix = "M";
  }

  ss << std::fixed << std::setprecision(precision)
     << static_cast<double>(size) / static_cast<double>(unit) << suffix;
  return ss.str();
}
|
||||
|
||||
// Running totals for one function: the summed duration and the number of samples.
// Both counters start at zero, so a default-constructed instance is ready to accumulate.
struct FunctionStats {
  uint64_t total_time_ns{0};
  uint64_t count{0};

  // Adds one sample lasting time_ns nanoseconds.
  void Accumulate(uint64_t time_ns) {
    total_time_ns += time_ns;
    ++count;
  }
};
|
||||
|
||||
// Running totals for a group of memory copies: summed duration, summed bytes and the number
// of samples. All counters start at zero, so a default-constructed instance is ready to use.
struct MemCopyStats {
  uint64_t total_time_ns{0};
  uint64_t total_byte_size{0};
  uint64_t count{0};

  // Adds one copy of byte_size bytes that took time_ns nanoseconds.
  void Accumulate(uint64_t time_ns, uint64_t byte_size) {
    total_time_ns += time_ns;
    total_byte_size += byte_size;
    ++count;
  }
};
|
||||
|
||||
// Hash functor for std::pair keys in unordered containers.
// The two element hashes are mixed with an order-sensitive combine step rather than a plain
// XOR, so (a, b) and (b, a) hash differently and pairs with equal halves do not collapse to 0.
struct pair_hash {
  template <typename T1, typename T2> std::size_t operator()(const std::pair<T1, T2>& pair) const {
    std::size_t seed = std::hash<T1>()(pair.first);
    seed ^= std::hash<T2>()(pair.second) + static_cast<std::size_t>(0x9e3779b97f4a7c15ULL) +
        (seed << 6) + (seed >> 2);
    return seed;
  }
};
|
||||
|
||||
std::unordered_map<decltype(roctracer_record_t::op), FunctionStats> hip_api_stats;
|
||||
std::unordered_map<std::string, FunctionStats> kernel_stats;
|
||||
std::unordered_map<std::pair<decltype(roctracer_record_t::kind), size_t>, MemCopyStats, pair_hash>
|
||||
memcpy_stats;
|
||||
|
||||
void CollectStatistics(const char* begin, const char* end, void* /* user_arg */) {
|
||||
const auto* record = reinterpret_cast<const roctracer_record_t*>(begin);
|
||||
while (record < reinterpret_cast<const roctracer_record_t*>(end)) {
|
||||
auto elapsed_time_ns = record->end_ns - record->begin_ns;
|
||||
|
||||
if (record->domain == ACTIVITY_DOMAIN_HIP_OPS && record->op == HIP_OP_ID_DISPATCH) {
|
||||
const char* kernel_name = record->kernel_name;
|
||||
if (kernel_name == nullptr) kernel_name = "Unknown kernels";
|
||||
kernel_stats[kernel_name].Accumulate(elapsed_time_ns);
|
||||
} else if (record->domain == ACTIVITY_DOMAIN_HIP_OPS && record->op == HIP_OP_ID_COPY)
|
||||
memcpy_stats[std::make_pair(record->kind, NextPowerOf2(record->bytes))].Accumulate(
|
||||
elapsed_time_ns, record->bytes);
|
||||
else if (record->domain == ACTIVITY_DOMAIN_HIP_API)
|
||||
hip_api_stats[record->op].Accumulate(elapsed_time_ns);
|
||||
|
||||
CHECK_ROCTRACER(roctracer_next_record(record, &record));
|
||||
}
|
||||
}
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
void DumpStatistics() {
|
||||
CHECK_ROCTRACER(roctracer_close_pool());
|
||||
|
||||
fs::path output_dir = []() {
|
||||
const char* env_var = getenv("ROCP_OUTPUT_DIR");
|
||||
return env_var != nullptr ? env_var : "";
|
||||
}();
|
||||
|
||||
std::ofstream out;
|
||||
|
||||
if (output_dir.empty()) {
|
||||
// If an output directory was not specified, then print the statistics to stdout.
|
||||
out.copyfmt(std::cout);
|
||||
out.clear(std::cout.rdstate());
|
||||
out.basic_ios<char>::rdbuf(std::cout.rdbuf());
|
||||
} else {
|
||||
if (auto status = fs::status(output_dir); !fs::exists(status) || !fs::is_directory(status)) {
|
||||
std::cerr << "error: ROCP_OUTPUT_DIR=" << output_dir << " is not a directory" << std::endl;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
auto compare = [](const auto& x, const auto& y) {
|
||||
return x.second.total_time_ns > y.second.total_time_ns;
|
||||
};
|
||||
|
||||
// Print the HIP API statistics sorted by descending total inclusive time.
|
||||
if (!hip_api_stats.empty()) {
|
||||
const char* filename = "hip_api_stats.csv";
|
||||
if (!output_dir.empty()) out = std::ofstream(output_dir / filename);
|
||||
|
||||
if (out.good()) {
|
||||
std::cout << "Dumping HIP API statistics." << std::endl;
|
||||
|
||||
uint64_t total_hip_api_time_ns =
|
||||
std::accumulate(hip_api_stats.begin(), hip_api_stats.end(), 0,
|
||||
[](uint64_t total_time_ns, const auto& stats) {
|
||||
return total_time_ns + stats.second.total_time_ns;
|
||||
});
|
||||
|
||||
out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl;
|
||||
for (auto&& [op, stats] : std::set<decltype(hip_api_stats)::value_type, decltype(compare)>(
|
||||
hip_api_stats.begin(), hip_api_stats.end(), compare))
|
||||
out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op, 0) << "\"," << stats.count
|
||||
<< "," << stats.total_time_ns << "," << stats.total_time_ns / stats.count << ","
|
||||
<< std::fixed << std::setprecision(4)
|
||||
<< (double)stats.total_time_ns / total_hip_api_time_ns * 100 << std::endl;
|
||||
} else {
|
||||
std::cerr << "warning: could not open " << output_dir / filename << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Print the HIP kernel dispatch statistics sorted by descending execution time.
|
||||
|
||||
if (!kernel_stats.empty()) {
|
||||
const char* filename = "hip_kernel_stats.csv";
|
||||
if (!output_dir.empty()) out = std::ofstream(output_dir / filename);
|
||||
|
||||
if (out.good()) {
|
||||
std::cout << "Dumping HIP kernel dispatch statistics." << std::endl;
|
||||
|
||||
uint64_t total_kernel_time_ns =
|
||||
std::accumulate(kernel_stats.begin(), kernel_stats.end(), 0,
|
||||
[](uint64_t total_time_ns, const auto& stats) {
|
||||
return total_time_ns + stats.second.total_time_ns;
|
||||
});
|
||||
|
||||
out << "\"Name\",\"Calls\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\"" << std::endl;
|
||||
for (auto&& [name, stats] : std::set<decltype(kernel_stats)::value_type, decltype(compare)>(
|
||||
kernel_stats.begin(), kernel_stats.end(), compare))
|
||||
out << "\"" << name << "\"," << stats.count << "," << stats.total_time_ns << ","
|
||||
<< stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4)
|
||||
<< (double)stats.total_time_ns / total_kernel_time_ns * 100 << std::endl;
|
||||
} else {
|
||||
std::cerr << "warning: could not open " << output_dir / filename << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// Print the HIP memory copy statistics sorted by descending transfer time.
|
||||
|
||||
if (!memcpy_stats.empty()) {
|
||||
const char* filename = "hip_copy_stats.csv";
|
||||
if (!output_dir.empty()) out = std::ofstream(output_dir / filename);
|
||||
|
||||
if (out.good()) {
|
||||
std::cout << "Dumping HIP memory copy statistics." << std::endl;
|
||||
|
||||
uint64_t total_memory_copy_time_ns =
|
||||
std::accumulate(memcpy_stats.begin(), memcpy_stats.end(), 0,
|
||||
[](uint64_t total_time_ns, const auto& stats) {
|
||||
return total_time_ns + stats.second.total_time_ns;
|
||||
});
|
||||
|
||||
out << "\"Name\",\"Calls\",\"TotalBytes\",\"TotalDurationNs\",\"AverageNs\",\"Percentage\""
|
||||
<< std::endl;
|
||||
for (auto&& [kind, stats] : std::set<decltype(memcpy_stats)::value_type, decltype(compare)>(
|
||||
memcpy_stats.begin(), memcpy_stats.end(), compare))
|
||||
out << "\"" << roctracer_op_string(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY, kind.first)
|
||||
<< "(" << HumanReadableSize(kind.second >> 1, 0) << "-"
|
||||
<< HumanReadableSize(kind.second, 0) << ")"
|
||||
<< "\"," << stats.count << "," << stats.total_byte_size << "," << stats.total_time_ns
|
||||
<< "," << stats.total_time_ns / stats.count << "," << std::fixed << std::setprecision(4)
|
||||
<< (double)stats.total_time_ns / total_memory_copy_time_ns * 100 << std::endl;
|
||||
} else {
|
||||
std::cerr << "warning: could not open " << output_dir / filename << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
|
||||
// Tool entry point (NOTE(review): presumably invoked by the HSA runtime when this library is
// loaded as a tool; confirm against the loader). Opens a roctracer pool whose buffers are
// handed to CollectStatistics, enables HIP API, kernel dispatch and memory copy activity
// records, and registers DumpStatistics to run at process exit. Always returns true.
extern "C" ROCTRACER_EXPORT bool OnLoad(HsaApiTable* /* table */, uint64_t /* runtime_version */,
                                        uint64_t /* failed_tool_count */,
                                        const char* const* /* failed_tool_names */) {
  // Each buffer has room for this many activity records.
  constexpr size_t kRecordsPerBuffer = 10000;

  roctracer_properties_t pool_properties{};
  pool_properties.buffer_callback_arg = nullptr;
  pool_properties.buffer_callback_fun = CollectStatistics;
  pool_properties.buffer_size = kRecordsPerBuffer * sizeof(roctracer_record_t);
  CHECK_ROCTRACER(roctracer_open_pool(&pool_properties));

  CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
  CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_DISPATCH));
  CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY));

  // Emit the collected statistics when the process exits.
  std::atexit([]() { DumpStatistics(); });

  return true;
}
|
||||
|
||||
// Counterpart of OnLoad. Intentionally empty: nothing is torn down here.
extern "C" ROCTRACER_EXPORT void OnUnload() {}
|
||||
@@ -0,0 +1,73 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Deprecated functions:
|
||||
ROCTRACER_API int roctracer_load() { return 1; }
|
||||
ROCTRACER_API void roctracer_unload() {}
|
||||
ROCTRACER_API void roctracer_flush_buf() {}
|
||||
ROCTRACER_API void roctracer_mark(const char*) {}
|
||||
|
||||
ROCTRACER_API roctracer_status_t roctracer_enable_callback(roctracer_rtapi_callback_t callback,
|
||||
void* user_data) {
|
||||
for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain)
|
||||
if (auto status =
|
||||
roctracer_enable_domain_callback((roctracer_domain_t)domain, callback, user_data);
|
||||
status != ROCTRACER_STATUS_SUCCESS)
|
||||
return status;
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
ROCTRACER_API roctracer_status_t roctracer_disable_callback() {
|
||||
for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain)
|
||||
if (auto status = roctracer_disable_domain_callback((roctracer_domain_t)domain);
|
||||
status != ROCTRACER_STATUS_SUCCESS)
|
||||
return status;
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
ROCTRACER_API roctracer_status_t roctracer_enable_activity_expl(roctracer_pool_t* pool) {
|
||||
for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain)
|
||||
if (auto status = roctracer_enable_domain_activity_expl((roctracer_domain_t)domain, pool);
|
||||
status != ROCTRACER_STATUS_SUCCESS)
|
||||
return status;
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
ROCTRACER_API roctracer_status_t roctracer_enable_activity() {
|
||||
for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain)
|
||||
if (auto status = roctracer_enable_domain_activity((roctracer_domain_t)domain);
|
||||
status != ROCTRACER_STATUS_SUCCESS)
|
||||
return status;
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
ROCTRACER_API roctracer_status_t roctracer_disable_activity() {
|
||||
for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain)
|
||||
if (auto status = roctracer_disable_domain_activity((roctracer_domain_t)domain);
|
||||
status != ROCTRACER_STATUS_SUCCESS)
|
||||
return status;
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -0,0 +1,99 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "correlation_id.h"
|
||||
#include "roctracer.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <stack>
|
||||
#include <vector>
|
||||
|
||||
namespace {
|
||||
|
||||
// A stack that can be used for TLS variables. TLS destructors are invoked before global destructors
|
||||
// which is a problem if operations invoked by global destructors use TLS variables. If the TLS
|
||||
// stack is destructed, it still has well defined behavior by always returning a dummy element.
|
||||
// Unlike std::stack, pop() and top() are also well defined on an empty stack: pop() does
// nothing and top() refers to the dummy element. The same fallbacks apply once the stack has
// been marked invalid by its destructor.
template <typename T> class Stack : std::stack<T, std::vector<T>> {
  using parent_type = typename std::stack<T, std::vector<T>>;

 public:
  Stack() { valid_.store(true, std::memory_order_relaxed); }
  ~Stack() { valid_.store(false, std::memory_order_relaxed); }

  // Constructs a new top element from args and returns a reference to it. When the stack is
  // not valid, the value is stored in the dummy element instead and that is returned.
  template <class... Args> auto& emplace(Args&&... args) {
    if (is_valid()) return parent_type::emplace(std::forward<Args>(args)...);
    return dummy_element_ = T(std::forward<Args>(args)...);
  }

  // Pushes v; ignored when the stack is not valid.
  void push(const T& v) {
    if (is_valid()) parent_type::push(v);
  }
  void push(T&& v) {
    if (is_valid()) parent_type::push(std::move(v));
  }

  // Removes the top element; a no-op when the stack is empty or not valid.
  void pop() {
    if (has_elements()) parent_type::pop();
  }

  // Returns the top element, or the dummy element when there is none. The non-const overload
  // resets the dummy element to a value-initialized T before handing it out.
  const auto& top() const { return has_elements() ? parent_type::top() : dummy_element_; }
  auto& top() { return has_elements() ? parent_type::top() : (dummy_element_ = {}); }

  bool is_valid() const { return valid_.load(std::memory_order_relaxed); }
  size_t size() const { return is_valid() ? parent_type::size() : 0; }
  bool empty() const { return size() == 0; }

 private:
  // True when the underlying container may be used and holds at least one element.
  bool has_elements() const { return is_valid() && !parent_type::empty(); }

  std::atomic<bool> valid_{false};
  T dummy_element_{};  // Dummy element used when the stack is not valid or is empty.
};
|
||||
|
||||
thread_local Stack<activity_correlation_id_t> correlation_id_stack{};
|
||||
thread_local Stack<activity_correlation_id_t> external_id_stack{};
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
activity_correlation_id_t CorrelationIdPush() {
|
||||
static std::atomic<uint64_t> counter{1};
|
||||
return correlation_id_stack.emplace(counter.fetch_add(1, std::memory_order_relaxed));
|
||||
}
|
||||
|
||||
void CorrelationIdPop() { correlation_id_stack.pop(); }
|
||||
|
||||
activity_correlation_id_t CorrelationId() {
|
||||
return correlation_id_stack.empty() ? 0 : correlation_id_stack.top();
|
||||
}
|
||||
|
||||
void ExternalCorrelationIdPush(activity_correlation_id_t external_id) {
|
||||
external_id_stack.push(external_id);
|
||||
}
|
||||
|
||||
std::optional<activity_correlation_id_t> ExternalCorrelationIdPop() {
|
||||
if (external_id_stack.empty()) return std::nullopt;
|
||||
|
||||
auto external_id = external_id_stack.top();
|
||||
external_id_stack.pop();
|
||||
return std::make_optional(external_id);
|
||||
}
|
||||
|
||||
std::optional<activity_correlation_id_t> ExternalCorrelationId() {
|
||||
return external_id_stack.empty() ? std::nullopt : std::make_optional(external_id_stack.top());
|
||||
}
|
||||
|
||||
} // namespace roctracer
|
||||
@@ -0,0 +1,50 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
#include <optional>
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
// Start a new correlation ID region and push it onto the thread local stack. Correlation ID
|
||||
// regions are nested and per-thread.
|
||||
activity_correlation_id_t CorrelationIdPush();
|
||||
|
||||
// Stop the current correlation ID region and pop it from the thread local stack.
|
||||
void CorrelationIdPop();
|
||||
|
||||
// Return the ID of the currently active correlation ID region, or 0 if no region is active.
|
||||
activity_correlation_id_t CorrelationId();
|
||||
|
||||
// Start a new external correlation ID region for the given \p external_id. As for the internal
|
||||
// correlation ID regions, external correlation ID regions are nested and per-thread.
|
||||
void ExternalCorrelationIdPush(activity_correlation_id_t external_id);
|
||||
|
||||
// Stop the current external correlation ID region and return the external_id used to start the
|
||||
// region. Return a nullopt if no region was active.
|
||||
std::optional<activity_correlation_id_t> ExternalCorrelationIdPop();
|
||||
|
||||
// Return the current external correlation ID, or nullopt if no region is active.
|
||||
std::optional<activity_correlation_id_t> ExternalCorrelationId();
|
||||
|
||||
} // namespace roctracer
|
||||
@@ -0,0 +1,51 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef EXCEPTION_H_
|
||||
#define EXCEPTION_H_
|
||||
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
// Build a message of the form "<function>(), <stream>" and throw it as a roctracer::ApiError
// carrying the status code `error`.
//
// `stream` is spliced into an operator<< chain, so it may itself contain `<<` to concatenate
// several values. The macro-local stream has a deliberately unusual name: with the previous name
// (`oss`), a call-site variable called `oss` used inside `stream` would silently have been
// replaced by the macro's own, still empty, stream.
#define EXC_RAISING(error, stream)                              \
  do {                                                          \
    std::ostringstream exc_raising_oss_;                        \
    exc_raising_oss_ << __FUNCTION__ << "(), " << stream;       \
    throw roctracer::ApiError(error, exc_raising_oss_.str());   \
  } while (false)
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
class ApiError : public std::runtime_error {
|
||||
public:
|
||||
explicit ApiError(roctracer_status_t status, const std::string& what_arg)
|
||||
: std::runtime_error(what_arg), status_(status) {}
|
||||
|
||||
roctracer_status_t status() const noexcept { return status_; }
|
||||
|
||||
private:
|
||||
const roctracer_status_t status_;
|
||||
};
|
||||
|
||||
} // namespace roctracer
|
||||
|
||||
#endif // EXCEPTION_H_
|
||||
@@ -0,0 +1,48 @@
|
||||
ROCTRACER_4.0 {
|
||||
global: OnLoad;
|
||||
OnUnload;
|
||||
roctracer_activity_pop_external_correlation_id;
|
||||
roctracer_activity_push_external_correlation_id;
|
||||
roctracer_close_pool_expl;
|
||||
roctracer_default_pool_expl;
|
||||
roctracer_disable_activity;
|
||||
roctracer_disable_callback;
|
||||
roctracer_disable_domain_activity;
|
||||
roctracer_disable_domain_callback;
|
||||
roctracer_disable_op_activity;
|
||||
roctracer_disable_op_callback;
|
||||
roctracer_enable_activity_expl;
|
||||
roctracer_enable_callback;
|
||||
roctracer_enable_domain_activity_expl;
|
||||
roctracer_enable_domain_callback;
|
||||
roctracer_enable_op_activity_expl;
|
||||
roctracer_enable_op_callback;
|
||||
roctracer_error_string;
|
||||
roctracer_flush_activity_expl;
|
||||
roctracer_flush_buf;
|
||||
roctracer_get_timestamp;
|
||||
roctracer_load;
|
||||
roctracer_mark;
|
||||
roctracer_op_code;
|
||||
roctracer_open_pool_expl;
|
||||
roctracer_op_string;
|
||||
roctracer_set_properties;
|
||||
roctracer_start;
|
||||
roctracer_stop;
|
||||
roctracer_unload;
|
||||
roctracer_version_major;
|
||||
roctracer_version_minor;
|
||||
local: *;
|
||||
};
|
||||
|
||||
ROCTRACER_4.1 {
|
||||
global: HSA_AMD_TOOL_PRIORITY;
|
||||
roctracer_close_pool;
|
||||
roctracer_default_pool;
|
||||
roctracer_enable_activity;
|
||||
roctracer_enable_domain_activity;
|
||||
roctracer_enable_op_activity;
|
||||
roctracer_flush_activity;
|
||||
roctracer_next_record;
|
||||
roctracer_open_pool;
|
||||
} ROCTRACER_4.0;
|
||||
@@ -0,0 +1,2 @@
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/hip_deprecated.h>
|
||||
@@ -0,0 +1,679 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "hsa_support.h"
|
||||
|
||||
#include "correlation_id.h"
|
||||
#include "debug.h"
|
||||
#include "exception.h"
|
||||
#include "memory_pool.h"
|
||||
#include "roctracer.h"
|
||||
#include "roctracer_hsa.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/amd_hsa_signal.h>
|
||||
#include <hsa/hsa_ven_amd_loader.h>
|
||||
#include <unordered_map>
|
||||
#include <optional>
|
||||
#include <mutex>
|
||||
|
||||
namespace {
|
||||
|
||||
std::atomic<int (*)(activity_domain_t domain, uint32_t operation_id, void* data)> report_activity;
|
||||
|
||||
// Ask the registered tracer callback whether (domain, operation_id) is enabled. The callback is
// probed with a null data pointer and a return value of 0 is treated as "enabled". Returns false
// when no callback has been registered.
bool IsEnabled(activity_domain_t domain, uint32_t operation_id) {
  const auto callback = report_activity.load(std::memory_order_relaxed);
  if (!callback) return false;
  return callback(domain, operation_id, nullptr) == 0;
}
|
||||
|
||||
// Forward `data` for (domain, operation_id) to the registered tracer callback. Does nothing when
// no callback is registered; the callback's return value is ignored.
void ReportActivity(activity_domain_t domain, uint32_t operation_id, void* data) {
  const auto callback = report_activity.load(std::memory_order_relaxed);
  if (callback == nullptr) return;
  callback(domain, operation_id, data);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#include "hsa_prof_str.inline.h"
|
||||
|
||||
namespace roctracer::hsa_support {
|
||||
|
||||
namespace {
|
||||
|
||||
CoreApiTable saved_core_api{};
|
||||
AmdExtTable saved_amd_ext_api{};
|
||||
hsa_ven_amd_loader_1_01_pfn_t hsa_loader_api{};
|
||||
|
||||
struct AgentInfo {
|
||||
uint32_t id;
|
||||
hsa_device_type_t type;
|
||||
};
|
||||
std::unordered_map<decltype(hsa_agent_t::handle), AgentInfo> agent_info_map;
|
||||
|
||||
class Tracker {
|
||||
public:
|
||||
enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 };
|
||||
|
||||
enum entry_type_t {
|
||||
DFLT_ENTRY_TYPE = 0,
|
||||
API_ENTRY_TYPE = 1,
|
||||
COPY_ENTRY_TYPE = 2,
|
||||
KERNEL_ENTRY_TYPE = 3,
|
||||
NUM_ENTRY_TYPE = 4
|
||||
};
|
||||
|
||||
struct entry_t {
|
||||
std::atomic<uint32_t> valid;
|
||||
entry_type_t type;
|
||||
uint64_t correlation_id;
|
||||
roctracer_timestamp_t begin; // begin timestamp, ns
|
||||
roctracer_timestamp_t end; // end timestamp, ns
|
||||
hsa_agent_t agent;
|
||||
uint32_t dev_index;
|
||||
hsa_signal_t orig;
|
||||
hsa_signal_t signal;
|
||||
void (*handler)(const entry_t*);
|
||||
union {
|
||||
struct {
|
||||
} copy;
|
||||
struct {
|
||||
const char* name;
|
||||
hsa_agent_t agent;
|
||||
uint32_t tid;
|
||||
} kernel;
|
||||
};
|
||||
};
|
||||
|
||||
// Add tracker entry
|
||||
inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal,
|
||||
entry_t* entry) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
// Creating a new tracker entry
|
||||
entry->type = type;
|
||||
entry->agent = agent;
|
||||
entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index;
|
||||
entry->orig = signal;
|
||||
entry->valid.store(ENTRY_INIT, std::memory_order_release);
|
||||
|
||||
// Creating a proxy signal
|
||||
status = saved_core_api.hsa_signal_create_fn(1, 0, NULL, &(entry->signal));
|
||||
if (status != HSA_STATUS_SUCCESS) fatal("hsa_signal_create failed");
|
||||
status = saved_amd_ext_api.hsa_amd_signal_async_handler_fn(
|
||||
entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
|
||||
if (status != HSA_STATUS_SUCCESS) fatal("hsa_amd_signal_async_handler failed");
|
||||
}
|
||||
|
||||
// Delete tracker entry
|
||||
inline static void Disable(entry_t* entry) {
|
||||
saved_core_api.hsa_signal_destroy_fn(entry->signal);
|
||||
entry->valid.store(ENTRY_INV, std::memory_order_release);
|
||||
}
|
||||
|
||||
private:
|
||||
// Entry completion
|
||||
inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
|
||||
static roctracer_timestamp_t sysclock_period = []() {
|
||||
uint64_t sysclock_hz = 0;
|
||||
hsa_status_t status =
|
||||
saved_core_api.hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
if (status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_info failed");
|
||||
return (uint64_t)1000000000 / sysclock_hz;
|
||||
}();
|
||||
|
||||
if (entry->type == COPY_ENTRY_TYPE) {
|
||||
hsa_amd_profiling_async_copy_time_t async_copy_time{};
|
||||
hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_get_async_copy_time_fn(
|
||||
entry->signal, &async_copy_time);
|
||||
if (status != HSA_STATUS_SUCCESS) fatal("hsa_amd_profiling_get_async_copy_time failed");
|
||||
entry->begin = async_copy_time.start * sysclock_period;
|
||||
entry->end = async_copy_time.end * sysclock_period;
|
||||
} else {
|
||||
assert(false && "should not reach here");
|
||||
}
|
||||
|
||||
hsa_signal_t orig = entry->orig;
|
||||
hsa_signal_t signal = entry->signal;
|
||||
|
||||
// Releasing completed entry
|
||||
entry->valid.store(ENTRY_COMPL, std::memory_order_release);
|
||||
|
||||
assert(entry->handler != nullptr);
|
||||
entry->handler(entry);
|
||||
|
||||
// Original intercepted signal completion
|
||||
if (orig.handle) {
|
||||
amd_signal_t* orig_signal_ptr = reinterpret_cast<amd_signal_t*>(orig.handle);
|
||||
amd_signal_t* prof_signal_ptr = reinterpret_cast<amd_signal_t*>(signal.handle);
|
||||
orig_signal_ptr->start_ts = prof_signal_ptr->start_ts;
|
||||
orig_signal_ptr->end_ts = prof_signal_ptr->end_ts;
|
||||
|
||||
[[maybe_unused]] const hsa_signal_value_t new_value =
|
||||
saved_core_api.hsa_signal_load_relaxed_fn(orig) - 1;
|
||||
assert(signal_value == new_value && "Tracker::Complete bad signal value");
|
||||
saved_core_api.hsa_signal_store_screlease_fn(orig, signal_value);
|
||||
}
|
||||
saved_core_api.hsa_signal_destroy_fn(signal);
|
||||
delete entry;
|
||||
}
|
||||
|
||||
// Handler for packet completion
|
||||
static bool Handler(hsa_signal_value_t signal_value, void* arg) {
|
||||
// Acquire entry
|
||||
entry_t* entry = reinterpret_cast<entry_t*>(arg);
|
||||
while (entry->valid.load(std::memory_order_acquire) != ENTRY_INIT) sched_yield();
|
||||
|
||||
// Complete entry
|
||||
Tracker::Complete(signal_value, entry);
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Intercept for hsa_memory_allocate: performs the real allocation and, when HSA_EVT_ID_ALLOCATE is
// enabled, reports the new pointer, its size and the region's segment / global flags.
hsa_status_t HSA_API MemoryAllocateIntercept(hsa_region_t region, size_t size, void** ptr) {
  const hsa_status_t alloc_status = saved_core_api.hsa_memory_allocate_fn(region, size, ptr);
  if (alloc_status != HSA_STATUS_SUCCESS) return alloc_status;

  if (!IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE)) return HSA_STATUS_SUCCESS;

  hsa_evt_data_t event{};
  event.allocate.ptr = *ptr;
  event.allocate.size = size;

  // The global flags are queried only if the segment query succeeded.
  const bool have_segment =
      saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_SEGMENT,
                                            &event.allocate.segment) == HSA_STATUS_SUCCESS;
  const bool have_flags =
      have_segment &&
      saved_core_api.hsa_region_get_info_fn(region, HSA_REGION_INFO_GLOBAL_FLAGS,
                                            &event.allocate.global_flag) == HSA_STATUS_SUCCESS;
  if (!have_flags) fatal("hsa_region_get_info failed");

  ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &event);
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_memory_assign_agent: forwards the call and, when HSA_EVT_ID_DEVICE is enabled,
// reports the pointer together with the agent's device type.
hsa_status_t MemoryAssignAgentIntercept(void* ptr, hsa_agent_t agent,
                                        hsa_access_permission_t access) {
  const hsa_status_t assign_status = saved_core_api.hsa_memory_assign_agent_fn(ptr, agent, access);
  if (assign_status != HSA_STATUS_SUCCESS) return assign_status;

  if (!IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE)) return HSA_STATUS_SUCCESS;

  hsa_evt_data_t event{};
  event.device.ptr = ptr;

  const hsa_status_t info_status =
      saved_core_api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE, &event.device.type);
  if (info_status != HSA_STATUS_SUCCESS) fatal("hsa_agent_get_info failed");

  ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &event);
  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_memory_copy: performs the copy and, when HSA_EVT_ID_MEMCOPY is enabled,
// reports destination, source and size.
hsa_status_t MemoryCopyIntercept(void* dst, const void* src, size_t size) {
  const hsa_status_t copy_status = saved_core_api.hsa_memory_copy_fn(dst, src, size);
  if (copy_status != HSA_STATUS_SUCCESS) return copy_status;

  if (!IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_MEMCOPY)) return HSA_STATUS_SUCCESS;

  hsa_evt_data_t event{};
  event.memcopy.dst = dst;
  event.memcopy.src = src;
  event.memcopy.size = size;
  ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_MEMCOPY, &event);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_amd_memory_pool_allocate.
//
// Performs the real allocation first. For a successful, non-zero-sized allocation it then reports:
//  - HSA_EVT_ID_ALLOCATE (if enabled) with the allocated address, the size and the pool's segment
//    and global flags;
//  - HSA_EVT_ID_DEVICE (if enabled) once for every agent whose access to the pool is
//    HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT.
//
// Returns the status of the underlying allocation when it fails or when size is 0, otherwise
// HSA_STATUS_SUCCESS.
hsa_status_t MemoryPoolAllocateIntercept(hsa_amd_memory_pool_t pool, size_t size, uint32_t flags,
                                         void** ptr) {
  hsa_status_t status = saved_amd_ext_api.hsa_amd_memory_pool_allocate_fn(pool, size, flags, ptr);
  if (size == 0 || status != HSA_STATUS_SUCCESS) return status;

  if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE)) {
    hsa_evt_data_t data{};
    data.allocate.ptr = *ptr;
    data.allocate.size = size;

    if (saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn(
            pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &data.allocate.segment) != HSA_STATUS_SUCCESS ||
        saved_amd_ext_api.hsa_amd_memory_pool_get_info_fn(
            pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &data.allocate.global_flag) !=
            HSA_STATUS_SUCCESS)
      fatal("hsa_amd_memory_pool_get_info failed");  // name the API that actually failed

    ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &data);
  }

  if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE)) {
    // Hand the callback the allocated address (*ptr), matching what the ALLOCATE event reports,
    // rather than the address of the caller's out-parameter.
    auto callback_data = std::make_pair(pool, *ptr);
    auto agent_callback = [](hsa_agent_t agent, void* iterate_agent_callback_data) {
      auto [pool, ptr] = *reinterpret_cast<decltype(callback_data)*>(iterate_agent_callback_data);

      // Skip agents for which the query fails or that do not have default access to the pool.
      if (hsa_amd_memory_pool_access_t value;
          saved_amd_ext_api.hsa_amd_agent_memory_pool_get_info_fn(
              agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &value) != HSA_STATUS_SUCCESS ||
          value != HSA_AMD_MEMORY_POOL_ACCESS_ALLOWED_BY_DEFAULT)
        return HSA_STATUS_SUCCESS;

      auto it = agent_info_map.find(agent.handle);
      if (it == agent_info_map.end()) fatal("agent was not found in the agent_info map");

      hsa_evt_data_t data{};
      data.device.type = it->second.type;
      data.device.id = it->second.id;
      data.device.agent = agent;
      data.device.ptr = ptr;

      ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data);
      return HSA_STATUS_SUCCESS;
    };
    saved_core_api.hsa_iterate_agents_fn(agent_callback, &callback_data);
  }

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_amd_memory_pool_free: when HSA_EVT_ID_ALLOCATE is enabled, reports the
// pointer with a size of 0 before the memory is actually freed, then forwards the call.
hsa_status_t MemoryPoolFreeIntercept(void* ptr) {
  const bool report_free = IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE);
  if (report_free) {
    hsa_evt_data_t event{};
    event.allocate.ptr = ptr;
    event.allocate.size = 0;
    ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_ALLOCATE, &event);
  }

  return saved_amd_ext_api.hsa_amd_memory_pool_free_fn(ptr);
}
|
||||
|
||||
// Agent allow access callback 'hsa_amd_agents_allow_access'
|
||||
hsa_status_t AgentsAllowAccessIntercept(uint32_t num_agents, const hsa_agent_t* agents,
|
||||
const uint32_t* flags, const void* ptr) {
|
||||
hsa_status_t status =
|
||||
saved_amd_ext_api.hsa_amd_agents_allow_access_fn(num_agents, agents, flags, ptr);
|
||||
if (status != HSA_STATUS_SUCCESS) return status;
|
||||
|
||||
if (IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE)) {
|
||||
while (num_agents--) {
|
||||
hsa_agent_t agent = *agents++;
|
||||
auto it = agent_info_map.find(agent.handle);
|
||||
if (it == agent_info_map.end()) fatal("agent was not found in the agent_info map");
|
||||
|
||||
hsa_evt_data_t data{};
|
||||
data.device.type = it->second.type;
|
||||
data.device.id = it->second.id;
|
||||
data.device.agent = agent;
|
||||
data.device.ptr = ptr;
|
||||
|
||||
ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data);
|
||||
}
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Bundle of a runtime-API callback, its user argument and an unload flag.
// NOTE(review): nothing in the visible part of this file refers to this struct (CodeObjectCallback
// receives a plain bool* instead) — confirm whether it is still needed.
struct CodeObjectCallbackArg {
|
||||
activity_rtapi_callback_t callback_fun;
|
||||
void* callback_arg;
|
||||
bool unload;
|
||||
};
|
||||
|
||||
// Callback for hsa_ven_amd_loader_executable_iterate_loaded_code_objects. Collects the storage
// location, load range and URI of `loaded_code_object` and reports them as an HSA_EVT_ID_CODEOBJ
// event. `arg` must point to a bool telling whether the code object is being unloaded. Code
// objects with storage type NONE are skipped; any failing loader query is fatal.
hsa_status_t CodeObjectCallback(hsa_executable_t executable,
                                hsa_loaded_code_object_t loaded_code_object, void* arg) {
  // Query a single attribute of the code object; yields true on success.
  const auto query = [loaded_code_object](auto attribute, void* value) {
    return hsa_loader_api.hsa_ven_amd_loader_loaded_code_object_get_info(
               loaded_code_object, attribute, value) == HSA_STATUS_SUCCESS;
  };

  hsa_evt_data_t data{};
  auto& codeobj = data.codeobj;

  if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_TYPE,
             &codeobj.storage_type))
    fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");

  if (codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_FILE) {
    // File-backed: record the file descriptor and clear the memory range.
    if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_FILE,
               &codeobj.storage_file) ||
        codeobj.storage_file == -1)
      fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");
    codeobj.memory_base = codeobj.memory_size = 0;
  } else if (codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_MEMORY) {
    // Memory-backed: record the memory range and mark the file descriptor as absent.
    if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_BASE,
               &codeobj.memory_base) ||
        !query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_CODE_OBJECT_STORAGE_MEMORY_SIZE,
               &codeobj.memory_size))
      fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");
    codeobj.storage_file = -1;
  } else if (codeobj.storage_type == HSA_VEN_AMD_LOADER_CODE_OBJECT_STORAGE_TYPE_NONE) {
    return HSA_STATUS_SUCCESS;  // FIXME: do we really not care about these code objects?
  } else {
    fatal("unknown code object storage type: %d", codeobj.storage_type);
  }

  if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, &codeobj.load_base) ||
      !query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, &codeobj.load_size) ||
      !query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, &codeobj.load_delta))
    fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");

  if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, &codeobj.uri_length))
    fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");

  // The URI is fetched into a local buffer, so the pointer stored in the event is only valid
  // until this function returns.
  std::string uri_buffer(codeobj.uri_length, '\0');
  if (!query(HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri_buffer.data()))
    fatal("hsa_ven_amd_loader_loaded_code_object_get_info failed");

  codeobj.uri = uri_buffer.c_str();
  codeobj.unload = *static_cast<bool*>(arg) ? 1 : 0;
  ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, &data);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_executable_freeze: freezes the executable and, when HSA_EVT_ID_CODEOBJ is
// enabled, reports each of its loaded code objects as loaded (unload == false).
hsa_status_t ExecutableFreezeIntercept(hsa_executable_t executable, const char* options) {
  const hsa_status_t freeze_status = saved_core_api.hsa_executable_freeze_fn(executable, options);
  if (freeze_status != HSA_STATUS_SUCCESS) return freeze_status;

  if (!IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ)) return HSA_STATUS_SUCCESS;

  bool is_unload = false;
  hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
      executable, CodeObjectCallback, &is_unload);

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
// Intercept for hsa_executable_destroy: when HSA_EVT_ID_CODEOBJ is enabled, reports each loaded
// code object as unloaded (unload == true) before the executable is destroyed.
hsa_status_t ExecutableDestroyIntercept(hsa_executable_t executable) {
  const bool report_unload = IsEnabled(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ);
  if (report_unload) {
    bool is_unload = true;
    hsa_loader_api.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
        executable, CodeObjectCallback, &is_unload);
  }

  return saved_core_api.hsa_executable_destroy_fn(executable);
}
|
||||
|
||||
std::atomic<bool> profiling_async_copy_enable{false};
|
||||
|
||||
// Intercept for hsa_amd_profiling_async_copy_enable: forwards the request and, on success,
// remembers the application's setting in profiling_async_copy_enable.
hsa_status_t ProfilingAsyncCopyEnableIntercept(bool enable) {
  const hsa_status_t enable_status =
      saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(enable);
  if (enable_status != HSA_STATUS_SUCCESS) return enable_status;

  profiling_async_copy_enable.exchange(enable, std::memory_order_release);
  return enable_status;
}
|
||||
|
||||
// Completion handler for tracked async copies: turns the tracker entry into an HSA_OP_ID_COPY
// activity record (device id fixed to 0) and reports it.
void MemoryASyncCopyHandler(const Tracker::entry_t* entry) {
  activity_record_t copy_record{};

  copy_record.domain = ACTIVITY_DOMAIN_HSA_OPS;
  copy_record.op = HSA_OP_ID_COPY;
  copy_record.correlation_id = entry->correlation_id;
  copy_record.device_id = 0;
  copy_record.begin_ns = entry->begin;
  copy_record.end_ns = entry->end;

  ReportActivity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY, &copy_record);
}
|
||||
|
||||
// Intercept for hsa_amd_memory_async_copy_on_engine. When HSA_OP_ID_COPY tracing is enabled the
// copy is issued against a tracker-owned proxy signal so that its timing can be reported on
// completion; otherwise the call is forwarded unchanged.
hsa_status_t MemoryASyncCopyOnEngineIntercept(
    void* dst, hsa_agent_t dst_agent, const void* src, hsa_agent_t src_agent, size_t size,
    uint32_t num_dep_signals, const hsa_signal_t* dep_signals, hsa_signal_t completion_signal,
    hsa_amd_sdma_engine_id_t engine_id, bool force_copy_on_sdma) {
  const bool tracing = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY);

  // Copy profiling must be on if either the application asked for it or the tracer needs it.
  // FIXME: what happens if the state changes before returning?
  [[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(
      profiling_async_copy_enable.load(std::memory_order_relaxed) || tracing);
  assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");

  if (!tracing) {
    return saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn(
        dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal,
        engine_id, force_copy_on_sdma);
  }

  auto* entry = new Tracker::entry_t();
  entry->handler = MemoryASyncCopyHandler;
  entry->correlation_id = CorrelationId();
  Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);

  // Issue the copy with the proxy signal in place of the caller's completion signal.
  const hsa_status_t copy_status = saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn(
      dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal, engine_id,
      force_copy_on_sdma);
  if (copy_status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);

  return copy_status;
}
|
||||
|
||||
// Intercept for hsa_amd_memory_async_copy. When HSA_OP_ID_COPY tracing is enabled the copy is
// issued against a tracker-owned proxy signal so that its timing can be reported on completion;
// otherwise the call is forwarded unchanged.
hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const void* src,
                                      hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals,
                                      const hsa_signal_t* dep_signals,
                                      hsa_signal_t completion_signal) {
  const bool tracing = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY);

  // Copy profiling must be on if either the application asked for it or the tracer needs it.
  // FIXME: what happens if the state changes before returning?
  [[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(
      profiling_async_copy_enable.load(std::memory_order_relaxed) || tracing);
  assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");

  if (!tracing) {
    return saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
        dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal);
  }

  auto* entry = new Tracker::entry_t();
  entry->handler = MemoryASyncCopyHandler;
  entry->correlation_id = CorrelationId();
  Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);

  // Issue the copy with the proxy signal in place of the caller's completion signal.
  const hsa_status_t copy_status = saved_amd_ext_api.hsa_amd_memory_async_copy_fn(
      dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal);
  if (copy_status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);

  return copy_status;
}
|
||||
|
||||
// Intercept for hsa_amd_memory_async_copy_rect. When HSA_OP_ID_COPY tracing is enabled the copy
// is issued against a tracker-owned proxy signal so that its timing can be reported on
// completion; otherwise the call is forwarded unchanged.
hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst,
                                          const hsa_dim3_t* dst_offset,
                                          const hsa_pitched_ptr_t* src,
                                          const hsa_dim3_t* src_offset, const hsa_dim3_t* range,
                                          hsa_agent_t copy_agent, hsa_amd_copy_direction_t dir,
                                          uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
                                          hsa_signal_t completion_signal) {
  const bool tracing = IsEnabled(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY);

  // Copy profiling must be on if either the application asked for it or the tracer needs it.
  // FIXME: what happens if the state changes before returning?
  [[maybe_unused]] hsa_status_t status = saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(
      profiling_async_copy_enable.load(std::memory_order_relaxed) || tracing);
  assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");

  if (!tracing) {
    return saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
        dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
        completion_signal);
  }

  auto* entry = new Tracker::entry_t();
  entry->handler = MemoryASyncCopyHandler;
  entry->correlation_id = CorrelationId();
  Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);

  // Issue the copy with the proxy signal in place of the caller's completion signal.
  const hsa_status_t copy_status = saved_amd_ext_api.hsa_amd_memory_async_copy_rect_fn(
      dst, dst_offset, src, src_offset, range, copy_agent, dir, num_dep_signals, dep_signals,
      entry->signal);
  if (copy_status != HSA_STATUS_SUCCESS) Tracker::Disable(entry);

  return copy_status;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Return the current HSA system timestamp converted to nanoseconds, or 0 if the HSA runtime is
// not initialised. Any other failure of hsa_system_get_info is fatal.
roctracer_timestamp_t timestamp_ns() {
  // Prefer the saved (un-intercepted) 'hsa_system_get_info' so the tracer's own calls are not
  // reported; fall back to the public entry point when the intercept is not installed.
  auto get_system_info = saved_core_api.hsa_system_get_info_fn;
  if (get_system_info == nullptr) get_system_info = hsa_system_get_info;

  uint64_t ticks;
  switch (get_system_info(HSA_SYSTEM_INFO_TIMESTAMP, &ticks)) {
    case HSA_STATUS_SUCCESS:
      break;
    case HSA_STATUS_ERROR_NOT_INITIALIZED:
      return 0;
    default:
      fatal("hsa_system_get_info failed");
  }

  // Nanoseconds per tick (integer division), computed on the first successful call only.
  static uint64_t ns_per_tick = [&]() {
    uint64_t frequency_hz = 0;
    const hsa_status_t freq_status =
        get_system_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &frequency_hz);
    if (freq_status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_info failed");

    return uint64_t{1000000000} / frequency_hz;
  }();

  return ticks * ns_per_tick;
}
|
||||
|
||||
// Install the tracer into the HSA API table: save the original core and AMD-extension tables,
// record every agent in agent_info_map, fetch the loader extension table, and replace the
// relevant table entries with the intercept functions and the generated API wrappers.
void Initialize(HsaApiTable* table) {
  // Keep copies of the original entry points before anything is overwritten.
  saved_core_api = *table->core_;
  saved_amd_ext_api = *table->amd_ext_;

  // Per-agent callback: CPU and "other" agents get sequential ids, GPU agents use the driver
  // node id.
  const auto register_agent = [](hsa_agent_t agent, void* /*data*/) {
    static int cpu_agent_count = 0;
    static int other_agent_count = 0;

    hsa_support::AgentInfo info;
    if (hsa_support::saved_core_api.hsa_agent_get_info_fn(agent, HSA_AGENT_INFO_DEVICE,
                                                          &info.type) != HSA_STATUS_SUCCESS)
      fatal("hsa_agent_get_info failed");

    switch (info.type) {
      case HSA_DEVICE_TYPE_CPU: {
        info.id = cpu_agent_count++;
        break;
      }
      case HSA_DEVICE_TYPE_GPU: {
        uint32_t driver_node_id;
        if (hsa_support::saved_core_api.hsa_agent_get_info_fn(
                agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
                &driver_node_id) != HSA_STATUS_SUCCESS)
          fatal("hsa_agent_get_info failed");
        info.id = driver_node_id;
        break;
      }
      default: {
        info.id = other_agent_count++;
        break;
      }
    }

    hsa_support::agent_info_map.emplace(agent.handle, info);
    return HSA_STATUS_SUCCESS;
  };

  // Enumerate the agents.
  if (hsa_support::saved_core_api.hsa_iterate_agents_fn(register_agent, nullptr) !=
      HSA_STATUS_SUCCESS)
    fatal("hsa_iterate_agents failed");

  // Fetch the loader extension used by the code object intercepts.
  const hsa_status_t loader_status = table->core_->hsa_system_get_major_extension_table_fn(
      HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &hsa_loader_api);
  if (loader_status != HSA_STATUS_SUCCESS) fatal("hsa_system_get_major_extension_table failed");

  CoreApiTable* const core = table->core_;
  AmdExtTable* const amd_ext = table->amd_ext_;

  // HSA_OPS intercepts.
  amd_ext->hsa_amd_memory_async_copy_fn = MemoryASyncCopyIntercept;
  amd_ext->hsa_amd_memory_async_copy_rect_fn = MemoryASyncCopyRectIntercept;
  amd_ext->hsa_amd_memory_async_copy_on_engine_fn = MemoryASyncCopyOnEngineIntercept;
  amd_ext->hsa_amd_profiling_async_copy_enable_fn = ProfilingAsyncCopyEnableIntercept;

  // HSA_EVT intercepts.
  core->hsa_memory_allocate_fn = MemoryAllocateIntercept;
  core->hsa_memory_assign_agent_fn = MemoryAssignAgentIntercept;
  core->hsa_memory_copy_fn = MemoryCopyIntercept;
  amd_ext->hsa_amd_memory_pool_allocate_fn = MemoryPoolAllocateIntercept;
  amd_ext->hsa_amd_memory_pool_free_fn = MemoryPoolFreeIntercept;
  amd_ext->hsa_amd_agents_allow_access_fn = AgentsAllowAccessIntercept;
  core->hsa_executable_freeze_fn = ExecutableFreezeIntercept;
  core->hsa_executable_destroy_fn = ExecutableDestroyIntercept;

  // HSA_API wrappers.
  detail::InstallCoreApiWrappers(table->core_);
  detail::InstallAmdExtWrappers(table->amd_ext_);
  detail::InstallImageExtWrappers(table->image_ext_);
}
|
||||
|
||||
void Finalize() {
|
||||
if (hsa_status_t status =
|
||||
saved_amd_ext_api.hsa_amd_profiling_async_copy_enable_fn(profiling_async_copy_enable.load(std::memory_order_relaxed));
|
||||
status != HSA_STATUS_SUCCESS)
|
||||
assert(!"hsa_amd_profiling_async_copy_enable failed");
|
||||
|
||||
memset(&saved_core_api, '\0', sizeof(saved_core_api));
|
||||
memset(&saved_amd_ext_api, '\0', sizeof(saved_amd_ext_api));
|
||||
memset(&hsa_loader_api, '\0', sizeof(hsa_loader_api));
|
||||
}
|
||||
|
||||
// Name of the HSA API call with the given id; delegates to detail::GetApiName.
const char* GetApiName(uint32_t id) {
  return detail::GetApiName(id);
}
|
||||
|
||||
const char* GetEvtName(uint32_t id) {
|
||||
switch (id) {
|
||||
case HSA_EVT_ID_ALLOCATE:
|
||||
return "ALLOCATE";
|
||||
case HSA_EVT_ID_DEVICE:
|
||||
return "DEVICE";
|
||||
case HSA_EVT_ID_MEMCOPY:
|
||||
return "MEMCOPY";
|
||||
case HSA_EVT_ID_SUBMIT:
|
||||
return "SUBMIT";
|
||||
case HSA_EVT_ID_KSYMBOL:
|
||||
return "KSYMBOL";
|
||||
case HSA_EVT_ID_CODEOBJ:
|
||||
return "CODEOBJ";
|
||||
case HSA_EVT_ID_NUMBER:
|
||||
break;
|
||||
}
|
||||
throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid HSA EVT callback id");
|
||||
}
|
||||
|
||||
const char* GetOpsName(uint32_t id) {
|
||||
switch (id) {
|
||||
case HSA_OP_ID_DISPATCH:
|
||||
return "DISPATCH";
|
||||
case HSA_OP_ID_COPY:
|
||||
return "COPY";
|
||||
case HSA_OP_ID_BARRIER:
|
||||
return "BARRIER";
|
||||
case HSA_OP_ID_RESERVED1:
|
||||
return "PCSAMPLE";
|
||||
}
|
||||
throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid HSA OPS callback id");
|
||||
}
|
||||
|
||||
// Returns the HSA API operation code for the API name 'str'; the lookup is delegated to
// detail::GetApiCode.
uint32_t GetApiCode(const char* str) {
  return detail::GetApiCode(str);
}
|
||||
|
||||
// Publishes 'function' as the activity reporting callback by storing it (relaxed ordering) in
// report_activity.
void RegisterTracerCallback(int (*function)(activity_domain_t domain, uint32_t operation_id,
                                            void* data)) {
  constexpr std::memory_order kOrder = std::memory_order_relaxed;
  report_activity.store(function, kOrder);
}
|
||||
|
||||
} // namespace roctracer::hsa_support
|
||||
@@ -0,0 +1,54 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef HSA_SUPPORT_H_
|
||||
#define HSA_SUPPORT_H_
|
||||
|
||||
#include "roctracer.h"
|
||||
#include "roctracer_hsa.h"
|
||||
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
|
||||
namespace roctracer::hsa_support {
|
||||
|
||||
struct hsa_trace_data_t {
|
||||
hsa_api_data_t api_data;
|
||||
uint64_t phase_enter_timestamp;
|
||||
uint64_t phase_data;
|
||||
|
||||
void (*phase_enter)(hsa_api_id_t operation_id, hsa_trace_data_t* data);
|
||||
void (*phase_exit)(hsa_api_id_t operation_id, hsa_trace_data_t* data);
|
||||
};
|
||||
|
||||
void Initialize(HsaApiTable* table);
|
||||
void Finalize();
|
||||
|
||||
const char* GetApiName(uint32_t id);
|
||||
const char* GetEvtName(uint32_t id);
|
||||
const char* GetOpsName(uint32_t id);
|
||||
uint32_t GetApiCode(const char* str);
|
||||
|
||||
void RegisterTracerCallback(int (*function)(activity_domain_t domain, uint32_t operation_id,
|
||||
void* data));
|
||||
uint64_t timestamp_ns();
|
||||
|
||||
} // namespace roctracer::hsa_support
|
||||
|
||||
#endif // HSA_SUPPORT_H_
|
||||
@@ -0,0 +1,192 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef ROCTRACER_LOADER_H_
|
||||
#define ROCTRACER_LOADER_H_
|
||||
|
||||
#include "debug.h"
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <experimental/filesystem>
|
||||
#include <link.h>
|
||||
#include <unistd.h>
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
// Base loader class
|
||||
template <typename Loader> class BaseLoader {
|
||||
protected:
|
||||
BaseLoader(const char* pattern) {
|
||||
// Iterate through the process' loaded shared objects and try to dlopen the first entry with a
|
||||
// file name starting with the given 'pattern'. This allows the loader to acquire a handle
|
||||
// to the target library iff it is already loaded. The handle is used to query symbols
|
||||
// exported by that library.
|
||||
|
||||
auto callback = [this, pattern](dl_phdr_info* info) {
|
||||
if (handle_ == nullptr &&
|
||||
fs::path(info->dlpi_name).filename().string().rfind(pattern, 0) == 0)
|
||||
handle_ = ::dlopen(info->dlpi_name, RTLD_LAZY);
|
||||
};
|
||||
dl_iterate_phdr(
|
||||
[](dl_phdr_info* info, size_t size, void* data) {
|
||||
(*reinterpret_cast<decltype(callback)*>(data))(info);
|
||||
return 0;
|
||||
},
|
||||
&callback);
|
||||
}
|
||||
|
||||
~BaseLoader() {
|
||||
if (handle_ != nullptr) ::dlclose(handle_);
|
||||
}
|
||||
|
||||
BaseLoader(const BaseLoader&) = delete;
|
||||
BaseLoader& operator=(const BaseLoader&) = delete;
|
||||
|
||||
public:
|
||||
bool IsEnabled() const { return handle_ != nullptr; }
|
||||
|
||||
template <typename FunctionPtr> FunctionPtr GetFun(const char* symbol) const {
|
||||
assert(IsEnabled());
|
||||
|
||||
auto function_ptr = reinterpret_cast<FunctionPtr>(::dlsym(handle_, symbol));
|
||||
if (function_ptr == nullptr) fatal("symbol lookup '%s' failed: %s", symbol, ::dlerror());
|
||||
return function_ptr;
|
||||
}
|
||||
|
||||
static inline Loader& Instance() {
|
||||
static Loader instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
private:
|
||||
void* handle_;
|
||||
};
|
||||
|
||||
} // namespace roctracer
|
||||
|
||||
// HIP runtime library loader class
|
||||
namespace roctracer {
|
||||
#if STATIC_BUILD
|
||||
__attribute__((weak)) const char* hipKernelNameRef(const hipFunction_t f) { return nullptr; }
|
||||
__attribute__((weak)) const char* hipKernelNameRefByPtr(const void* hostFunction,
|
||||
hipStream_t stream) {
|
||||
return nullptr;
|
||||
}
|
||||
__attribute__((weak)) int hipGetStreamDeviceId(hipStream_t stream) { return 0; }
|
||||
__attribute__((weak)) const char* hipGetCmdName(unsigned op) { return nullptr; }
|
||||
__attribute__((weak)) const char* hipApiName(uint32_t id) { return nullptr; }
|
||||
__attribute__((weak)) void hipRegisterTracerCallback(int (*function)(activity_domain_t domain,
|
||||
uint32_t operation_id,
|
||||
void* data)) {}
|
||||
|
||||
class HipLoader {
|
||||
private:
|
||||
HipLoader() {}
|
||||
|
||||
public:
|
||||
bool IsEnabled() const { return true; }
|
||||
|
||||
int GetStreamDeviceId(hipStream_t stream) const { return hipGetStreamDeviceId(stream); }
|
||||
|
||||
const char* KernelNameRef(const hipFunction_t f) const { return hipKernelNameRef(f); }
|
||||
|
||||
const char* KernelNameRefByPtr(const void* host_function, hipStream_t stream = nullptr) const {
|
||||
return hipKernelNameRefByPtr(host_function, stream);
|
||||
}
|
||||
|
||||
const char* GetOpName(unsigned op) const { return hipGetCmdName(op); }
|
||||
|
||||
const char* ApiName(uint32_t id) const { return hipApiName(id); }
|
||||
|
||||
void RegisterTracerCallback(int (*callback)(activity_domain_t domain, uint32_t operation_id,
|
||||
void* data)) const {
|
||||
return hipRegisterTracerCallback(callback);
|
||||
}
|
||||
|
||||
static inline HipLoader& Instance() {
|
||||
static HipLoader instance;
|
||||
return instance;
|
||||
}
|
||||
};
|
||||
#else
|
||||
// HipLoader for dynamic builds: resolves the HIP entry points from "libamdhip64.so" through
// BaseLoader. Each accessor looks its symbol up once (function-local static) and then forwards.
class HipLoader : public BaseLoader<HipLoader> {
 private:
  friend HipLoader& BaseLoader::Instance();
  HipLoader() : BaseLoader("libamdhip64.so") {}

 public:
  int GetStreamDeviceId(hipStream_t stream) const {
    using Fn = int (*)(hipStream_t);
    static const Fn resolved = GetFun<Fn>("hipGetStreamDeviceId");
    return resolved(stream);
  }

  const char* KernelNameRef(const hipFunction_t f) const {
    using Fn = const char* (*)(hipFunction_t);
    static const Fn resolved = GetFun<Fn>("hipKernelNameRef");
    return resolved(f);
  }

  const char* KernelNameRefByPtr(const void* host_function, hipStream_t stream = nullptr) const {
    using Fn = const char* (*)(const void*, hipStream_t);
    static const Fn resolved = GetFun<Fn>("hipKernelNameRefByPtr");
    return resolved(host_function, stream);
  }

  const char* GetOpName(unsigned op) const {
    using Fn = const char* (*)(unsigned);
    static const Fn resolved = GetFun<Fn>("hipGetCmdName");
    return resolved(op);
  }

  const char* ApiName(uint32_t id) const {
    using Fn = const char* (*)(uint32_t);
    static const Fn resolved = GetFun<Fn>("hipApiName");
    return resolved(id);
  }

  void RegisterTracerCallback(int (*callback)(activity_domain_t domain, uint32_t operation_id,
                                              void* data)) const {
    using TracerCallback = int (*)(activity_domain_t, uint32_t, void*);
    using Fn = void (*)(TracerCallback);
    static const Fn resolved = GetFun<Fn>("hipRegisterTracerCallback");
    resolved(callback);
  }
};
|
||||
#endif
|
||||
|
||||
// ROCTX library loader class
|
||||
class RocTxLoader : public BaseLoader<RocTxLoader> {
|
||||
private:
|
||||
friend RocTxLoader& BaseLoader::Instance();
|
||||
RocTxLoader() : BaseLoader("libroctx64.so") {}
|
||||
|
||||
public:
|
||||
void RegisterTracerCallback(int (*callback)(activity_domain_t domain, uint32_t operation_id,
|
||||
void* data)) const {
|
||||
static auto function =
|
||||
GetFun<void (*)(int (*callback)(activity_domain_t domain, uint32_t operation_id,
|
||||
void* data))>("roctxRegisterTracerCallback");
|
||||
return function(callback);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace roctracer
|
||||
|
||||
#endif // ROCTRACER_LOADER_H_
|
||||
@@ -0,0 +1,238 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef MEMORY_POOL_H_
|
||||
#define MEMORY_POOL_H_
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
#include <algorithm>
#include <cassert>
#include <condition_variable>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <future>
#include <mutex>
#include <thread>
#include <type_traits>
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
class MemoryPool {
|
||||
public:
|
||||
MemoryPool(const roctracer_properties_t& properties) : properties_(properties) {
|
||||
// Pool definition: The memory pool is split in 2 buffers of equal size. When first initialized,
|
||||
// the write pointer points to the first element of the first buffer. When a buffer is full, or
|
||||
// when Flush() is called, the write pointer moves to the other buffer.
|
||||
// Each buffer should be large enough to hold at least 2 activity records, as record pairs may
|
||||
// be written when external correlation ids are used.
|
||||
const size_t allocation_size =
|
||||
2 * std::max(2 * sizeof(roctracer_record_t), properties_.buffer_size);
|
||||
pool_begin_ = nullptr;
|
||||
AllocateMemory(&pool_begin_, allocation_size);
|
||||
assert(pool_begin_ != nullptr && "pool allocator failed");
|
||||
|
||||
pool_end_ = pool_begin_ + allocation_size;
|
||||
buffer_begin_ = pool_begin_;
|
||||
buffer_end_ = buffer_begin_ + properties_.buffer_size;
|
||||
record_ptr_ = buffer_begin_;
|
||||
data_ptr_ = buffer_end_;
|
||||
|
||||
// Create a consumer thread and wait for it to be ready to accept work.
|
||||
std::promise<void> ready;
|
||||
std::future<void> future = ready.get_future();
|
||||
consumer_thread_ = std::thread(&MemoryPool::ConsumerThreadLoop, this, std::move(ready));
|
||||
future.wait();
|
||||
}
|
||||
|
||||
~MemoryPool() {
|
||||
Flush();
|
||||
|
||||
// Wait for the previous flush to complete, then send the exit signal.
|
||||
NotifyConsumerThread(nullptr, nullptr);
|
||||
consumer_thread_.join();
|
||||
|
||||
// Free the pool's buffer memory.
|
||||
AllocateMemory(&pool_begin_, 0);
|
||||
}
|
||||
|
||||
MemoryPool(const MemoryPool&) = delete;
|
||||
MemoryPool& operator=(const MemoryPool&) = delete;
|
||||
|
||||
template <typename Record, typename Functor = std::function<void(Record& record, const void*)>>
|
||||
void Write(Record&& record, const void* data, size_t data_size, Functor&& store_data = {}) {
|
||||
assert(data != nullptr || data_size == 0); // If data is null, then data_size must be 0
|
||||
|
||||
std::lock_guard producer_lock(producer_mutex_);
|
||||
|
||||
// The amount of memory reserved in the buffer to store data. If the data cannot fit because it
|
||||
// is larger than the buffer size minus one record, then the data won't be copied into the
|
||||
// buffer.
|
||||
size_t reserve_data_size =
|
||||
data_size <= (properties_.buffer_size - sizeof(Record)) ? data_size : 0;
|
||||
|
||||
std::byte* next_record = record_ptr_ + sizeof(Record);
|
||||
if (next_record > (data_ptr_ - reserve_data_size)) {
|
||||
NotifyConsumerThread(buffer_begin_, record_ptr_);
|
||||
SwitchBuffers();
|
||||
next_record = record_ptr_ + sizeof(Record);
|
||||
assert(next_record <= buffer_end_ && "buffer size is less then the record size");
|
||||
}
|
||||
|
||||
// Store data in the record. Copy the data first if it fits in the buffer
|
||||
// (reserve_data_size != 0).
|
||||
if (reserve_data_size) {
|
||||
data_ptr_ -= data_size;
|
||||
::memcpy(data_ptr_, data, data_size);
|
||||
store_data(record, data_ptr_);
|
||||
} else if (data != nullptr) {
|
||||
store_data(record, data);
|
||||
}
|
||||
|
||||
// Store the record into the buffer, and increment the write pointer.
|
||||
::memcpy(record_ptr_, &record, sizeof(Record));
|
||||
record_ptr_ = next_record;
|
||||
|
||||
// If the data does not fit in the buffer, flush the buffer with the record as is. We don't copy
|
||||
// the data so we make sure that the record and its data are processed by waiting until the
|
||||
// flush is complete.
|
||||
if (data != nullptr && reserve_data_size == 0) {
|
||||
NotifyConsumerThread(buffer_begin_, record_ptr_);
|
||||
SwitchBuffers();
|
||||
{
|
||||
std::unique_lock consumer_lock(consumer_mutex_);
|
||||
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; });
|
||||
}
|
||||
}
|
||||
}
|
||||
template <typename Record> void Write(Record&& record) {
|
||||
using DataPtr = void*;
|
||||
Write(std::forward<Record>(record), DataPtr(nullptr), 0, {});
|
||||
}
|
||||
|
||||
// Flush the records and block until they are all made visible to the client.
|
||||
void Flush() {
|
||||
{
|
||||
std::lock_guard producer_lock(producer_mutex_);
|
||||
if (record_ptr_ == buffer_begin_) return;
|
||||
|
||||
NotifyConsumerThread(buffer_begin_, record_ptr_);
|
||||
SwitchBuffers();
|
||||
}
|
||||
{
|
||||
// Wait for the current operation to complete.
|
||||
std::unique_lock consumer_lock(consumer_mutex_);
|
||||
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; });
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void SwitchBuffers() {
|
||||
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
|
||||
buffer_end_ = buffer_begin_ + properties_.buffer_size;
|
||||
record_ptr_ = buffer_begin_;
|
||||
data_ptr_ = buffer_end_;
|
||||
}
|
||||
|
||||
void ConsumerThreadLoop(std::promise<void> ready) {
|
||||
std::unique_lock consumer_lock(consumer_mutex_);
|
||||
|
||||
// This consumer is now ready to accept work.
|
||||
ready.set_value();
|
||||
|
||||
while (true) {
|
||||
consumer_cond_.wait(consumer_lock, [this]() { return consumer_arg_.valid; });
|
||||
|
||||
// begin == end == nullptr means the thread needs to exit.
|
||||
if (consumer_arg_.begin == nullptr && consumer_arg_.end == nullptr) break;
|
||||
|
||||
properties_.buffer_callback_fun(reinterpret_cast<const char*>(consumer_arg_.begin),
|
||||
reinterpret_cast<const char*>(consumer_arg_.end),
|
||||
properties_.buffer_callback_arg);
|
||||
|
||||
// Mark this operation as complete (valid=false) and notify all producers that may be
|
||||
// waiting for this operation to finish, or to start a new operation. See comment below in
|
||||
// NotifyConsumerThread().
|
||||
consumer_arg_.valid = false;
|
||||
consumer_cond_.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
void NotifyConsumerThread(const std::byte* data_begin, const std::byte* data_end) {
|
||||
std::unique_lock consumer_lock(consumer_mutex_);
|
||||
|
||||
// If consumer_arg_ is still in use (valid=true), then wait for the consumer thread to finish
|
||||
// processing the current operation. Multiple producers may wait here, one will be allowed to
|
||||
// continue once the consumer thread is idle and valid=false. This prevents a race condition
|
||||
// where operations would be lost if multiple producers could enter this critical section
|
||||
// (sequentially) before the consumer thread could re-acquire the consumer_mutex_ lock.
|
||||
consumer_cond_.wait(consumer_lock, [this]() { return !consumer_arg_.valid; });
|
||||
|
||||
consumer_arg_.begin = data_begin;
|
||||
consumer_arg_.end = data_end;
|
||||
|
||||
consumer_arg_.valid = true;
|
||||
consumer_cond_.notify_all();
|
||||
}
|
||||
|
||||
void AllocateMemory(std::byte** ptr, size_t size) const {
|
||||
if (properties_.alloc_fun != nullptr) {
|
||||
// Use the custom allocator provided in the properties.
|
||||
properties_.alloc_fun(reinterpret_cast<char**>(ptr), size, properties_.alloc_arg);
|
||||
return;
|
||||
}
|
||||
|
||||
// No custom allocator was provided so use the default malloc/realloc/free allocator.
|
||||
if (*ptr == nullptr) {
|
||||
*ptr = static_cast<std::byte*>(malloc(size));
|
||||
} else if (size != 0) {
|
||||
*ptr = static_cast<std::byte*>(realloc(*ptr, size));
|
||||
} else {
|
||||
free(*ptr);
|
||||
*ptr = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Properties used to create the memory pool.
|
||||
const roctracer_properties_t properties_;
|
||||
|
||||
// Pool definition
|
||||
std::byte* pool_begin_;
|
||||
std::byte* pool_end_;
|
||||
std::byte* buffer_begin_;
|
||||
std::byte* buffer_end_;
|
||||
std::byte* record_ptr_;
|
||||
std::byte* data_ptr_;
|
||||
std::mutex producer_mutex_;
|
||||
|
||||
// Consumer thread
|
||||
std::thread consumer_thread_;
|
||||
struct {
|
||||
const std::byte* begin;
|
||||
const std::byte* end;
|
||||
bool valid = false;
|
||||
} consumer_arg_;
|
||||
|
||||
std::mutex consumer_mutex_;
|
||||
std::condition_variable consumer_cond_;
|
||||
};
|
||||
|
||||
} // namespace roctracer
|
||||
|
||||
#endif // MEMORY_POOL_H_
|
||||
@@ -0,0 +1,102 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef UTIL_CALLBACK_TABLE_H_
|
||||
#define UTIL_CALLBACK_TABLE_H_
|
||||
|
||||
#include "ext/prof_protocol.h"
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <optional>
|
||||
#include <shared_mutex>
|
||||
#include <utility>
|
||||
|
||||
namespace roctracer::util {
|
||||
|
||||
#if __GNUC__ == 11 || __GNUC__ == 12
// Starting with gcc-11 (verified with gcc-12 as well), an array out-of-bounds subscript error is
// reported for accessing the registration table element at the operation ID index. Validating the
// index in the function calling Register/Unregister does not quiet the warning/error in release
// builds, so, for gcc-11 and gcc-12, we disable that warning just for this class.
//
// The version test must use __GNUC__ for both versions: an unknown identifier in an #if
// expression evaluates to 0, so a misspelled macro name silently disables that branch.
#define IGNORE_GCC_ARRAY_BOUNDS_ERROR 1
#endif  // __GNUC__ == 11 || __GNUC__ == 12
|
||||
|
||||
#if IGNORE_GCC_ARRAY_BOUNDS_ERROR
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Warray-bounds"
|
||||
#endif // IGNORE_GCC_ARRAY_BOUNDS_ERROR
|
||||
|
||||
namespace detail {
// Default 'IsStopped' policy: never reports the table as stopped.
struct False {
  constexpr bool operator()() { return false; }
};
}  // namespace detail

// Generic callbacks table.
//
// Fixed-size table of N slots indexed by operation id. Each slot carries an 'enabled' flag, a
// reader/writer mutex and a value of type T. 'IsStopped' is a default-constructible functor
// consulted by Get(); when it returns true, lookups report no registration.
template <typename T, uint32_t N, typename IsStopped = detail::False> class RegistrationTable {
 public:
  struct table_element_t {
    std::atomic<bool> enabled{false};
    mutable std::shared_mutex mutex;
    T data;
  };

  // Enables the slot for 'operation_id' and stores T{args...} in it. Re-registering an enabled
  // slot replaces its value without changing the registration count.
  template <typename... Args> void Register(uint32_t operation_id, Args... args) {
    assert(operation_id < N && "operation_id is out of range");
    table_element_t& slot = table_.at(operation_id);
    std::lock_guard<std::shared_mutex> writer(slot.mutex);

    const bool was_enabled = slot.enabled.exchange(true, std::memory_order_relaxed);
    if (!was_enabled) {
      registered_count_.fetch_add(1, std::memory_order_relaxed);
    }
    slot.data = T{std::forward<Args>(args)...};
  }

  // Disables the slot for 'operation_id'. The stored value is left in place.
  void Unregister(uint32_t operation_id) {
    assert(operation_id < N && "id is out of range");
    table_element_t& slot = table_.at(operation_id);
    std::lock_guard<std::shared_mutex> writer(slot.mutex);

    const bool was_enabled = slot.enabled.exchange(false, std::memory_order_relaxed);
    if (was_enabled) {
      registered_count_.fetch_sub(1, std::memory_order_relaxed);
    }
  }

  // Returns a copy of the value registered for 'operation_id', or std::nullopt when the slot is
  // disabled or IsStopped reports true.
  std::optional<T> Get(uint32_t operation_id) const {
    assert(operation_id < N && "id is out of range");
    const table_element_t& slot = table_.at(operation_id);

    // Cheap checks first, without taking the lock.
    if (!slot.enabled.load(std::memory_order_relaxed)) return std::nullopt;
    if (IsStopped{}()) return std::nullopt;

    // Re-check under the shared lock: the slot may have been disabled in the meantime.
    std::shared_lock<std::shared_mutex> reader(slot.mutex);
    if (!slot.enabled.load(std::memory_order_relaxed)) return std::nullopt;
    return std::make_optional(slot.data);
  }

  // True when no slot is currently enabled.
  bool IsEmpty() const {
    return registered_count_.load(std::memory_order_relaxed) == 0;
  }

 private:
  std::atomic<size_t> registered_count_{0};
  std::array<table_element_t, N> table_{};
};
|
||||
|
||||
#if IGNORE_GCC_ARRAY_BOUNDS_ERROR
|
||||
#pragma GCC diagnostic pop
|
||||
#endif // IGNORE_GCC_ARRAY_BOUNDS_ERROR
|
||||
|
||||
} // namespace roctracer::util
|
||||
|
||||
#endif // UTIL_CALLBACK_TABLE_H_
|
||||
@@ -0,0 +1,894 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "roctracer.h"
|
||||
#include "roctracer_hip.h"
|
||||
#include "roctracer_ext.h"
|
||||
#include "roctracer_roctx.h"
|
||||
#include "roctracer_hsa.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <dirent.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <stack>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "correlation_id.h"
|
||||
#include "debug.h"
|
||||
#include "exception.h"
|
||||
#include "hsa_support.h"
|
||||
#include "loader.h"
|
||||
#include "logger.h"
|
||||
#include "memory_pool.h"
|
||||
#include "registration_table.h"
|
||||
|
||||
// Exception boundary for the public API entry points.
//
// API_METHOD_PREFIX declares the status variable 'err' and opens a try block. The body of the
// entry point follows, and exactly one of API_METHOD_SUFFIX / API_METHOD_CATCH(X) must close it.
// Exceptions are caught by const reference: the handlers only read from them.
#define API_METHOD_PREFIX                                                                          \
  roctracer_status_t err = ROCTRACER_STATUS_SUCCESS;                                               \
  try {
// Closes the try block, logs any std::exception, converts it to a status code with
// GetExcStatus() and returns the resulting status.
#define API_METHOD_SUFFIX                                                                          \
  }                                                                                                \
  catch (const std::exception& e) {                                                                \
    ERR_LOGGING(__FUNCTION__ << "(), " << e.what());                                               \
    err = GetExcStatus(e);                                                                         \
  }                                                                                                \
  return err;

// Closes the try block for entry points that do not return a status: any std::exception is
// logged and X is returned instead.
#define API_METHOD_CATCH(X)                                                                        \
  }                                                                                                \
  catch (const std::exception& e) {                                                                \
    ERR_LOGGING(__FUNCTION__ << "(), " << e.what());                                               \
  }                                                                                                \
  (void)err;                                                                                       \
  return X;
|
||||
|
||||
// Returns the process id, obtained with the getpid system call on the first call and cached
// for every later call.
static inline uint32_t GetPid() {
  static const long cached_pid = syscall(__NR_getpid);
  return static_cast<uint32_t>(cached_pid);
}
|
||||
// Returns the calling thread's id, obtained with the gettid system call on the thread's first
// call and cached per thread afterwards.
static inline uint32_t GetTid() {
  static thread_local const long cached_tid = syscall(__NR_gettid);
  return static_cast<uint32_t>(cached_tid);
}
|
||||
|
||||
using namespace roctracer;
|
||||
|
||||
namespace {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Internal library methods
|
||||
//
|
||||
|
||||
roctracer_start_cb_t roctracer_start_cb = nullptr;
|
||||
roctracer_stop_cb_t roctracer_stop_cb = nullptr;
|
||||
|
||||
// Maps an exception to an API status: ApiError exceptions report their own status, anything
// else becomes ROCTRACER_STATUS_ERROR.
roctracer_status_t GetExcStatus(const std::exception& e) {
  if (const auto* api_error = dynamic_cast<const ApiError*>(&e)) {
    return api_error->status();
  }
  return ROCTRACER_STATUS_ERROR;
}
|
||||
|
||||
std::mutex registration_mutex;
|
||||
|
||||
// Memory pool routines and primitives
|
||||
std::recursive_mutex memory_pool_mutex;
|
||||
MemoryPool* default_memory_pool = nullptr;
|
||||
|
||||
} // namespace
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Public library methods
|
||||
//
|
||||
|
||||
// Returns library version
|
||||
// Major component of the library version (ROCTRACER_VERSION_MAJOR).
ROCTRACER_API uint32_t roctracer_version_major() {
  return ROCTRACER_VERSION_MAJOR;
}
|
||||
// Minor component of the library version (ROCTRACER_VERSION_MINOR).
ROCTRACER_API uint32_t roctracer_version_minor() {
  return ROCTRACER_VERSION_MINOR;
}
|
||||
|
||||
// Returns the last error
|
||||
ROCTRACER_API const char* roctracer_error_string() {
|
||||
return strdup(util::Logger::Instance().LastMessage().c_str());
|
||||
}
|
||||
|
||||
// Return Op string by given domain and activity/API codes
|
||||
// nullptr returned on the error and the library errno is set
|
||||
ROCTRACER_API const char* roctracer_op_string(uint32_t domain, uint32_t op, uint32_t kind) {
|
||||
API_METHOD_PREFIX
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
return hsa_support::GetApiName(op);
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
return hsa_support::GetEvtName(op);
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
return hsa_support::GetOpsName(op);
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
return HipLoader::Instance().GetOpName(kind);
|
||||
case ACTIVITY_DOMAIN_HIP_API:
|
||||
return HipLoader::Instance().ApiName(op);
|
||||
case ACTIVITY_DOMAIN_EXT_API:
|
||||
return "EXT_API";
|
||||
default:
|
||||
throw roctracer::ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID");
|
||||
}
|
||||
API_METHOD_CATCH(nullptr)
|
||||
}
|
||||
|
||||
// Return Op code and kind by given string
|
||||
ROCTRACER_API roctracer_status_t roctracer_op_code(uint32_t domain, const char* str, uint32_t* op,
|
||||
uint32_t* kind) {
|
||||
API_METHOD_PREFIX
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
*op = hsa_support::GetApiCode(str);
|
||||
if (*op == HSA_API_ID_NUMBER) {
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
|
||||
"Invalid API name \"" << str << "\", domain ID(" << domain << ")");
|
||||
}
|
||||
if (kind != nullptr) *kind = 0;
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
*op = hipApiIdByName(str);
|
||||
if (*op == HIP_API_ID_NONE) {
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT,
|
||||
"Invalid API name \"" << str << "\", domain ID(" << domain << ")");
|
||||
}
|
||||
if (kind != nullptr) *kind = 0;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "limited domain ID(" << domain << ")");
|
||||
}
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
template <activity_domain_t> struct DomainTraits;
|
||||
|
||||
template <> struct DomainTraits<ACTIVITY_DOMAIN_HIP_API> {
|
||||
using ApiData = hip_api_data_t;
|
||||
using OperationId = hip_api_id_t;
|
||||
static constexpr size_t kOpIdBegin = HIP_API_ID_FIRST;
|
||||
static constexpr size_t kOpIdEnd = HIP_API_ID_LAST + 1;
|
||||
};
|
||||
|
||||
template <> struct DomainTraits<ACTIVITY_DOMAIN_HSA_API> {
|
||||
using ApiData = hsa_api_data_t;
|
||||
using OperationId = hsa_api_id_t;
|
||||
static constexpr size_t kOpIdBegin = 0;
|
||||
static constexpr size_t kOpIdEnd = HSA_API_ID_NUMBER;
|
||||
};
|
||||
|
||||
template <> struct DomainTraits<ACTIVITY_DOMAIN_ROCTX> {
|
||||
using ApiData = roctx_api_data_t;
|
||||
using OperationId = roctx_api_id_t;
|
||||
static constexpr size_t kOpIdBegin = 0;
|
||||
static constexpr size_t kOpIdEnd = ROCTX_API_ID_NUMBER;
|
||||
};
|
||||
|
||||
template <> struct DomainTraits<ACTIVITY_DOMAIN_HIP_OPS> {
|
||||
using OperationId = hip_op_id_t;
|
||||
static constexpr size_t kOpIdBegin = 0;
|
||||
static constexpr size_t kOpIdEnd = HIP_OP_ID_NUMBER;
|
||||
};
|
||||
|
||||
template <> struct DomainTraits<ACTIVITY_DOMAIN_HSA_OPS> {
|
||||
using OperationId = hsa_op_id_t;
|
||||
static constexpr size_t kOpIdBegin = 0;
|
||||
static constexpr size_t kOpIdEnd = HSA_OP_ID_NUMBER;
|
||||
};
|
||||
|
||||
template <> struct DomainTraits<ACTIVITY_DOMAIN_HSA_EVT> {
|
||||
using ApiData = hsa_evt_data_t;
|
||||
using OperationId = hsa_evt_id_t;
|
||||
static constexpr size_t kOpIdBegin = 0;
|
||||
static constexpr size_t kOpIdEnd = HSA_EVT_ID_NUMBER;
|
||||
};
|
||||
|
||||
// Return the first operation ID of 'domain'.
// Throws roctracer::ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID) when the
// domain is not one of the known activity domains.
constexpr uint32_t get_op_begin(activity_domain_t domain) {
  switch (domain) {
    // HSA domains.
    case ACTIVITY_DOMAIN_HSA_API: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_HSA_API>::kOpIdBegin);
    }
    case ACTIVITY_DOMAIN_HSA_OPS: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_HSA_OPS>::kOpIdBegin);
    }
    case ACTIVITY_DOMAIN_HSA_EVT: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_HSA_EVT>::kOpIdBegin);
    }
    // HIP domains.
    case ACTIVITY_DOMAIN_HIP_API: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_HIP_API>::kOpIdBegin);
    }
    case ACTIVITY_DOMAIN_HIP_OPS: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_HIP_OPS>::kOpIdBegin);
    }
    // rocTX domain.
    case ACTIVITY_DOMAIN_ROCTX: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_ROCTX>::kOpIdBegin);
    }
    // The external API domain has no traits; its range starts at 0.
    case ACTIVITY_DOMAIN_EXT_API: {
      return 0;
    }
    default: {
      throw roctracer::ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID");
    }
  }
}
|
||||
|
||||
// Return the exclusive upper bound of the operation IDs of 'domain'.
// Throws roctracer::ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID) when the
// domain is not one of the known activity domains.
constexpr uint32_t get_op_end(activity_domain_t domain) {
  switch (domain) {
    // HSA domains.
    case ACTIVITY_DOMAIN_HSA_API: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_HSA_API>::kOpIdEnd);
    }
    case ACTIVITY_DOMAIN_HSA_OPS: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_HSA_OPS>::kOpIdEnd);
    }
    case ACTIVITY_DOMAIN_HSA_EVT: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_HSA_EVT>::kOpIdEnd);
    }
    // HIP domains.
    case ACTIVITY_DOMAIN_HIP_API: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_HIP_API>::kOpIdEnd);
    }
    case ACTIVITY_DOMAIN_HIP_OPS: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_HIP_OPS>::kOpIdEnd);
    }
    // rocTX domain.
    case ACTIVITY_DOMAIN_ROCTX: {
      return static_cast<uint32_t>(DomainTraits<ACTIVITY_DOMAIN_ROCTX>::kOpIdEnd);
    }
    // The external API domain has an empty range: its end equals its begin.
    case ACTIVITY_DOMAIN_EXT_API: {
      return get_op_begin(ACTIVITY_DOMAIN_EXT_API);
    }
    default: {
      throw roctracer::ApiError(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID");
    }
  }
}
|
||||
|
||||
// Flag toggled by roctracer_start() (false) and roctracer_stop() (true).
std::atomic<bool> stopped_status(false);

// Predicate returning the current value of stopped_status.
struct IsStopped {
  bool operator()() const {
    const bool stopped = stopped_status.load(std::memory_order_relaxed);
    return stopped;
  }
};
|
||||
|
||||
// Stop predicate that unconditionally returns false.
//
// The call operator is const-qualified: 'constexpr' has not implied 'const'
// for member functions since C++14, so without the qualifier the predicate
// could not be invoked on a const object (unlike IsStopped).
struct NeverStopped {
  constexpr bool operator()() const { return false; }
};
|
||||
|
||||
using UserCallback = std::pair<activity_rtapi_callback_t, void*>;
|
||||
|
||||
template <activity_domain_t domain, typename IsStopped>
|
||||
using CallbackRegistrationTable =
|
||||
util::RegistrationTable<UserCallback, DomainTraits<domain>::kOpIdEnd, IsStopped>;
|
||||
|
||||
template <activity_domain_t domain, typename IsStopped>
|
||||
using ActivityRegistrationTable =
|
||||
util::RegistrationTable<MemoryPool*, DomainTraits<domain>::kOpIdEnd, IsStopped>;
|
||||
|
||||
template <activity_domain_t domain> struct ApiTracer {
|
||||
using ApiData = typename DomainTraits<domain>::ApiData;
|
||||
using OperationId = typename DomainTraits<domain>::OperationId;
|
||||
|
||||
struct TraceData {
|
||||
ApiData api_data; // API specific data (for example, function arguments).
|
||||
uint64_t phase_enter_timestamp; // timestamp when phase_enter was executed.
|
||||
uint64_t phase_data; // data that can be shared between phase_enter and phase_exit.
|
||||
|
||||
void (*phase_enter)(OperationId operation_id, TraceData* data);
|
||||
void (*phase_exit)(OperationId operation_id, TraceData* data);
|
||||
};
|
||||
|
||||
static void Exit(OperationId operation_id, TraceData* trace_data) {
|
||||
if (auto pool = activity_table.Get(operation_id)) {
|
||||
assert(trace_data != nullptr);
|
||||
activity_record_t record{};
|
||||
|
||||
record.domain = domain;
|
||||
record.op = operation_id;
|
||||
record.correlation_id = trace_data->api_data.correlation_id;
|
||||
record.begin_ns = trace_data->phase_enter_timestamp;
|
||||
record.end_ns = hsa_support::timestamp_ns();
|
||||
record.process_id = GetPid();
|
||||
record.thread_id = GetTid();
|
||||
|
||||
if (auto external_id = ExternalCorrelationId()) {
|
||||
roctracer_record_t ext_record{};
|
||||
ext_record.domain = ACTIVITY_DOMAIN_EXT_API;
|
||||
ext_record.op = ACTIVITY_EXT_OP_EXTERN_ID;
|
||||
ext_record.correlation_id = record.correlation_id;
|
||||
ext_record.external_id = *external_id;
|
||||
// Write the external correlation id record directly followed by the activity record.
|
||||
(*pool)->Write(std::array<roctracer_record_t, 2>{ext_record, record});
|
||||
} else {
|
||||
// Write record to the buffer.
|
||||
(*pool)->Write(record);
|
||||
}
|
||||
}
|
||||
CorrelationIdPop();
|
||||
}
|
||||
|
||||
static void Exit_UserCallback(OperationId operation_id, TraceData* trace_data) {
|
||||
if (auto user_callback = callback_table.Get(operation_id)) {
|
||||
assert(trace_data != nullptr);
|
||||
trace_data->api_data.phase = ACTIVITY_API_PHASE_EXIT;
|
||||
user_callback->first(domain, operation_id, &trace_data->api_data, user_callback->second);
|
||||
}
|
||||
Exit(operation_id, trace_data);
|
||||
}
|
||||
|
||||
static void Enter_UserCallback(OperationId operation_id, TraceData* trace_data) {
|
||||
if (auto user_callback = callback_table.Get(operation_id)) {
|
||||
assert(trace_data != nullptr);
|
||||
trace_data->api_data.phase = ACTIVITY_API_PHASE_ENTER;
|
||||
trace_data->api_data.phase_data = &trace_data->phase_data;
|
||||
user_callback->first(domain, operation_id, &trace_data->api_data, user_callback->second);
|
||||
trace_data->phase_exit = Exit_UserCallback;
|
||||
} else {
|
||||
trace_data->phase_exit = Exit;
|
||||
}
|
||||
}
|
||||
|
||||
static int Enter(OperationId operation_id, TraceData* trace_data) {
|
||||
bool callback_enabled = callback_table.Get(operation_id).has_value(),
|
||||
activity_enabled = activity_table.Get(operation_id).has_value();
|
||||
if (!callback_enabled && !activity_enabled) return -1;
|
||||
|
||||
if (trace_data != nullptr) {
|
||||
// Generate a new correlation ID.
|
||||
trace_data->api_data.correlation_id = CorrelationIdPush();
|
||||
|
||||
if (activity_enabled) {
|
||||
trace_data->phase_enter_timestamp = hsa_support::timestamp_ns();
|
||||
trace_data->phase_enter = nullptr;
|
||||
trace_data->phase_exit = Exit;
|
||||
}
|
||||
if (callback_enabled) {
|
||||
trace_data->phase_enter = Enter_UserCallback;
|
||||
trace_data->phase_exit = [](OperationId, TraceData*) { fatal("should not reach here"); };
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static CallbackRegistrationTable<domain, IsStopped> callback_table;
|
||||
static ActivityRegistrationTable<domain, IsStopped> activity_table;
|
||||
};
|
||||
|
||||
template <activity_domain_t domain>
|
||||
CallbackRegistrationTable<domain, IsStopped> ApiTracer<domain>::callback_table;
|
||||
|
||||
template <activity_domain_t domain>
|
||||
ActivityRegistrationTable<domain, IsStopped> ApiTracer<domain>::activity_table;
|
||||
|
||||
using HIP_ApiTracer = ApiTracer<ACTIVITY_DOMAIN_HIP_API>;
|
||||
using HSA_ApiTracer = ApiTracer<ACTIVITY_DOMAIN_HSA_API>;
|
||||
|
||||
CallbackRegistrationTable<ACTIVITY_DOMAIN_ROCTX, NeverStopped> roctx_api_callback_table;
|
||||
ActivityRegistrationTable<ACTIVITY_DOMAIN_HIP_OPS, IsStopped> hip_ops_activity_table;
|
||||
ActivityRegistrationTable<ACTIVITY_DOMAIN_HSA_OPS, IsStopped> hsa_ops_activity_table;
|
||||
CallbackRegistrationTable<ACTIVITY_DOMAIN_HSA_EVT, IsStopped> hsa_evt_callback_table;
|
||||
|
||||
// Single entry point registered with the HSA, HIP and rocTX layers.
//
// For the HSA/HIP API domains the call is forwarded to the matching
// ApiTracer::Enter and its result is returned. For the other domains the
// function returns 0 when something is registered for 'operation_id' (writing
// the record or invoking the user callback when 'data' is not null) and -1
// otherwise. Unknown domains return -1.
int TracerCallback(activity_domain_t domain, uint32_t operation_id, void* data) {
  switch (domain) {
    case ACTIVITY_DOMAIN_HSA_API: {
      using Tracer = HSA_ApiTracer;
      return Tracer::Enter(static_cast<Tracer::OperationId>(operation_id),
                           static_cast<Tracer::TraceData*>(data));
    }

    case ACTIVITY_DOMAIN_HIP_API: {
      using Tracer = HIP_ApiTracer;
      return Tracer::Enter(static_cast<Tracer::OperationId>(operation_id),
                           static_cast<Tracer::TraceData*>(data));
    }

    case ACTIVITY_DOMAIN_HIP_OPS: {
      auto memory_pool = hip_ops_activity_table.Get(operation_id);
      if (!memory_pool) break;

      auto* activity = static_cast<activity_record_t*>(data);
      if (activity != nullptr) {
        // If the record is for a kernel dispatch, write the kernel name in the pool's data,
        // and make the record point to it. Older HIP runtimes do not provide a kernel
        // name, so record.kernel_name might be null.
        const bool has_kernel_name =
            operation_id == HIP_OP_ID_DISPATCH && activity->kernel_name != nullptr;
        if (has_kernel_name) {
          (*memory_pool)
              ->Write(*activity, activity->kernel_name, strlen(activity->kernel_name) + 1,
                      [](auto& stored_record, const void* stored_name) {
                        stored_record.kernel_name = static_cast<const char*>(stored_name);
                      });
        } else {
          (*memory_pool)->Write(*activity);
        }
      }
      return 0;
    }

    case ACTIVITY_DOMAIN_ROCTX: {
      auto registered = roctx_api_callback_table.Get(operation_id);
      if (!registered) break;

      using ApiData = DomainTraits<ACTIVITY_DOMAIN_ROCTX>::ApiData;
      if (auto* api_data = static_cast<ApiData*>(data)) {
        registered->first(ACTIVITY_DOMAIN_ROCTX, operation_id, api_data, registered->second);
      }
      return 0;
    }

    case ACTIVITY_DOMAIN_HSA_OPS: {
      auto memory_pool = hsa_ops_activity_table.Get(operation_id);
      if (!memory_pool) break;

      if (auto* activity = static_cast<activity_record_t*>(data)) {
        (*memory_pool)->Write(*activity);
      }
      return 0;
    }

    case ACTIVITY_DOMAIN_HSA_EVT: {
      auto registered = hsa_evt_callback_table.Get(operation_id);
      if (!registered) break;

      using ApiData = DomainTraits<ACTIVITY_DOMAIN_HSA_EVT>::ApiData;
      if (auto* api_data = static_cast<ApiData*>(data)) {
        registered->first(ACTIVITY_DOMAIN_HSA_EVT, operation_id, api_data, registered->second);
      }
      return 0;
    }

    default:
      break;
  }
  // Nothing is registered for this domain/operation.
  return -1;
}
|
||||
|
||||
// Groups several registration tables that share one tracer hook.
//
// The 'engage' functor runs right before the first registration into an
// otherwise empty group, and the 'disengage' functor runs right after the
// removal that leaves every table of the group empty. The tables are held by
// reference and must outlive the group.
template <typename... Tables> struct RegistrationTableGroup {
 public:
  template <typename Functor1, typename Functor2>
  RegistrationTableGroup(Functor1&& engage_tracer, Functor2&& disengage_tracer, Tables&... tables)
      : engage_tracer_(std::forward<Functor1>(engage_tracer)),
        disengage_tracer_(std::forward<Functor2>(disengage_tracer)),
        tables_(tables...) {}

  // Register 'args' for 'operation_id' in 'table', engaging the tracer first
  // if the whole group was empty.
  template <typename T, typename... Args>
  void Register(T& table, uint32_t operation_id, Args... args) const {
    const bool group_was_empty = AllEmpty();
    if (group_was_empty) engage_tracer_();
    table.Register(operation_id, std::forward<Args>(args)...);
  }

  // Unregister 'operation_id' from 'table', disengaging the tracer afterwards
  // if the whole group became empty.
  template <typename T> void Unregister(T& table, uint32_t operation_id) const {
    table.Unregister(operation_id);
    const bool group_is_empty = AllEmpty();
    if (group_is_empty) disengage_tracer_();
  }

 private:
  // True when every table of the group reports IsEmpty().
  bool AllEmpty() const {
    const auto every_table_empty = [](const auto&... table) {
      return (table.IsEmpty() && ...);
    };
    return std::apply(every_table_empty, tables_);
  }

  const std::function<void()> engage_tracer_;
  const std::function<void()> disengage_tracer_;
  const std::tuple<const Tables&...> tables_;
};
|
||||
|
||||
RegistrationTableGroup HSA_registration_group(
|
||||
[]() { hsa_support::RegisterTracerCallback(TracerCallback); },
|
||||
[]() { hsa_support::RegisterTracerCallback(nullptr); }, HSA_ApiTracer::callback_table,
|
||||
HSA_ApiTracer::activity_table, hsa_ops_activity_table, hsa_evt_callback_table);
|
||||
|
||||
RegistrationTableGroup HIP_registration_group(
|
||||
[]() { HipLoader::Instance().RegisterTracerCallback(TracerCallback); },
|
||||
[]() { HipLoader::Instance().RegisterTracerCallback(nullptr); }, HIP_ApiTracer::callback_table,
|
||||
HIP_ApiTracer::activity_table, hip_ops_activity_table);
|
||||
|
||||
RegistrationTableGroup ROCTX_registration_group(
|
||||
[]() { RocTxLoader::Instance().RegisterTracerCallback(TracerCallback); },
|
||||
[]() { RocTxLoader::Instance().RegisterTracerCallback(nullptr); }, roctx_api_callback_table);
|
||||
|
||||
} // namespace
|
||||
|
||||
// Enable runtime API callbacks
|
||||
static void roctracer_enable_callback_impl(roctracer_domain_t domain, uint32_t operation_id,
|
||||
roctracer_rtapi_callback_t callback, void* user_data) {
|
||||
std::lock_guard lock(registration_mutex);
|
||||
|
||||
if (operation_id >= get_op_end(domain) || callback == nullptr)
|
||||
throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument");
|
||||
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
HSA_registration_group.Register(hsa_evt_callback_table, operation_id, callback, user_data);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
HSA_registration_group.Register(HSA_ApiTracer::callback_table, operation_id, callback,
|
||||
user_data);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_API:
|
||||
if (HipLoader::Instance().IsEnabled())
|
||||
HIP_registration_group.Register(HIP_ApiTracer::callback_table, operation_id, callback,
|
||||
user_data);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
if (RocTxLoader::Instance().IsEnabled())
|
||||
ROCTX_registration_group.Register(roctx_api_callback_table, operation_id, callback,
|
||||
user_data);
|
||||
break;
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
}
|
||||
|
||||
// Public API: enable 'callback' for a single operation of 'domain'.
// Forwards to roctracer_enable_callback_impl. There is no explicit return
// statement here, so the returned status comes from the API_METHOD_PREFIX /
// API_METHOD_SUFFIX macros, which are defined outside this part of the file.
ROCTRACER_API roctracer_status_t roctracer_enable_op_callback(roctracer_domain_t domain,
                                                              uint32_t op,
                                                              roctracer_rtapi_callback_t callback,
                                                              void* user_data) {
  API_METHOD_PREFIX
  roctracer_enable_callback_impl(domain, op, callback, user_data);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Public API: enable 'callback' for every operation of 'domain', i.e. for each
// ID in [get_op_begin(domain), get_op_end(domain)).
ROCTRACER_API roctracer_status_t roctracer_enable_domain_callback(
    roctracer_domain_t domain, roctracer_rtapi_callback_t callback, void* user_data) {
  API_METHOD_PREFIX
  const uint32_t end_id = get_op_end(domain);
  uint32_t current_id = get_op_begin(domain);
  while (current_id < end_id) {
    roctracer_enable_callback_impl(domain, current_id, callback, user_data);
    ++current_id;
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Disable runtime API callbacks
|
||||
static void roctracer_disable_callback_impl(roctracer_domain_t domain, uint32_t operation_id) {
|
||||
std::lock_guard lock(registration_mutex);
|
||||
|
||||
if (operation_id >= get_op_end(domain))
|
||||
throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument");
|
||||
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
HSA_registration_group.Unregister(hsa_evt_callback_table, operation_id);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
HSA_registration_group.Unregister(HSA_ApiTracer::callback_table, operation_id);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_API:
|
||||
if (HipLoader::Instance().IsEnabled())
|
||||
HIP_registration_group.Unregister(HIP_ApiTracer::callback_table, operation_id);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
if (RocTxLoader::Instance().IsEnabled())
|
||||
ROCTX_registration_group.Unregister(roctx_api_callback_table, operation_id);
|
||||
break;
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
}
|
||||
|
||||
// Public API: disable the callback of a single operation of 'domain'.
// Forwards to roctracer_disable_callback_impl inside the API_METHOD_PREFIX /
// API_METHOD_SUFFIX wrapper.
ROCTRACER_API roctracer_status_t roctracer_disable_op_callback(roctracer_domain_t domain,
                                                               uint32_t op) {
  API_METHOD_PREFIX
  roctracer_disable_callback_impl(domain, op);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Public API: disable the callbacks of every operation of 'domain', i.e. for
// each ID in [get_op_begin(domain), get_op_end(domain)).
ROCTRACER_API roctracer_status_t roctracer_disable_domain_callback(roctracer_domain_t domain) {
  API_METHOD_PREFIX
  const uint32_t end_id = get_op_end(domain);
  uint32_t current_id = get_op_begin(domain);
  while (current_id < end_id) {
    roctracer_disable_callback_impl(domain, current_id);
    ++current_id;
  }
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Return default pool and set new one if parameter pool is not NULL.
|
||||
ROCTRACER_API roctracer_pool_t* roctracer_default_pool_expl(roctracer_pool_t* pool) {
|
||||
std::lock_guard lock(memory_pool_mutex);
|
||||
roctracer_pool_t* p = reinterpret_cast<roctracer_pool_t*>(default_memory_pool);
|
||||
if (pool != nullptr) default_memory_pool = reinterpret_cast<MemoryPool*>(pool);
|
||||
return p;
|
||||
}
|
||||
|
||||
// Return the current default pool (may be null), read under memory_pool_mutex.
ROCTRACER_API roctracer_pool_t* roctracer_default_pool() {
  std::lock_guard lock(memory_pool_mutex);
  roctracer_pool_t* const current = reinterpret_cast<roctracer_pool_t*>(default_memory_pool);
  return current;
}
|
||||
|
||||
// Open memory pool
|
||||
static void roctracer_open_pool_impl(const roctracer_properties_t* properties,
|
||||
roctracer_pool_t** pool) {
|
||||
std::lock_guard lock(memory_pool_mutex);
|
||||
if ((pool == nullptr) && (default_memory_pool != nullptr)) {
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_DEFAULT_POOL_ALREADY_DEFINED, "default pool already set");
|
||||
}
|
||||
MemoryPool* p = new MemoryPool(*properties);
|
||||
if (p == nullptr) EXC_RAISING(ROCTRACER_STATUS_ERROR_MEMORY_ALLOCATION, "MemoryPool() error");
|
||||
if (pool != nullptr)
|
||||
*pool = p;
|
||||
else
|
||||
default_memory_pool = p;
|
||||
}
|
||||
|
||||
// Public API: create a memory pool from 'properties' and hand it back through
// 'pool'. Forwards to roctracer_open_pool_impl, which installs the new pool as
// the default one instead when 'pool' is null.
ROCTRACER_API roctracer_status_t roctracer_open_pool_expl(const roctracer_properties_t* properties,
                                                          roctracer_pool_t** pool) {
  API_METHOD_PREFIX
  roctracer_open_pool_impl(properties, pool);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Public API: create the default memory pool from 'properties'.
// Passing a null pool pointer to roctracer_open_pool_impl makes the new pool
// the default one.
ROCTRACER_API roctracer_status_t roctracer_open_pool(const roctracer_properties_t* properties) {
  API_METHOD_PREFIX
  roctracer_open_pool_impl(properties, nullptr);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Public API: store in '*next' the address of the record that directly follows
// 'record' (record + 1).
//
// Both pointers must be non-null; a null pointer is reported as
// ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT instead of being dereferenced or
// used in pointer arithmetic.
ROCTRACER_API roctracer_status_t roctracer_next_record(const activity_record_t* record,
                                                       const activity_record_t** next) {
  API_METHOD_PREFIX
  if (record == nullptr || next == nullptr) {
    EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "null record pointer");
  }
  *next = record + 1;
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Enable activity records logging
|
||||
static void roctracer_enable_activity_impl(roctracer_domain_t domain, uint32_t op,
|
||||
roctracer_pool_t* pool) {
|
||||
std::lock_guard lock(registration_mutex);
|
||||
|
||||
MemoryPool* memory_pool = reinterpret_cast<MemoryPool*>(pool);
|
||||
if (memory_pool == nullptr) memory_pool = default_memory_pool;
|
||||
if (memory_pool == nullptr)
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_DEFAULT_POOL_UNDEFINED, "no default pool");
|
||||
|
||||
if (op >= get_op_end(domain))
|
||||
throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument");
|
||||
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
HSA_registration_group.Register(HSA_ApiTracer::activity_table, op, memory_pool);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
HSA_registration_group.Register(hsa_ops_activity_table, op, memory_pool);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_API:
|
||||
if (HipLoader::Instance().IsEnabled())
|
||||
HIP_registration_group.Register(HIP_ApiTracer::activity_table, op, memory_pool);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
if (HipLoader::Instance().IsEnabled())
|
||||
HIP_registration_group.Register(hip_ops_activity_table, op, memory_pool);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
break;
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
}
|
||||
|
||||
// Public API: enable activity records for a single operation of 'domain',
// written to the explicitly given 'pool'. Forwards to
// roctracer_enable_activity_impl, which uses the default pool when 'pool' is null.
ROCTRACER_API roctracer_status_t roctracer_enable_op_activity_expl(roctracer_domain_t domain,
                                                                   uint32_t op,
                                                                   roctracer_pool_t* pool) {
  API_METHOD_PREFIX
  roctracer_enable_activity_impl(domain, op, pool);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Public API: enable activity records for a single operation of 'domain',
// written to the default pool (a null pool is passed to the implementation).
ROCTRACER_API roctracer_status_t roctracer_enable_op_activity(activity_domain_t domain,
                                                              uint32_t op) {
  API_METHOD_PREFIX
  roctracer_enable_activity_impl(domain, op, nullptr);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Enable activity records for every operation of 'domain', i.e. for each ID in
// [get_op_begin(domain), get_op_end(domain)). An ApiError whose status is
// ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED is ignored for the affected operation;
// any other ApiError is rethrown and stops the loop.
static void roctracer_enable_domain_activity_impl(roctracer_domain_t domain,
                                                  roctracer_pool_t* pool) {
  const uint32_t end_id = get_op_end(domain);
  for (uint32_t op_id = get_op_begin(domain); op_id < end_id; ++op_id) {
    try {
      roctracer_enable_activity_impl(domain, op_id, pool);
    } catch (const ApiError& error) {
      const bool not_implemented = error.status() == ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED;
      if (!not_implemented) throw;
    }
  }
}
|
||||
|
||||
// Public API: enable activity records for all operations of 'domain', written
// to the explicitly given 'pool'. Forwards to
// roctracer_enable_domain_activity_impl.
ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity_expl(roctracer_domain_t domain,
                                                                       roctracer_pool_t* pool) {
  API_METHOD_PREFIX
  roctracer_enable_domain_activity_impl(domain, pool);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Public API: enable activity records for all operations of 'domain', written
// to the default pool (a null pool is passed to the implementation).
ROCTRACER_API roctracer_status_t roctracer_enable_domain_activity(activity_domain_t domain) {
  API_METHOD_PREFIX
  roctracer_enable_domain_activity_impl(domain, nullptr);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Disable activity records logging
|
||||
static void roctracer_disable_activity_impl(roctracer_domain_t domain, uint32_t op) {
|
||||
std::lock_guard lock(registration_mutex);
|
||||
|
||||
if (op >= get_op_end(domain))
|
||||
throw ApiError(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "invalid argument");
|
||||
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
HSA_registration_group.Unregister(HSA_ApiTracer::activity_table, op);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
HSA_registration_group.Unregister(hsa_ops_activity_table, op);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_API:
|
||||
if (HipLoader::Instance().IsEnabled())
|
||||
HIP_registration_group.Unregister(HIP_ApiTracer::activity_table, op);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
if (HipLoader::Instance().IsEnabled())
|
||||
HIP_registration_group.Unregister(hip_ops_activity_table, op);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
break;
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
}
|
||||
|
||||
// Public API: disable activity records for a single operation of 'domain'.
// Forwards to roctracer_disable_activity_impl inside the API_METHOD_PREFIX /
// API_METHOD_SUFFIX wrapper.
ROCTRACER_API roctracer_status_t roctracer_disable_op_activity(roctracer_domain_t domain,
                                                               uint32_t op) {
  API_METHOD_PREFIX
  roctracer_disable_activity_impl(domain, op);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Disable activity records for every operation of 'domain', i.e. for each ID in
// [get_op_begin(domain), get_op_end(domain)). An ApiError whose status is
// ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED is ignored for the affected operation;
// any other ApiError is rethrown and stops the loop.
static void roctracer_disable_domain_activity_impl(roctracer_domain_t domain) {
  const uint32_t end_id = get_op_end(domain);
  for (uint32_t op_id = get_op_begin(domain); op_id < end_id; ++op_id) {
    try {
      roctracer_disable_activity_impl(domain, op_id);
    } catch (const ApiError& error) {
      const bool not_implemented = error.status() == ROCTRACER_STATUS_ERROR_NOT_IMPLEMENTED;
      if (!not_implemented) throw;
    }
  }
}
|
||||
|
||||
// Public API: disable activity records for all operations of 'domain'.
// Forwards to roctracer_disable_domain_activity_impl.
ROCTRACER_API roctracer_status_t roctracer_disable_domain_activity(roctracer_domain_t domain) {
  API_METHOD_PREFIX
  roctracer_disable_domain_activity_impl(domain);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Close memory pool
|
||||
static void roctracer_close_pool_impl(roctracer_pool_t* pool) {
|
||||
std::lock_guard lock(memory_pool_mutex);
|
||||
if (pool == nullptr) pool = reinterpret_cast<roctracer_pool_t*>(default_memory_pool);
|
||||
if (pool == nullptr) return;
|
||||
MemoryPool* p = reinterpret_cast<MemoryPool*>(pool);
|
||||
if (p == default_memory_pool) default_memory_pool = nullptr;
|
||||
|
||||
#if 0
|
||||
// Disable any activities that specify the pool being deleted.
|
||||
std::vector<std::pair<roctracer_domain_t, uint32_t>> ops;
|
||||
act_journal.ForEach(
|
||||
[&ops, pool](roctracer_domain_t domain, uint32_t op, const ActivityJournalData& data) {
|
||||
if (pool == data.pool) ops.emplace_back(domain, op);
|
||||
return true;
|
||||
});
|
||||
for (auto&& [domain, op] : ops) roctracer_disable_activity_impl(domain, op);
|
||||
#endif
|
||||
|
||||
delete (p);
|
||||
}
|
||||
|
||||
// Public API: destroy the given 'pool'. Forwards to roctracer_close_pool_impl,
// which falls back to the default pool when 'pool' is null.
ROCTRACER_API roctracer_status_t roctracer_close_pool_expl(roctracer_pool_t* pool) {
  API_METHOD_PREFIX
  roctracer_close_pool_impl(pool);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Public API: destroy the default pool (a null pool selects the default one in
// roctracer_close_pool_impl).
ROCTRACER_API roctracer_status_t roctracer_close_pool() {
  API_METHOD_PREFIX
  roctracer_close_pool_impl(nullptr);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Flush available activity records
|
||||
static void roctracer_flush_activity_impl(roctracer_pool_t* pool) {
|
||||
if (pool == nullptr) pool = roctracer_default_pool();
|
||||
MemoryPool* default_memory_pool = reinterpret_cast<MemoryPool*>(pool);
|
||||
if (default_memory_pool != nullptr) default_memory_pool->Flush();
|
||||
}
|
||||
|
||||
// Public API: flush the activity records of the given 'pool'. Forwards to
// roctracer_flush_activity_impl, which uses the default pool when 'pool' is null.
ROCTRACER_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* pool) {
  API_METHOD_PREFIX
  roctracer_flush_activity_impl(pool);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Public API: flush the activity records of the default pool (a null pool
// selects the default one in roctracer_flush_activity_impl).
ROCTRACER_API roctracer_status_t roctracer_flush_activity() {
  API_METHOD_PREFIX
  roctracer_flush_activity_impl(nullptr);
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Notifies that the calling thread is entering an external API region.
|
||||
// Push an external correlation id for the calling thread.
|
||||
ROCTRACER_API roctracer_status_t
|
||||
roctracer_activity_push_external_correlation_id(activity_correlation_id_t id) {
|
||||
API_METHOD_PREFIX
|
||||
ExternalCorrelationIdPush(id);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Notifies that the calling thread is leaving an external API region.
|
||||
// Pop an external correlation id for the calling thread, and return it in 'last_id' if not null.
|
||||
ROCTRACER_API roctracer_status_t
|
||||
roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id) {
|
||||
API_METHOD_PREFIX
|
||||
|
||||
auto external_id = ExternalCorrelationIdPop();
|
||||
if (!external_id) {
|
||||
if (last_id != nullptr) *last_id = 0;
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_MISMATCHED_EXTERNAL_CORRELATION_ID,
|
||||
"unbalanced external correlation id pop");
|
||||
}
|
||||
|
||||
if (last_id != nullptr) *last_id = *external_id;
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Start API
|
||||
ROCTRACER_API void roctracer_start() {
|
||||
if (stopped_status.exchange(false, std::memory_order_relaxed) && roctracer_start_cb)
|
||||
roctracer_start_cb();
|
||||
}
|
||||
|
||||
// Stop API
|
||||
ROCTRACER_API void roctracer_stop() {
|
||||
if (!stopped_status.exchange(true, std::memory_order_relaxed) && roctracer_stop_cb)
|
||||
roctracer_stop_cb();
|
||||
}
|
||||
|
||||
// Public API: store the current hsa_support::timestamp_ns() value in
// '*timestamp'.
//
// 'timestamp' must not be null; a null pointer is reported as
// ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT instead of being dereferenced.
ROCTRACER_API roctracer_status_t roctracer_get_timestamp(roctracer_timestamp_t* timestamp) {
  API_METHOD_PREFIX
  if (timestamp == nullptr) {
    EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_ARGUMENT, "null timestamp pointer");
  }
  *timestamp = hsa_support::timestamp_ns();
  API_METHOD_SUFFIX
}
|
||||
|
||||
// Set properties
|
||||
ROCTRACER_API roctracer_status_t roctracer_set_properties(roctracer_domain_t domain,
|
||||
void* properties) {
|
||||
API_METHOD_PREFIX
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
case ACTIVITY_DOMAIN_HIP_OPS:
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_EXT_API: {
|
||||
roctracer_ext_properties_t* ops_properties =
|
||||
reinterpret_cast<roctracer_ext_properties_t*>(properties);
|
||||
roctracer_start_cb = ops_properties->start_cb;
|
||||
roctracer_stop_cb = ops_properties->stop_cb;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR_INVALID_DOMAIN_ID, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
|
||||
// itself, not by code during _init. 'extern const' seems to do that although that is not a
|
||||
// guarantee.
|
||||
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 50;
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
uint64_t failed_tool_count, const char* const* failed_tool_names) {
|
||||
[](auto&&...) {}(runtime_version, failed_tool_count, failed_tool_names);
|
||||
hsa_support::Initialize(table);
|
||||
return true;
|
||||
}
|
||||
|
||||
ROCTRACER_EXPORT void OnUnload() { hsa_support::Finalize(); }
|
||||
|
||||
} // extern "C"
|
||||
@@ -0,0 +1,11 @@
|
||||
ROCTX_4.1 {
|
||||
global: roctxMarkA;
|
||||
roctxRangePop;
|
||||
roctxRangePushA;
|
||||
roctxRangeStartA;
|
||||
roctxRangeStop;
|
||||
roctxRegisterTracerCallback;
|
||||
roctx_version_major;
|
||||
roctx_version_minor;
|
||||
local: *;
|
||||
};
|
||||
@@ -0,0 +1,94 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "roctx.h"
|
||||
#include "roctracer_roctx.h"
|
||||
#include "ext/prof_protocol.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
|
||||
namespace {
|
||||
|
||||
std::atomic<int (*)(activity_domain_t domain, uint32_t operation_id, void* data)> report_activity;
|
||||
thread_local int nested_range_level{0};
|
||||
|
||||
void ReportActivity(roctx_api_id_t operation_id, const char* message = nullptr,
|
||||
roctx_range_id_t id = {}) {
|
||||
auto function = report_activity.load(std::memory_order_relaxed);
|
||||
if (!function) return;
|
||||
|
||||
roctx_api_data_t api_data{};
|
||||
switch (operation_id) {
|
||||
case ROCTX_API_ID_roctxMarkA:
|
||||
api_data.args.roctxMarkA.message = message;
|
||||
break;
|
||||
case ROCTX_API_ID_roctxRangePushA:
|
||||
api_data.args.roctxRangePushA.message = message;
|
||||
break;
|
||||
case ROCTX_API_ID_roctxRangePop:
|
||||
break;
|
||||
case ROCTX_API_ID_roctxRangeStartA:
|
||||
api_data.args.roctxRangeStartA.message = message;
|
||||
api_data.args.roctxRangeStartA.id = id;
|
||||
break;
|
||||
case ROCTX_API_ID_roctxRangeStop:
|
||||
api_data.args.roctxRangeStop.id = id;
|
||||
break;
|
||||
default:
|
||||
assert(!"should not reach here");
|
||||
}
|
||||
function(ACTIVITY_DOMAIN_ROCTX, operation_id, &api_data);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Return the value of ROCTX_VERSION_MAJOR.
ROCTX_API uint32_t roctx_version_major() {
  return ROCTX_VERSION_MAJOR;
}

// Return the value of ROCTX_VERSION_MINOR.
ROCTX_API uint32_t roctx_version_minor() {
  return ROCTX_VERSION_MINOR;
}
|
||||
|
||||
// Report a roctxMarkA event carrying 'message' to the registered tracer callback.
ROCTX_API void roctxMarkA(const char* message) {
  ReportActivity(ROCTX_API_ID_roctxMarkA, message);
}
|
||||
|
||||
// Report a roctxRangePushA event carrying 'message', then increase the calling
// thread's nesting level. Returns the nesting level from before the push.
ROCTX_API int roctxRangePushA(const char* message) {
  ReportActivity(ROCTX_API_ID_roctxRangePushA, message);
  const int level_before_push = nested_range_level;
  nested_range_level = level_before_push + 1;
  return level_before_push;
}
|
||||
|
||||
// Report a roctxRangePop event, then decrease the calling thread's nesting
// level. Returns the nesting level after the pop, or -1 when no range was open
// (the event is reported in that case as well).
ROCTX_API int roctxRangePop() {
  ReportActivity(ROCTX_API_ID_roctxRangePop);
  if (nested_range_level != 0) {
    nested_range_level -= 1;
    return nested_range_level;
  }
  return -1;
}
|
||||
|
||||
// Allocate a new range id from a process-wide atomic counter that starts at 1,
// report a roctxRangeStartA event carrying 'message' and that id, and return
// the id.
ROCTX_API roctx_range_id_t roctxRangeStartA(const char* message) {
  static std::atomic<roctx_range_id_t> start_stop_range_id(1);
  const roctx_range_id_t new_range_id = start_stop_range_id.fetch_add(1);
  ReportActivity(ROCTX_API_ID_roctxRangeStartA, message, new_range_id);
  return new_range_id;
}
|
||||
|
||||
// Reports the end of the start/stop range identified by 'range_id'. No message is attached.
ROCTX_API void roctxRangeStop(roctx_range_id_t range_id) {
  const char* const no_message = nullptr;
  ReportActivity(ROCTX_API_ID_roctxRangeStop, no_message, range_id);
}
|
||||
|
||||
// Installs 'function' as the callback that receives every subsequent rocTX call (domain,
// operation id and a pointer to the packed arguments). Storing a null pointer makes
// ReportActivity() return without reporting.
extern "C" ROCTX_EXPORT void roctxRegisterTracerCallback(
    int (*function)(activity_domain_t domain, uint32_t operation_id, void* data)) {
  report_activity.store(function, std::memory_order_relaxed);
}
|
||||
@@ -0,0 +1 @@
|
||||
{ global: HSA_AMD_TOOL_PRIORITY; OnLoad; OnUnload; local: *; };
|
||||
@@ -0,0 +1,288 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TOOL_TRACE_BUFFER_H_
|
||||
#define TOOL_TRACE_BUFFER_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <future>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
// Common base of all trace buffers. Registered instances are chained into one intrusive,
// singly-linked list (head_) kept in ascending priority order and guarded by mutex_, so that
// FlushAll() can flush every registered buffer. The two static members are only declared here.
class TraceBufferBase {
 public:
  // Calls Flush() on every registered buffer, in list order, while holding the list mutex.
  static void FlushAll() {
    std::lock_guard guard(mutex_);

    TraceBufferBase* node = head_;
    while (node != nullptr) {
      node->Flush();
      node = node->next_;
    }
  }

  // Inserts 'elem' in front of the first registered buffer whose priority is greater than or
  // equal to elem's priority (or at the tail when there is none).
  static void Register(TraceBufferBase* elem) {
    std::lock_guard guard(mutex_);

    TraceBufferBase** link = &head_;
    while (true) {
      TraceBufferBase* candidate = *link;
      if (candidate == nullptr || elem->priority_ <= candidate->priority_) break;
      link = &candidate->next_;
    }

    elem->next_ = *link;
    *link = elem;
  }

  // Unlinks 'elem' from the list. Asserts that the element was found.
  static void Unregister(TraceBufferBase* elem) {
    std::lock_guard guard(mutex_);

    TraceBufferBase** link = &head_;
    for (; *link != nullptr && *link != elem; link = &(*link)->next_) {
    }

    assert(*link != nullptr && "elem is not in the list");
    *link = elem->next_;
  }

  // Stores the name and priority; the instance is not linked into the list by this constructor.
  TraceBufferBase(std::string name, int priority)
      : name_(std::move(name)), priority_(priority), next_(nullptr) {}

  TraceBufferBase(const TraceBufferBase&) = delete;
  TraceBufferBase& operator=(const TraceBufferBase&) = delete;

  // Removes this instance from the list of registered buffers.
  virtual ~TraceBufferBase() { Unregister(this); }

  // Emits the records that are ready to be written out; implemented by the concrete buffer.
  virtual void Flush() = 0;

  // name_ is const, so the rvalue overload hands out a copy as well.
  std::string name() && { return name_; }
  const std::string& name() const& { return name_; }

 private:
  const std::string name_;
  const int priority_;
  TraceBufferBase* next_;  // Next buffer in the registration list.

  static TraceBufferBase* head_;  // First buffer of the registration list.
  static std::mutex mutex_;       // Guards head_ and every next_ link.
};
|
||||
|
||||
// Life-cycle states stored in the 'valid' field of a trace entry.
enum TraceEntryState {
  TRACE_ENTRY_INVALID = 0,  // Written into the slots of a freshly allocated buffer.
  TRACE_ENTRY_INIT = 1,     // Set by the entry constructors.
  TRACE_ENTRY_COMPLETE = 2  // The only state in which TraceBuffer::Flush() emits an entry.
};
|
||||
|
||||
template <typename Entry, typename Allocator = std::allocator<Entry>>
|
||||
class TraceBuffer : protected TraceBufferBase {
|
||||
public:
|
||||
using callback_t = std::function<void(Entry*)>;
|
||||
|
||||
TraceBuffer(std::string name, uint64_t size, callback_t flush_callback, int priority = 0)
|
||||
: TraceBufferBase(std::move(name), priority),
|
||||
flush_callback_(std::move(flush_callback)),
|
||||
size_(size) {
|
||||
assert(size_ != 0 && "cannot create an empty trace buffer");
|
||||
|
||||
Entry* write_buffer = allocator_.allocate(size_);
|
||||
assert(write_buffer != nullptr);
|
||||
buffer_list_.push_back(write_buffer);
|
||||
|
||||
read_index_ = 0;
|
||||
write_index_ = {0, write_buffer};
|
||||
|
||||
AllocateFreeBuffer();
|
||||
|
||||
// Add this instance to the link list of all trace buffers in the process.
|
||||
Register(this);
|
||||
}
|
||||
|
||||
~TraceBuffer() override {
|
||||
// Flush the remaining records. After flushing, there should not be any records left in the
|
||||
// trace buffer.
|
||||
Flush();
|
||||
assert(read_index_ == write_index_.load().index);
|
||||
|
||||
// Acquire both the writer and worker lock as we are accessing shared variables they protect.
|
||||
std::unique_lock writer_lock(write_mutex_, std::defer_lock);
|
||||
std::unique_lock worker_lock(worker_mutex_, std::defer_lock);
|
||||
std::lock(writer_lock, worker_lock);
|
||||
|
||||
// Deallocate the buffers.
|
||||
allocator_.deallocate(write_index_.load().buffer, size_);
|
||||
allocator_.deallocate(free_buffer_, size_);
|
||||
|
||||
// Stop the worker thread. The worker thread loop checks the 'worker_thread_' std::optional
|
||||
// after waking up, and exits if it does not have a value.
|
||||
if (worker_thread_) {
|
||||
std::thread worker_thread = std::move(worker_thread_.value());
|
||||
{
|
||||
// Tell the worker thread loop to exit.
|
||||
worker_thread_.reset();
|
||||
free_buffer_ = nullptr;
|
||||
worker_cond_.notify_one();
|
||||
}
|
||||
// Release the worker lock to allow the worker thread to exit.
|
||||
worker_lock.unlock();
|
||||
worker_thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
// Flush all entries between read_pointer and write_pointer. read_pointer and write_pointer are
|
||||
// monotonically increasing indices, with read_pointer % size always indexing inside the first
|
||||
// buffer in the list. Stop flushing if an incomplete entry is found, it will be flushed with
|
||||
// the next invocation after changing its state to 'complete'.
|
||||
void Flush() override {
|
||||
std::lock_guard lock(write_mutex_);
|
||||
auto write_index = write_index_.load(std::memory_order_relaxed);
|
||||
|
||||
for (auto it = buffer_list_.begin(); it != buffer_list_.end();) {
|
||||
auto end_of_buffer = read_index_ - read_index_ % size_ + size_;
|
||||
|
||||
while (read_index_ < std::min(write_index.index, end_of_buffer)) {
|
||||
Entry* entry = &(*it)[read_index_ % size_];
|
||||
|
||||
// The entry is not yet complete, stop flushing here.
|
||||
if (entry->valid.load(std::memory_order_acquire) != TRACE_ENTRY_COMPLETE) return;
|
||||
|
||||
flush_callback_(entry);
|
||||
entry->~Entry();
|
||||
|
||||
++read_index_;
|
||||
}
|
||||
|
||||
// The buffer is still in use or the read pointer did not reach the end of the buffer.
|
||||
if (*it == write_index.buffer || read_index_ != end_of_buffer) return;
|
||||
|
||||
// All entries in the current buffer are now processed. Destroy the buffer and move onto the
|
||||
// next buffer in the list.
|
||||
allocator_.deallocate(*it, size_);
|
||||
it = buffer_list_.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Args> Entry& Emplace(Args... args) {
|
||||
return *new (GetEntry()) Entry(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
private:
|
||||
Entry* GetEntry() {
|
||||
auto current = write_index_.load(std::memory_order_relaxed);
|
||||
|
||||
while (true) {
|
||||
// If the pointer is at the end of the current buffer, switch to the available free buffer and
|
||||
// notify the worker thread to allocate a new buffer.
|
||||
if (current.index != 0 && current.index % size_ == 0) {
|
||||
std::lock_guard lock(write_mutex_);
|
||||
|
||||
// If the worker thread wasn't already started, start it now. This avoids starting a new
|
||||
// thread when the trace buffer is created.
|
||||
if (!worker_thread_) {
|
||||
std::promise<void> ready;
|
||||
auto future = ready.get_future();
|
||||
{
|
||||
std::lock_guard worker_lock(worker_mutex_);
|
||||
worker_thread_.emplace(&TraceBuffer::WorkerThreadLoop, this, std::move(ready));
|
||||
}
|
||||
future.wait();
|
||||
}
|
||||
|
||||
// Re-check the pointer overflow under the writer lock, another thread could have beaten us
|
||||
// to it and already bumped the write_index_.
|
||||
current = write_index_.load(std::memory_order_relaxed);
|
||||
if (current.index % size_ == 0) {
|
||||
std::unique_lock worker_lock(worker_mutex_);
|
||||
|
||||
// Wait for the free buffer to become available.
|
||||
worker_cond_.wait(worker_lock, [this]() { return free_buffer_ != nullptr; });
|
||||
|
||||
current.buffer = free_buffer_;
|
||||
buffer_list_.push_back(current.buffer);
|
||||
write_index_.store({current.index + 1, current.buffer}, std::memory_order_relaxed);
|
||||
|
||||
// Tell the worker thread to allocate a new free buffer.
|
||||
free_buffer_ = nullptr;
|
||||
worker_cond_.notify_one();
|
||||
|
||||
// We successfully allocated a new buffer, return the first element.
|
||||
return ¤t.buffer[0];
|
||||
}
|
||||
}
|
||||
|
||||
if (write_index_.compare_exchange_weak(current, {current.index + 1, current.buffer},
|
||||
std::memory_order_relaxed))
|
||||
return ¤t.buffer[current.index % size_];
|
||||
}
|
||||
}
|
||||
|
||||
void AllocateFreeBuffer() {
|
||||
assert(free_buffer_ == nullptr);
|
||||
|
||||
free_buffer_ = allocator_.allocate(size_);
|
||||
assert(free_buffer_ != nullptr);
|
||||
|
||||
for (size_t i = 0; i < size_; ++i)
|
||||
free_buffer_[i].valid.store(TRACE_ENTRY_INVALID, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void WorkerThreadLoop(std::promise<void> ready) {
|
||||
std::unique_lock lock(worker_mutex_);
|
||||
|
||||
// This worker thread is now ready to accept work.
|
||||
ready.set_value();
|
||||
|
||||
while (true) {
|
||||
worker_cond_.wait(lock, [this]() { return free_buffer_ == nullptr; });
|
||||
if (!worker_thread_) break;
|
||||
AllocateFreeBuffer();
|
||||
worker_cond_.notify_one();
|
||||
}
|
||||
}
|
||||
|
||||
// The WriteIndex is used to store both the index and the buffer associated with that index (the
|
||||
// buffer contains the trace buffer records at [index - index % size, index - index % size_t +
|
||||
// size_ - 1]) in a single atomic variable.
|
||||
struct WriteIndex {
|
||||
uint64_t index;
|
||||
Entry* buffer;
|
||||
};
|
||||
|
||||
const callback_t flush_callback_;
|
||||
const uint64_t size_;
|
||||
|
||||
uint64_t read_index_; // The index of the next record to flush.
|
||||
std::atomic<WriteIndex> write_index_; // The index of the next record that could be written.
|
||||
Entry* free_buffer_{nullptr}; // The next available free buffer.
|
||||
|
||||
std::optional<std::thread> worker_thread_;
|
||||
std::mutex worker_mutex_;
|
||||
std::condition_variable worker_cond_;
|
||||
|
||||
std::mutex write_mutex_;
|
||||
std::list<Entry*> buffer_list_;
|
||||
Allocator allocator_;
|
||||
};
|
||||
} // namespace roctracer
|
||||
|
||||
// Defines the static members declared by roctracer::TraceBufferBase (the head of the list of
// registered trace buffers and the mutex guarding it).
#define TRACE_BUFFER_INSTANTIATE()                                       \
  roctracer::TraceBufferBase* roctracer::TraceBufferBase::head_ = nullptr; \
  std::mutex roctracer::TraceBufferBase::mutex_;
|
||||
|
||||
#endif // TOOL_TRACE_BUFFER_H_
|
||||
@@ -0,0 +1,794 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <roctracer_ext.h>
|
||||
#include <roctracer_hip.h>
|
||||
#include <roctracer_hsa.h>
|
||||
#include <roctracer_plugin.h>
|
||||
#include <roctracer_roctx.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <experimental/filesystem>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <stack>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <variant>
|
||||
|
||||
#include <cxxabi.h> /* kernel name demangling */
|
||||
#include <dirent.h>
|
||||
#include <dlfcn.h>
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h> /* SYS_xxx definitions */
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h> /* usleep */
|
||||
|
||||
#include "debug.h"
|
||||
#include "loader.h"
|
||||
#include "trace_buffer.h"
|
||||
#include "xml.h"
|
||||
|
||||
void initialize() __attribute__((constructor(101)));
|
||||
|
||||
namespace fs = std::experimental::filesystem;
|
||||
|
||||
// Macro to check ROC-tracer calls status
|
||||
// Evaluates a ROC-tracer call and, unless it returns ROCTRACER_STATUS_SUCCESS, passes the
// stringified call and roctracer_error_string() to fatal().
#define CHECK_ROCTRACER(call)                               \
  do {                                                      \
    if (ROCTRACER_STATUS_SUCCESS != (call)) {               \
      fatal(#call " failed: %s", roctracer_error_string()); \
    }                                                       \
  } while (false)
|
||||
|
||||
TRACE_BUFFER_INSTANTIATE();
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the timestamp produced by roctracer_get_timestamp(). A failing call is reported
// through CHECK_ROCTRACER.
inline roctracer_timestamp_t timestamp_ns() {
  roctracer_timestamp_t timestamp;
  CHECK_ROCTRACER(roctracer_get_timestamp(&timestamp));
  return timestamp;
}
|
||||
|
||||
// Lists of HSA / HIP API names; both start out empty.
std::vector<std::string> hsa_api_vec;
std::vector<std::string> hip_api_vec;

// Per-domain tracing switches; every switch starts out disabled.
bool trace_roctx{false};
bool trace_hsa_api{false};
bool trace_hsa_activity{false};
bool trace_hip_api{false};
bool trace_hip_activity{false};
bool trace_pcs{false};
|
||||
|
||||
// Returns the process id obtained from the getpid system call. The value is queried on the first
// call and the cached value is returned afterwards.
uint32_t GetPid() {
  static const uint32_t cached_pid = static_cast<uint32_t>(syscall(__NR_getpid));
  return cached_pid;
}
|
||||
// Returns the id of the calling thread obtained from the gettid system call. The value is queried
// once per thread and cached in a thread-local variable.
uint32_t GetTid() {
  static thread_local const uint32_t cached_tid = static_cast<uint32_t>(syscall(__NR_gettid));
  return cached_tid;
}
|
||||
|
||||
// Returns the trace buffer size taken from the ROCTRACER_BUFFER_SIZE environment variable
// (parsed as a decimal integer), or the default of 0x200000 when the variable is not set.
//
// A value that cannot be parsed, is out of range, or is not strictly positive is rejected with a
// message on stderr and replaced by the default. Previously such a value either escaped as an
// exception from std::stoll or was returned as-is, producing a zero or wrapped-around size.
size_t GetBufferSize() {
  constexpr size_t kDefaultBufferSize = 0x200000;

  const char* const requested = getenv("ROCTRACER_BUFFER_SIZE");
  if (requested == nullptr) return kDefaultBufferSize;

  try {
    const long long parsed = std::stoll(std::string(requested));
    if (parsed > 0) return static_cast<size_t>(parsed);
  } catch (const std::exception&) {
    // Not a number, or out of range for long long: handled by the fallback below.
  }

  fprintf(stderr, "ROCTRACER_BUFFER_SIZE=\"%s\" is not a valid buffer size, using %zu\n", requested,
          kDefaultBufferSize);
  return kDefaultBufferSize;
}
|
||||
|
||||
// Tracing control thread
|
||||
uint32_t control_delay_us = 0;
|
||||
uint32_t control_len_us = 0;
|
||||
uint32_t control_dist_us = 0;
|
||||
std::thread* trace_period_thread = nullptr;
|
||||
std::atomic_bool trace_period_stop = false;
|
||||
void trace_period_fun() {
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(control_delay_us));
|
||||
do {
|
||||
roctracer_start();
|
||||
if (trace_period_stop) {
|
||||
roctracer_stop();
|
||||
break;
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(control_len_us));
|
||||
roctracer_stop();
|
||||
if (trace_period_stop) break;
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(control_dist_us));
|
||||
} while (!trace_period_stop);
|
||||
}
|
||||
|
||||
// Flushing control thread
|
||||
uint32_t control_flush_us = 0;
|
||||
std::thread* flush_thread = nullptr;
|
||||
std::atomic_bool stop_flush_thread = false;
|
||||
|
||||
|
||||
void flush_thr_fun() {
|
||||
while (!stop_flush_thread) {
|
||||
CHECK_ROCTRACER(roctracer_flush_activity());
|
||||
roctracer::TraceBufferBase::FlushAll();
|
||||
std::this_thread::sleep_until(std::chrono::steady_clock::now() +
|
||||
std::chrono::microseconds(control_flush_us));
|
||||
}
|
||||
}
|
||||
|
||||
class roctracer_plugin_t {
|
||||
public:
|
||||
roctracer_plugin_t(const std::string& plugin_path) {
|
||||
plugin_handle_ = dlopen(plugin_path.c_str(), RTLD_LAZY);
|
||||
if (plugin_handle_ == nullptr) {
|
||||
warning("dlopen(\"%s\") failed: %s", plugin_path.c_str(), dlerror());
|
||||
return;
|
||||
}
|
||||
|
||||
roctracer_plugin_write_callback_record_ =
|
||||
reinterpret_cast<decltype(roctracer_plugin_write_callback_record)*>(
|
||||
dlsym(plugin_handle_, "roctracer_plugin_write_callback_record"));
|
||||
if (!roctracer_plugin_write_callback_record_) return;
|
||||
|
||||
roctracer_plugin_write_activity_records_ =
|
||||
reinterpret_cast<decltype(roctracer_plugin_write_activity_records)*>(
|
||||
dlsym(plugin_handle_, "roctracer_plugin_write_activity_records"));
|
||||
if (!roctracer_plugin_write_activity_records_) return;
|
||||
|
||||
roctracer_plugin_finalize_ = reinterpret_cast<decltype(roctracer_plugin_finalize)*>(
|
||||
dlsym(plugin_handle_, "roctracer_plugin_finalize"));
|
||||
if (!roctracer_plugin_finalize_) return;
|
||||
|
||||
if (auto* initialize = reinterpret_cast<decltype(roctracer_plugin_initialize)*>(
|
||||
dlsym(plugin_handle_, "roctracer_plugin_initialize"));
|
||||
initialize != nullptr)
|
||||
valid_ = initialize(ROCTRACER_VERSION_MAJOR, ROCTRACER_VERSION_MINOR) == 0;
|
||||
}
|
||||
|
||||
~roctracer_plugin_t() {
|
||||
if (is_valid()) roctracer_plugin_finalize_();
|
||||
if (plugin_handle_ != nullptr) dlclose(plugin_handle_);
|
||||
}
|
||||
|
||||
bool is_valid() const { return valid_; }
|
||||
|
||||
template <typename... Args> auto write_callback_record(Args... args) const {
|
||||
assert(is_valid());
|
||||
return roctracer_plugin_write_callback_record_(std::forward<Args>(args)...);
|
||||
}
|
||||
template <typename... Args> auto write_activity_records(Args... args) const {
|
||||
assert(is_valid());
|
||||
return roctracer_plugin_write_activity_records_(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
private:
|
||||
bool valid_{false};
|
||||
void* plugin_handle_;
|
||||
|
||||
decltype(roctracer_plugin_finalize)* roctracer_plugin_finalize_;
|
||||
decltype(roctracer_plugin_write_callback_record)* roctracer_plugin_write_callback_record_;
|
||||
decltype(roctracer_plugin_write_activity_records)* roctracer_plugin_write_activity_records_;
|
||||
};
|
||||
|
||||
std::optional<roctracer_plugin_t> plugin;
|
||||
|
||||
} // namespace
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// rocTX annotation tracing
|
||||
|
||||
struct roctx_trace_entry_t {
|
||||
std::atomic<roctracer::TraceEntryState> valid;
|
||||
roctracer_record_t record;
|
||||
union {
|
||||
roctx_api_data_t data;
|
||||
};
|
||||
|
||||
roctx_trace_entry_t(uint32_t cid, roctracer_timestamp_t time, uint32_t pid, uint32_t tid,
|
||||
roctx_range_id_t rid, const char* message)
|
||||
: valid(roctracer::TRACE_ENTRY_INIT) {
|
||||
record.domain = ACTIVITY_DOMAIN_ROCTX;
|
||||
record.op = cid;
|
||||
record.kind = 0;
|
||||
record.begin_ns = time;
|
||||
record.end_ns = 0;
|
||||
record.process_id = pid;
|
||||
record.thread_id = tid;
|
||||
data.args.message = message != nullptr ? strdup(message) : nullptr;
|
||||
data.args.id = rid;
|
||||
}
|
||||
~roctx_trace_entry_t() {
|
||||
if (data.args.message != nullptr) free(const_cast<char*>(data.args.message));
|
||||
}
|
||||
};
|
||||
|
||||
roctracer::TraceBuffer<roctx_trace_entry_t> roctx_trace_buffer(
|
||||
"rocTX API", GetBufferSize(), [](roctx_trace_entry_t* entry) {
|
||||
assert(plugin && "plugin is not initialized");
|
||||
plugin->write_callback_record(&entry->record, &entry->data);
|
||||
});
|
||||
|
||||
// rocTX callback function
|
||||
void roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
|
||||
void* /* user_arg */) {
|
||||
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
|
||||
|
||||
roctx_trace_entry_t& entry = roctx_trace_buffer.Emplace(cid, timestamp_ns(), GetPid(), GetTid(),
|
||||
data->args.id, data->args.message);
|
||||
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// HSA API tracing
|
||||
|
||||
struct hsa_api_trace_entry_t {
|
||||
std::atomic<uint32_t> valid;
|
||||
roctracer_record_t record;
|
||||
union {
|
||||
hsa_api_data_t data;
|
||||
};
|
||||
|
||||
hsa_api_trace_entry_t(uint32_t cid, roctracer_timestamp_t begin, roctracer_timestamp_t end,
|
||||
uint32_t pid, uint32_t tid, const hsa_api_data_t& hsa_api_data)
|
||||
: valid(roctracer::TRACE_ENTRY_INIT) {
|
||||
record.domain = ACTIVITY_DOMAIN_HSA_API;
|
||||
record.op = cid;
|
||||
record.kind = 0;
|
||||
record.begin_ns = begin;
|
||||
record.end_ns = end;
|
||||
record.process_id = pid;
|
||||
record.thread_id = tid;
|
||||
data = hsa_api_data;
|
||||
}
|
||||
~hsa_api_trace_entry_t() {}
|
||||
};
|
||||
|
||||
roctracer::TraceBuffer<hsa_api_trace_entry_t> hsa_api_trace_buffer(
|
||||
"HSA API", GetBufferSize(), [](hsa_api_trace_entry_t* entry) {
|
||||
assert(plugin && "plugin is not initialized");
|
||||
plugin->write_callback_record(&entry->record, &entry->data);
|
||||
});
|
||||
|
||||
// HSA API callback function
|
||||
|
||||
void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
|
||||
(void)arg;
|
||||
const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
*data->phase_data = timestamp_ns();
|
||||
} else {
|
||||
const roctracer_timestamp_t begin_timestamp = *data->phase_data;
|
||||
const roctracer_timestamp_t end_timestamp =
|
||||
(cid == HSA_API_ID_hsa_shut_down) ? begin_timestamp : timestamp_ns();
|
||||
hsa_api_trace_entry_t& entry = hsa_api_trace_buffer.Emplace(cid, begin_timestamp, end_timestamp,
|
||||
GetPid(), GetTid(), *data);
|
||||
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// HIP API tracing
|
||||
|
||||
struct hip_api_trace_entry_t {
|
||||
std::atomic<uint32_t> valid;
|
||||
roctracer_record_t record;
|
||||
union {
|
||||
hip_api_data_t data;
|
||||
};
|
||||
|
||||
hip_api_trace_entry_t(uint32_t cid, roctracer_timestamp_t begin, roctracer_timestamp_t end,
|
||||
uint32_t pid, uint32_t tid, const hip_api_data_t& hip_api_data,
|
||||
const char* name)
|
||||
: valid(roctracer::TRACE_ENTRY_INIT) {
|
||||
record.domain = ACTIVITY_DOMAIN_HIP_API;
|
||||
record.op = cid;
|
||||
record.kind = 0;
|
||||
record.begin_ns = begin;
|
||||
record.end_ns = end;
|
||||
record.process_id = pid;
|
||||
record.thread_id = tid;
|
||||
data = hip_api_data;
|
||||
record.kernel_name = name ? strdup(name) : nullptr;
|
||||
}
|
||||
|
||||
~hip_api_trace_entry_t() {
|
||||
if (record.kernel_name != nullptr) free(const_cast<char*>(record.kernel_name));
|
||||
}
|
||||
};
|
||||
|
||||
// Builds a "<kernel name>:<device id>;" list from the first 'numDevices' launch parameter sets,
// skipping the sets whose function pointer is null.
static std::string getKernelNameMultiKernelMultiDevice(hipLaunchParams* launchParamsList,
                                                       int numDevices) {
  std::stringstream joined;
  for (int device = 0; device < numDevices; ++device) {
    const hipLaunchParams& params = launchParamsList[device];
    if (params.func == nullptr) continue;

    joined << roctracer::HipLoader::Instance().KernelNameRefByPtr(params.func);
    joined << ":";
    joined << roctracer::HipLoader::Instance().GetStreamDeviceId(params.stream);
    joined << ";";
  }
  return joined.str();
}
|
||||
|
||||
// Aggregates several callables into one object whose call operator is the overload set of all of
// them; used as the visitor passed to std::visit.
template <class... Callables> struct Overloaded : Callables... {
  using Callables::operator()...;
};

// Deduction guide so that Overloaded{f, g, ...} deduces the callable types from its initializers.
template <typename... Callables> Overloaded(Callables...) -> Overloaded<Callables...>;
|
||||
|
||||
|
||||
static std::optional<std::string> getKernelName(uint32_t cid, const hip_api_data_t* data) {
|
||||
std::variant<const void*, hipFunction_t> function;
|
||||
switch (cid) {
|
||||
case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: {
|
||||
return getKernelNameMultiKernelMultiDevice(
|
||||
data->args.hipExtLaunchMultiKernelMultiDevice.launchParamsList,
|
||||
data->args.hipExtLaunchMultiKernelMultiDevice.numDevices);
|
||||
}
|
||||
case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: {
|
||||
return getKernelNameMultiKernelMultiDevice(
|
||||
data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList,
|
||||
data->args.hipLaunchCooperativeKernelMultiDevice.numDevices);
|
||||
}
|
||||
case HIP_API_ID_hipLaunchKernel: {
|
||||
function = data->args.hipLaunchKernel.function_address;
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipExtLaunchKernel: {
|
||||
function = data->args.hipExtLaunchKernel.function_address;
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipLaunchCooperativeKernel: {
|
||||
function = data->args.hipLaunchCooperativeKernel.f;
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipLaunchByPtr: {
|
||||
function = data->args.hipLaunchByPtr.hostFunction;
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipGraphAddKernelNode: {
|
||||
function = data->args.hipGraphAddKernelNode.pNodeParams->func;
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipGraphExecKernelNodeSetParams: {
|
||||
function = data->args.hipGraphExecKernelNodeSetParams.pNodeParams->func;
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipGraphKernelNodeSetParams: {
|
||||
function = data->args.hipGraphKernelNodeSetParams.pNodeParams->func;
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipModuleLaunchKernel: {
|
||||
function = data->args.hipModuleLaunchKernel.f;
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipExtModuleLaunchKernel: {
|
||||
function = data->args.hipExtModuleLaunchKernel.f;
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipHccModuleLaunchKernel: {
|
||||
function = data->args.hipHccModuleLaunchKernel.f;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return {};
|
||||
}
|
||||
return std::visit(
|
||||
Overloaded{
|
||||
[](const void* func) {
|
||||
return roctracer::HipLoader::Instance().KernelNameRefByPtr(func);
|
||||
},
|
||||
[](hipFunction_t func) { return roctracer::HipLoader::Instance().KernelNameRef(func); },
|
||||
},
|
||||
function);
|
||||
}
|
||||
|
||||
roctracer::TraceBuffer<hip_api_trace_entry_t> hip_api_trace_buffer(
|
||||
"HIP API", GetBufferSize(), [](hip_api_trace_entry_t* entry) {
|
||||
assert(plugin && "plugin is not initialized");
|
||||
plugin->write_callback_record(&entry->record, &entry->data);
|
||||
});
|
||||
|
||||
void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
|
||||
(void)arg;
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
|
||||
const roctracer_timestamp_t timestamp = timestamp_ns();
|
||||
std::optional<std::string> kernel_name;
|
||||
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
*data->phase_data = timestamp;
|
||||
} else {
|
||||
// Post init of HIP APU args
|
||||
hipApiArgsInit((hip_api_id_t)cid, const_cast<hip_api_data_t*>(data));
|
||||
kernel_name = getKernelName(cid, data);
|
||||
hip_api_trace_entry_t& entry =
|
||||
hip_api_trace_buffer.Emplace(cid, *data->phase_data, timestamp, GetPid(), GetTid(), *data,
|
||||
kernel_name ? kernel_name->c_str() : nullptr);
|
||||
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Input parser
|
||||
std::string normalize_token(const std::string& token, bool not_empty, const std::string& label) {
|
||||
const std::string space_chars_set = " \t";
|
||||
const size_t first_pos = token.find_first_not_of(space_chars_set);
|
||||
size_t norm_len = 0;
|
||||
std::string error_str = "none";
|
||||
if (first_pos != std::string::npos) {
|
||||
const size_t last_pos = token.find_last_not_of(space_chars_set);
|
||||
if (last_pos == std::string::npos)
|
||||
error_str = "token string error: \"" + token + "\"";
|
||||
else {
|
||||
const size_t end_pos = last_pos + 1;
|
||||
if (end_pos <= first_pos)
|
||||
error_str = "token string error: \"" + token + "\"";
|
||||
else
|
||||
norm_len = end_pos - first_pos;
|
||||
}
|
||||
}
|
||||
if (((first_pos != std::string::npos) && (norm_len == 0)) ||
|
||||
((first_pos == std::string::npos) && not_empty)) {
|
||||
error("normalize_token error: %s", error_str.c_str());
|
||||
}
|
||||
return (norm_len != 0) ? token.substr(first_pos, norm_len) : std::string("");
|
||||
}
|
||||
|
||||
// Splits the value of option 'field' of 'node' into tokens and appends the non-empty, normalized
// tokens to 'vec'.
//
//   delim  set of delimiter characters; a run of consecutive delimiters counts as one separator
//          (e.g. "hipLaunchKernel, hipExtModuleLaunchKernel, hipMemsetAsync" with delimiters
//          ' ' and ',').
//   label  when not null, "<label><field> = <value>" is printed to std::cout.
//
// Returns the number of separators that were consumed; 0 when the option is absent.
int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const std::string& delim,
                  std::vector<std::string>* vec, const char* label = nullptr) {
  const auto& options = node->opts;
  const auto found_option = options.find(field);
  if (found_option == options.end()) return 0;

  const std::string& text = found_option->second;
  if (label != nullptr) std::cout << label << field << " = " << text << std::endl;

  int separators_consumed = 0;
  const size_t text_len = text.length();
  for (size_t token_begin = 0; token_begin < text_len;) {
    const size_t separator = text.find_first_of(delim, token_begin);
    const bool has_separator = separator != std::string::npos;
    const size_t token_len = has_separator ? separator - token_begin : text_len - token_begin;

    // A token that is followed by a separator must not be empty.
    const std::string raw_token = text.substr(token_begin, token_len);
    const std::string clean_token = normalize_token(raw_token, has_separator, "get_xml_array");
    if (clean_token.length() != 0) vec->push_back(clean_token);

    if (!has_separator) break;

    // Skip the whole run of delimiters; yields npos (ending the loop) when only delimiters remain.
    token_begin = text.find_first_not_of(delim, separator);
    ++separators_consumed;
  }
  return separators_consumed;
}
|
||||
|
||||
// Allocating tracing pool
|
||||
void open_tracing_pool() {
|
||||
if (roctracer_default_pool() == nullptr) {
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = GetBufferSize();
|
||||
properties.buffer_callback_fun = [](const char* begin, const char* end, void* /* arg */) {
|
||||
assert(plugin && "plugin is not initialized");
|
||||
plugin->write_activity_records(reinterpret_cast<const roctracer_record_t*>(begin),
|
||||
reinterpret_cast<const roctracer_record_t*>(end));
|
||||
};
|
||||
CHECK_ROCTRACER(roctracer_open_pool(&properties));
|
||||
}
|
||||
}
|
||||
|
||||
// Flush tracing pool
|
||||
void close_tracing_pool() {
|
||||
if (roctracer_pool_t* pool = roctracer_default_pool(); pool != nullptr) {
|
||||
CHECK_ROCTRACER(roctracer_flush_activity_expl(pool));
|
||||
CHECK_ROCTRACER(roctracer_close_pool_expl(pool));
|
||||
}
|
||||
}
|
||||
|
||||
// tool library is loaded
|
||||
static bool is_loaded = false;
|
||||
|
||||
// tool unload method
|
||||
void tool_unload() {
|
||||
if (is_loaded == false) return;
|
||||
is_loaded = false;
|
||||
|
||||
if (flush_thread) {
|
||||
stop_flush_thread = true;
|
||||
flush_thread->join();
|
||||
delete flush_thread;
|
||||
flush_thread = nullptr;
|
||||
}
|
||||
|
||||
if (trace_period_thread) {
|
||||
trace_period_stop = true;
|
||||
trace_period_thread->join();
|
||||
delete trace_period_thread;
|
||||
trace_period_thread = nullptr;
|
||||
}
|
||||
|
||||
if (trace_roctx) {
|
||||
CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));
|
||||
}
|
||||
if (trace_hsa_api) {
|
||||
CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
|
||||
}
|
||||
if (trace_hsa_activity || trace_pcs) {
|
||||
CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
|
||||
}
|
||||
if (trace_hip_api || trace_hip_activity) {
|
||||
CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
|
||||
}
|
||||
|
||||
// Flush tracing pool
|
||||
close_tracing_pool();
|
||||
roctracer::TraceBufferBase::FlushAll();
|
||||
}
|
||||
|
||||
// tool load method
|
||||
void tool_load() {
|
||||
if (is_loaded == true) return;
|
||||
is_loaded = true;
|
||||
|
||||
// API traces switches
|
||||
const char* trace_domain = getenv("ROCTRACER_DOMAIN");
|
||||
if (trace_domain != nullptr) {
|
||||
// ROCTX domain
|
||||
if (std::string(trace_domain).find("roctx") != std::string::npos) {
|
||||
trace_roctx = true;
|
||||
}
|
||||
|
||||
// HSA/HIP domains enabling
|
||||
if (std::string(trace_domain).find("hsa") != std::string::npos) {
|
||||
trace_hsa_api = true;
|
||||
trace_hsa_activity = true;
|
||||
}
|
||||
if (std::string(trace_domain).find("hip") != std::string::npos) {
|
||||
trace_hip_api = true;
|
||||
trace_hip_activity = true;
|
||||
}
|
||||
if (std::string(trace_domain).find("sys") != std::string::npos) {
|
||||
trace_hsa_api = true;
|
||||
trace_hip_api = true;
|
||||
trace_hip_activity = true;
|
||||
}
|
||||
|
||||
// PC sampling enabling
|
||||
if (std::string(trace_domain).find("pcs") != std::string::npos) {
|
||||
trace_pcs = true;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "ROCtracer (" << std::dec << GetPid() << "):";
|
||||
|
||||
// XML input
|
||||
const char* xml_name = getenv("ROCP_INPUT");
|
||||
if (xml_name != nullptr) {
|
||||
xml::Xml* xml = xml::Xml::Create(xml_name);
|
||||
if (xml == nullptr) error("input file not found '%s'", xml_name);
|
||||
|
||||
bool found = false;
|
||||
for (const auto* entry : xml->GetNodes("top.trace")) {
|
||||
auto it = entry->opts.find("name");
|
||||
if (it == entry->opts.end()) error("trace name is missing");
|
||||
const std::string& name = it->second;
|
||||
|
||||
std::vector<std::string> api_vec;
|
||||
for (const auto* node : entry->nodes) {
|
||||
if (node->tag != "parameters")
|
||||
error("trace node is not supported '%s:%%%s'", name.c_str(), node->tag.c_str());
|
||||
get_xml_array(node, "api", ", ",
|
||||
&api_vec); // delimiter options given as both spaces and commas (' ' and ',')
|
||||
break;
|
||||
}
|
||||
|
||||
if (name == "rocTX") {
|
||||
found = true;
|
||||
trace_roctx = true;
|
||||
}
|
||||
if (name == "HSA") {
|
||||
found = true;
|
||||
trace_hsa_api = true;
|
||||
hsa_api_vec = api_vec;
|
||||
}
|
||||
if (name == "GPU") {
|
||||
found = true;
|
||||
trace_hsa_activity = true;
|
||||
}
|
||||
if (name == "HIP") {
|
||||
found = true;
|
||||
trace_hip_api = true;
|
||||
trace_hip_activity = true;
|
||||
hip_api_vec = api_vec;
|
||||
}
|
||||
}
|
||||
|
||||
if (found) std::cout << " input from \"" << xml_name << "\"";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
// Disable HIP activity if HSA activity was set
|
||||
if (trace_hsa_activity == true) trace_hip_activity = false;
|
||||
|
||||
// Enable rpcTX callbacks
|
||||
if (trace_roctx) {
|
||||
// initialize HSA tracing
|
||||
std::cout << " rocTX-trace()" << std::endl;
|
||||
CHECK_ROCTRACER(
|
||||
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, nullptr));
|
||||
}
|
||||
|
||||
const char* ctrl_str = getenv("ROCP_CTRL_RATE");
|
||||
if (ctrl_str != nullptr) {
|
||||
uint32_t ctrl_delay = 0;
|
||||
uint32_t ctrl_len = 0;
|
||||
uint32_t ctrl_rate = 0;
|
||||
|
||||
if (sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate) != 3 ||
|
||||
ctrl_len > ctrl_rate)
|
||||
error("invalid ROCP_CTRL_RATE variable (ctrl_delay:ctrl_len:ctrl_rate)");
|
||||
|
||||
control_dist_us = ctrl_rate - ctrl_len;
|
||||
control_len_us = ctrl_len;
|
||||
control_delay_us = ctrl_delay;
|
||||
|
||||
roctracer_stop();
|
||||
|
||||
if (ctrl_delay != UINT32_MAX) {
|
||||
std::cout << "ROCtracer: trace control: delay(" << ctrl_delay << "us), length(" << ctrl_len
|
||||
<< "us), rate(" << ctrl_rate << "us)" << std::endl;
|
||||
trace_period_thread = new std::thread(trace_period_fun);
|
||||
} else {
|
||||
std::cout << "ROCtracer: trace start disabled" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
const char* flush_str = getenv("ROCP_FLUSH_RATE");
|
||||
if (flush_str != nullptr) {
|
||||
sscanf(flush_str, "%d", &control_flush_us);
|
||||
if (control_flush_us == 0) error("invalid control flush rate value '%s'", flush_str);
|
||||
|
||||
std::cout << "ROCtracer: trace control flush rate(" << control_flush_us << "us)" << std::endl;
|
||||
flush_thread = new std::thread(flush_thr_fun);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
// itself, not by code during _init. 'extern const' seems to do that although that is not a
// guarantee.
// NOTE(review): presumably read by the HSA runtime when it loads this tool library - confirm
// against the runtime's tool-loading documentation.
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 1050;
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
uint64_t failed_tool_count, const char* const* failed_tool_names) {
|
||||
if (roctracer_version_major() != ROCTRACER_VERSION_MAJOR ||
|
||||
roctracer_version_minor() < ROCTRACER_VERSION_MINOR) {
|
||||
warning("the ROCtracer API version is not compatible with this tool");
|
||||
return true;
|
||||
}
|
||||
|
||||
// Load output plugin
|
||||
const char* plugin_name = getenv("ROCTRACER_PLUGIN_LIB");
|
||||
if (plugin_name == nullptr) plugin_name = "libfile_plugin.so";
|
||||
if (Dl_info dl_info; dladdr((void*)tool_load, &dl_info) != 0) {
|
||||
if (!plugin.emplace(fs::path(dl_info.dli_fname).replace_filename(plugin_name)).is_valid())
|
||||
plugin.reset();
|
||||
}
|
||||
|
||||
tool_load();
|
||||
|
||||
// OnUnload may not be called if the ROC runtime is not shutdown by the client
|
||||
// application before exiting, so register an atexit handler to unload the tool.
|
||||
std::atexit(tool_unload);
|
||||
|
||||
// Enable HSA API callbacks/activity
|
||||
if (trace_hsa_api) {
|
||||
std::ostringstream out;
|
||||
out << " HSA-trace(";
|
||||
if (hsa_api_vec.size() != 0) {
|
||||
out << "-*";
|
||||
for (unsigned i = 0; i < hsa_api_vec.size(); ++i) {
|
||||
uint32_t cid = HSA_API_ID_NUMBER;
|
||||
const char* api = hsa_api_vec[i].c_str();
|
||||
if (roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, nullptr) ==
|
||||
ROCTRACER_STATUS_SUCCESS &&
|
||||
roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, nullptr) ==
|
||||
ROCTRACER_STATUS_SUCCESS)
|
||||
out << ' ' << api;
|
||||
else
|
||||
warning("Unable to enable HSA_API tracing for invalid operation %s", api);
|
||||
}
|
||||
} else {
|
||||
CHECK_ROCTRACER(
|
||||
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, nullptr));
|
||||
out << "*";
|
||||
}
|
||||
std::cout << out.str() << ')' << std::endl;
|
||||
}
|
||||
|
||||
// Enable HSA GPU activity
|
||||
if (trace_hsa_activity) {
|
||||
// Allocating tracing pool
|
||||
open_tracing_pool();
|
||||
|
||||
std::cout << " HSA-activity-trace()" << std::endl;
|
||||
CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));
|
||||
}
|
||||
|
||||
// Enable HIP API callbacks/activity
|
||||
if (trace_hip_api || trace_hip_activity) {
|
||||
std::ostringstream out;
|
||||
out << " HIP-trace(";
|
||||
// Allocating tracing pool
|
||||
open_tracing_pool();
|
||||
|
||||
// Enable tracing
|
||||
if (trace_hip_api) {
|
||||
if (hip_api_vec.size() != 0) {
|
||||
out << "-*";
|
||||
for (unsigned i = 0; i < hip_api_vec.size(); ++i) {
|
||||
uint32_t cid = HIP_API_ID_NONE;
|
||||
const char* api = hip_api_vec[i].c_str();
|
||||
if (roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, api, &cid, nullptr) ==
|
||||
ROCTRACER_STATUS_SUCCESS &&
|
||||
roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, cid, hip_api_callback,
|
||||
nullptr) == ROCTRACER_STATUS_SUCCESS)
|
||||
out << ' ' << api;
|
||||
else
|
||||
warning("Unable to enable HIP_API tracing for invalid operation %s", api);
|
||||
}
|
||||
} else {
|
||||
CHECK_ROCTRACER(
|
||||
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr));
|
||||
out << "*";
|
||||
}
|
||||
}
|
||||
|
||||
if (trace_hip_activity) {
|
||||
CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
|
||||
}
|
||||
std::cout << out.str() << ')' << std::endl;
|
||||
}
|
||||
|
||||
// Enable PC sampling
|
||||
if (trace_pcs) {
|
||||
std::cout << " PCS-trace()" << std::endl;
|
||||
open_tracing_pool();
|
||||
CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_RESERVED1));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// HSA-runtime on-unload method
|
||||
ROCTRACER_EXPORT void OnUnload() { tool_unload(); }
|
||||
|
||||
} // extern "C"
|
||||
|
||||
// Runs tool_load(); repeated calls are harmless because tool_load() only acts once.
void initialize() { tool_load(); }
|
||||
@@ -0,0 +1,125 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "debug.h"
|
||||
#include "util.h"
|
||||
|
||||
#include <cstdarg>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#if defined(ENABLE_BACKTRACE)
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <backtrace.h>
|
||||
|
||||
namespace {
|
||||
|
||||
struct BackTraceInfo {
|
||||
struct ::backtrace_state* state = nullptr;
|
||||
std::stringstream sstream{};
|
||||
int depth = 0;
|
||||
int error = 0;
|
||||
};
|
||||
|
||||
void errorCallback(void* data, const char* message, int errnum) {
|
||||
BackTraceInfo* info = static_cast<BackTraceInfo*>(data);
|
||||
info->sstream << "ROCtracer error: " << message << '(' << errnum << ')';
|
||||
info->error = 1;
|
||||
}
|
||||
|
||||
void syminfoCallback(void* data, uintptr_t /* pc */, const char* symname, uintptr_t /* symval */,
|
||||
uintptr_t /* symsize */) {
|
||||
BackTraceInfo* info = static_cast<BackTraceInfo*>(data);
|
||||
|
||||
if (symname == nullptr) return;
|
||||
|
||||
int status;
|
||||
char* demangled = abi::__cxa_demangle(symname, nullptr, nullptr, &status);
|
||||
info->sstream << ' ' << (status == 0 ? demangled : symname);
|
||||
free(demangled);
|
||||
}
|
||||
|
||||
int fullCallback(void* data, uintptr_t pc, const char* filename, int lineno, const char* function) {
|
||||
BackTraceInfo* info = static_cast<BackTraceInfo*>(data);
|
||||
|
||||
info->sstream << std::endl
|
||||
<< " #" << std::dec << info->depth++ << ' ' << "0x" << std::noshowbase
|
||||
<< std::hex << std::setfill('0') << std::setw(sizeof(pc) * 2) << pc;
|
||||
if (function == nullptr)
|
||||
backtrace_syminfo(info->state, pc, syminfoCallback, errorCallback, data);
|
||||
else {
|
||||
int status;
|
||||
char* demangled = abi::__cxa_demangle(function, nullptr, nullptr, &status);
|
||||
info->sstream << ' ' << (status == 0 ? demangled : function);
|
||||
free(demangled);
|
||||
|
||||
if (filename != nullptr) {
|
||||
info->sstream << " in " << filename;
|
||||
if (lineno) info->sstream << ':' << std::dec << lineno;
|
||||
}
|
||||
}
|
||||
|
||||
return info->error;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
#endif // defined (ENABLE_BACKTRACE)
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
// Print "ROCtracer warning: " followed by the printf-style formatted message to std::cerr.
void warning(const char* format, ...) {
  va_list args;
  va_start(args, format);
  const std::string message = string_vprintf(format, args);
  va_end(args);

  std::cerr << "ROCtracer warning: " << message << std::endl;
}
|
||||
|
||||
// Print "ROCtracer error: " followed by the printf-style formatted message to std::cerr and
// terminate the process with exit(EXIT_FAILURE). Never returns; the attribute is repeated on the
// definition so that it agrees with the declaration in debug.h and with fatal() below.
void error [[noreturn]] (const char* format, ...) {
  va_list va;
  va_start(va, format);
  std::cerr << "ROCtracer error: " << string_vprintf(format, va) << std::endl;
  va_end(va);
  exit(EXIT_FAILURE);
}
|
||||
|
||||
void fatal [[noreturn]] (const char* format, ...) {
|
||||
va_list va;
|
||||
va_start(va, format);
|
||||
std::string message = string_vprintf(format, va);
|
||||
va_end(va);
|
||||
|
||||
#if defined(ENABLE_BACKTRACE)
|
||||
BackTraceInfo info;
|
||||
|
||||
info.sstream << std::endl << "Backtrace:";
|
||||
info.state = ::backtrace_create_state("/proc/self/exe", 0, errorCallback, &info);
|
||||
::backtrace_full(info.state, 1, fullCallback, errorCallback, &info);
|
||||
|
||||
message += info.sstream.str();
|
||||
#endif /* defined (ENABLE_BACKTRACE) */
|
||||
|
||||
std::cerr << "ROCtracer fatal error: " << message << std::endl;
|
||||
abort();
|
||||
}
|
||||
|
||||
} // namespace roctracer
|
||||
@@ -0,0 +1,47 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace roctracer {

// Report a printf-style formatted warning; execution continues.
void warning(const char* format, ...)
#if defined(__GNUC__)
    __attribute__((format(printf, 1, 2)))
#endif  // defined (__GNUC__)
    ;

// Report a printf-style formatted error; does not return.
[[noreturn]] void error(const char* format, ...)
#if defined(__GNUC__)
    __attribute__((format(printf, 1, 2)))
#endif  // defined (__GNUC__)
    ;

// Report a printf-style formatted fatal error; does not return.
[[noreturn]] void fatal(const char* format, ...)
#if defined(__GNUC__)
    __attribute__((format(printf, 1, 2)))
#endif  // defined (__GNUC__)
    ;

}  // namespace roctracer

// Make the reporting functions usable without the namespace qualifier.
using roctracer::error;
using roctracer::fatal;
using roctracer::warning;
|
||||
@@ -0,0 +1,167 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef SRC_UTIL_LOGGER_H_
|
||||
#define SRC_UTIL_LOGGER_H_
|
||||
|
||||
#include <time.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/file.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <exception>
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
|
||||
namespace roctracer::util {

// Process-wide singleton logger.
//
// Values streamed with operator<< are written to /tmp/roctracer_log.txt when the ROCTRACER_LOG
// environment variable is set. Between the begm and endl manipulators the streamed text is also
// accumulated per thread id and can be read back with LastMessage().
class Logger {
 public:
  // Stream one value. The first value after construction or after endl goes through Log(), which
  // prefixes it with a timestamp/pid/tid header; following values are appended as-is.
  template <typename T> Logger& operator<<(T&& m) {
    std::ostringstream oss;
    oss << std::forward<T>(m);
    // streaming_ is also accessed by ResetStreaming(), which does so under mutex_. Take the
    // mutex here as well (it is recursive, so Log()/Put() can lock it again) so that the check
    // and the update below are not a data race.
    std::lock_guard lock(mutex_);
    if (!streaming_)
      Log(oss.str());
    else
      Put(oss.str());
    streaming_ = true;
    return *this;
  }

  // Manipulators are plain functions that are invoked when streamed.
  using manip_t = void (*)();
  Logger& operator<<(manip_t f) {
    f();
    return *this;
  }

  // Begin of a message: clears the calling thread's stored message and starts accumulating.
  static void begm() { Instance().ResetStreaming(true); }
  // End of a message: terminates the current line and stops accumulating.
  static void endl() { Instance().ResetStreaming(false); }

  // Text accumulated for the calling thread since its last begm.
  const std::string& LastMessage() {
    std::lock_guard lock(mutex_);
    return message_[GetTid()];
  }

  static Logger& Instance() {
    static Logger instance;
    return instance;
  }

  static uint32_t GetPid() { return syscall(__NR_getpid); }
  static uint32_t GetTid() { return syscall(__NR_gettid); }

 private:
  // Opens the log file in append mode only when ROCTRACER_LOG is set in the environment.
  Logger() : file_(nullptr), dirty_(false), streaming_(false), messaging_(false) {
    const char* var = getenv("ROCTRACER_LOG");
    if (var != nullptr) file_ = fopen("/tmp/roctracer_log.txt", "a");
    ResetStreaming(false);
  }

  // Terminates an unfinished line, if any, and closes the log file.
  ~Logger() {
    if (file_ != nullptr) {
      if (dirty_) Put("\n");
      fclose(file_);
    }
  }

  // messaging == true starts a new message for the calling thread; messaging == false ends the
  // current line if one is being streamed.
  void ResetStreaming(const bool messaging) {
    std::lock_guard lock(mutex_);
    if (messaging) {
      message_[GetTid()] = "";
    } else if (streaming_) {
      // messaging_ is still unchanged at this point, so this newline is also appended to the
      // stored message when one is being accumulated.
      Put("\n");
      dirty_ = false;
    }
    messaging_ = messaging;
    streaming_ = messaging;
  }

  // Append text to the calling thread's stored message (while messaging) and to the log file
  // (when open). The file write is bracketed by flock(LOCK_EX) / flock(LOCK_UN).
  void Put(const std::string& m) {
    std::lock_guard lock(mutex_);
    if (messaging_) {
      message_[GetTid()] += m;
    }
    if (file_ != nullptr) {
      dirty_ = true;
      flock(fileno(file_), LOCK_EX);
      fprintf(file_, "%s", m.c_str());
      fflush(file_);
      flock(fileno(file_), LOCK_UN);
    }
  }

  // Emit text prefixed with "<YYYY-MM-DD HH:MM:SS pidN tidM> ".
  void Log(const std::string& m) {
    const time_t rawtime = time(nullptr);
    tm tm_info;
    localtime_r(&rawtime, &tm_info);
    char tm_str[26];
    strftime(tm_str, 26, "%Y-%m-%d %H:%M:%S", &tm_info);
    std::ostringstream oss;
    oss << "<" << tm_str << std::dec << " pid" << GetPid() << " tid" << GetTid() << "> " << m;
    Put(oss.str());
  }

  FILE* file_;      // log file, or nullptr when logging to file is disabled
  bool dirty_;      // something was written to the file since the last line end
  bool streaming_;  // a line is in progress; further values are appended without a header
  bool messaging_;  // streamed text is also accumulated in message_

  std::recursive_mutex mutex_;
  std::map<uint32_t, std::string> message_;  // accumulated message per thread id
};

}  // namespace roctracer::util
|
||||
|
||||
// Log "fatal: " followed by the streamed message, then abort() the process.
#define FATAL_LOGGING(stream) \
  do { \
    roctracer::util::Logger::Instance() \
        << "fatal: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \
    abort(); \
  } while (false)

// Log "error: " followed by the streamed message.
#define ERR_LOGGING(stream) \
  do { \
    roctracer::util::Logger::Instance() \
        << "error: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \
  } while (false)

// Log "info: " followed by the streamed message.
#define INFO_LOGGING(stream) \
  do { \
    roctracer::util::Logger::Instance() \
        << "info: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \
  } while (false)

// Print the streamed message to std::cerr and log it with a "warning: " prefix.
// Note that `stream` is expanded (and therefore evaluated) twice.
#define WARN_LOGGING(stream) \
  do { \
    std::cerr << "ROCProfiler: " << stream << std::endl; \
    roctracer::util::Logger::Instance() << "warning: " << roctracer::util::Logger::begm << stream \
                                        << roctracer::util::Logger::endl; \
  } while (false)
|
||||
|
||||
#endif // SRC_UTIL_LOGGER_H_
|
||||
@@ -0,0 +1,51 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdarg>
|
||||
#include <string>
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
// Format `format` with the arguments in `va` (vprintf semantics) and return the result as a
// std::string. Returns an empty string when the output is empty or the formatting fails.
// `va` is consumed by the call; the caller remains responsible for va_end.
std::string string_vprintf(const char* format, va_list va) {
  // First pass on a copy of the argument list, only to measure the output.
  va_list copy;
  va_copy(copy, va);
  const int length = vsnprintf(nullptr, 0, format, copy);
  va_end(copy);

  // vsnprintf reports failure with a negative value, which must not be used as a size.
  if (length <= 0) return std::string();

  std::string str(static_cast<size_t>(length), '\0');
  // Bounded second pass: size() characters plus the terminating NUL, which lands on the
  // string's own terminator.
  vsnprintf(&str[0], str.size() + 1, format, va);

  return str;
}
|
||||
|
||||
std::string string_printf(const char* format, ...) {
|
||||
va_list va;
|
||||
va_start(va, format);
|
||||
std::string str(string_vprintf(format, va));
|
||||
va_end(va);
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
} // namespace roctracer
|
||||
@@ -0,0 +1,36 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdarg>
|
||||
#include <string>
|
||||
|
||||
namespace roctracer {

// Format `format` with the arguments in `va` and return the result as a std::string.
std::string string_vprintf(const char* format, va_list va);

// Variadic counterpart of string_vprintf().
std::string string_printf(const char* format, ...)
#if defined(__GNUC__)
    __attribute__((format(printf, 1, 2)))
#endif  // defined (__GNUC__)
    ;

}  // namespace roctracer
|
||||
@@ -0,0 +1,457 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef TEST_UTIL_XML_H_
|
||||
#define TEST_UTIL_XML_H_
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace xml {
|
||||
|
||||
class Xml {
|
||||
public:
|
||||
// A token is a raw sequence of characters.
using token_t = std::vector<char>;

struct level_t;
using nodes_t = std::vector<level_t*>;
using opts_t = std::map<std::string, std::string>;

// One XML node: its tag, its child nodes and its name/value options.
struct level_t {
  std::string tag;
  nodes_t nodes;
  opts_t opts;
};

using nodes_vec_t = std::vector<level_t*>;
// Nodes grouped by their full dot-separated tag (for example "top.trace").
using map_t = std::map<std::string, nodes_vec_t>;

// Parser states.
enum { DECL_STATE, BODY_STATE };
|
||||
|
||||
// Parse `file_name` and return the resulting Xml object, or NULL when the file (or one of the
// files it includes) cannot be opened. `obj` is the including Xml object when this call handles
// an "#include"; the new object then shares that object's node map.
static Xml* Create(const std::string& file_name, const Xml* obj = NULL) {
  Xml* xml = new Xml(file_name, obj);
  if (xml->Init() == false) {
    delete xml;
    return NULL;
  }

  // Included files are resolved relative to the directory of the including file.
  const std::size_t slash = file_name.rfind('/');
  const std::string path = (slash == std::string::npos) ? "" : file_name.substr(0, slash + 1);

  xml->PreProcess();

  // Collect the include nodes that were not handled yet and mark them as handled.
  nodes_t incl_nodes;
  for (auto* node : xml->GetNodes("top.include")) {
    if (node->opts.find("touch") != node->opts.end()) continue;
    node->opts["touch"] = "";
    incl_nodes.push_back(node);
  }

  for (auto* incl : incl_nodes) {
    const std::string incl_name = path + incl->opts["file"];
    Xml* ixml = Create(incl_name, xml);
    if (ixml == NULL) {
      delete xml;
      return NULL;
    }
    // Only the temporary object for the included file is released here.
    delete ixml;
  }

  xml->Process();
  return xml;
}
|
||||
|
||||
static void Destroy(Xml* xml) { delete xml; }
|
||||
|
||||
std::string GetName() { return file_name_; }
|
||||
|
||||
// Register a new node under `full_tag` carrying the options "name" and "expr". The node's own
// tag is the part of `full_tag` after the last '.', or all of it when there is no '.'.
void AddExpr(const std::string& full_tag, const std::string& name, const std::string& expr) {
  const std::size_t dot = full_tag.rfind('.');
  const std::size_t tag_begin = (dot != std::string::npos) ? dot + 1 : 0;

  level_t* level = new level_t;
  (*map_)[full_tag].push_back(level);
  level->tag = full_tag.substr(tag_begin);
  level->opts["name"] = name;
  level->opts["expr"] = expr;
}
|
||||
|
||||
// Register a constant: same as AddExpr() with the decimal representation of `val` as the
// expression. std::to_string comes from <string>, which this header includes; the former
// std::ostringstream needed <sstream>, which it does not include.
void AddConst(const std::string& full_tag, const std::string& name, const uint64_t& val) {
  AddExpr(full_tag, name, std::to_string(val));
}
|
||||
|
||||
// Return a copy of the node list registered under `global_tag`. Looking up an unknown tag
// creates an empty entry for it in the map.
nodes_t GetNodes(const std::string& global_tag) {
  nodes_vec_t& registered = (*map_)[global_tag];
  return registered;
}
|
||||
|
||||
template <class F> F ForEach(const F& f_i) {
|
||||
F f = f_i;
|
||||
if (map_) {
|
||||
for (auto& entry : *map_) {
|
||||
for (auto node : entry.second) {
|
||||
if (f.fun(entry.first, node) == false) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
template <class F> F ForEach(const F& f_i) const {
|
||||
F f = f_i;
|
||||
if (map_) {
|
||||
for (auto& entry : *map_) {
|
||||
for (auto node : entry.second) {
|
||||
if (f.fun(entry.first, node) == false) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
struct print_func {
|
||||
bool fun(const std::string& global_tag, level_t* node) {
|
||||
for (auto& opt : node->opts) {
|
||||
std::cout << global_tag << "." << opt.first << " = " << opt.second << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
void Print() const {
|
||||
std::cout << "XML file '" << file_name_ << "':" << std::endl;
|
||||
ForEach(print_func());
|
||||
}
|
||||
|
||||
private:
|
||||
Xml(const std::string& file_name, const Xml* obj)
|
||||
: file_name_(file_name),
|
||||
file_line_(0),
|
||||
data_size_(0),
|
||||
index_(0),
|
||||
state_(BODY_STATE),
|
||||
comment_(false),
|
||||
included_(false),
|
||||
level_(NULL),
|
||||
map_(NULL) {
|
||||
if (obj != NULL) {
|
||||
map_ = obj->map_;
|
||||
level_ = obj->level_;
|
||||
included_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
struct delete_func {
|
||||
bool fun(const std::string&, level_t* node) {
|
||||
delete node;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Objects created for included files share the node map and leave it alone; every other object
// deletes all nodes and the map itself.
~Xml() {
  if (included_) return;

  ForEach(delete_func());
  delete map_;
}
|
||||
|
||||
bool Init() {
|
||||
fd_ = open(file_name_.c_str(), O_RDONLY);
|
||||
if (fd_ == -1) {
|
||||
// perror((std::string("open XML file ") + file_name_).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (map_ == NULL) {
|
||||
map_ = new map_t;
|
||||
if (map_ == NULL) return false;
|
||||
AddLevel("top");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void PreProcess() {
|
||||
uint32_t ind = 0;
|
||||
char buf[kBufSize];
|
||||
bool error = false;
|
||||
|
||||
while (1) {
|
||||
const uint32_t pos = lseek(fd_, 0, SEEK_CUR);
|
||||
uint32_t size = read(fd_, buf, kBufSize);
|
||||
if (size <= 0) break;
|
||||
buf[size - 1] = '\0';
|
||||
|
||||
if (strncmp(buf, "#include \"", 10) == 0) {
|
||||
for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) {
|
||||
}
|
||||
if (ind == size) {
|
||||
fprintf(stderr, "XML PreProcess failed, line size limit %zu\n", kBufSize);
|
||||
error = true;
|
||||
break;
|
||||
}
|
||||
buf[ind] = '\0';
|
||||
size = ind;
|
||||
lseek(fd_, pos + ind + 1, SEEK_SET);
|
||||
|
||||
for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) {
|
||||
}
|
||||
if (ind == size) {
|
||||
error = true;
|
||||
break;
|
||||
}
|
||||
buf[ind] = '\0';
|
||||
|
||||
AddLevel("include");
|
||||
AddOption("file", &buf[10]);
|
||||
UpLevel();
|
||||
}
|
||||
}
|
||||
|
||||
if (error) {
|
||||
fprintf(stderr, "XML PreProcess failed, line '%s'\n", buf);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
lseek(fd_, 0, SEEK_SET);
|
||||
}
|
||||
|
||||
void Process() {
|
||||
token_t remainder;
|
||||
|
||||
while (1) {
|
||||
token_t token = (remainder.size()) ? remainder : NextToken();
|
||||
remainder.clear();
|
||||
|
||||
// token_t token1 = token;
|
||||
// token1.push_back('\0');
|
||||
// std::cout << "> " << &token1[0] << std::endl;
|
||||
|
||||
// End of file
|
||||
if (token.size() == 0) break;
|
||||
|
||||
switch (state_) {
|
||||
case BODY_STATE:
|
||||
if (token[0] == '<') {
|
||||
bool node_begin = true;
|
||||
unsigned ind = 1;
|
||||
if (token[1] == '/') {
|
||||
node_begin = false;
|
||||
++ind;
|
||||
}
|
||||
|
||||
unsigned i = ind;
|
||||
while (i < token.size()) {
|
||||
if (token[i] == '>') break;
|
||||
++i;
|
||||
}
|
||||
for (unsigned j = i + 1; j < token.size(); ++j) remainder.push_back(token[j]);
|
||||
|
||||
if (i == token.size()) {
|
||||
if (node_begin)
|
||||
state_ = DECL_STATE;
|
||||
else
|
||||
BadFormat(token);
|
||||
token.push_back('\0');
|
||||
} else {
|
||||
token[i] = '\0';
|
||||
}
|
||||
|
||||
const char* tag = &token[ind];
|
||||
if (node_begin) {
|
||||
AddLevel(tag);
|
||||
} else {
|
||||
if (strncmp(CurrentLevel().c_str(), tag, strlen(tag)) != 0) {
|
||||
token.back() = '>';
|
||||
BadFormat(token);
|
||||
}
|
||||
UpLevel();
|
||||
}
|
||||
} else {
|
||||
BadFormat(token);
|
||||
}
|
||||
break;
|
||||
case DECL_STATE:
|
||||
if (token[0] == '>') {
|
||||
state_ = BODY_STATE;
|
||||
for (unsigned j = 1; j < token.size(); ++j) remainder.push_back(token[j]);
|
||||
continue;
|
||||
} else {
|
||||
token.push_back('\0');
|
||||
unsigned j = 0;
|
||||
for (j = 0; j < token.size(); ++j)
|
||||
if (token[j] == '=') break;
|
||||
if (j == token.size()) BadFormat(token);
|
||||
token[j] = '\0';
|
||||
const char* key = &token[0];
|
||||
const char* value = &token[j + 1];
|
||||
AddOption(key, value);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
std::cout << "XML parser error: wrong state: " << state_ << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool SpaceCheck() const {
|
||||
bool cond = ((buffer_[index_] == ' ') || (buffer_[index_] == '\t'));
|
||||
return cond;
|
||||
}
|
||||
|
||||
bool LineEndCheck() {
|
||||
bool found = false;
|
||||
if (buffer_[index_] == '\n') {
|
||||
buffer_[index_] = ' ';
|
||||
++file_line_;
|
||||
found = true;
|
||||
comment_ = false;
|
||||
} else if (comment_ || (buffer_[index_] == '#')) {
|
||||
found = true;
|
||||
comment_ = true;
|
||||
}
|
||||
return found;
|
||||
}
|
||||
|
||||
// Returns the next whitespace-delimited token of the file, or an empty token
// at end of file (a read error is handled like end of file).
//
// The file is consumed through buffer_ in chunks of up to kBufSize bytes; a
// token may span several chunks. Blanks, tabs, newlines and '#' comments
// separate tokens. Inside double quotes separators are part of the token; the
// quotes themselves are dropped. A backslash escapes a following '"' or '\'.
// An unterminated string or a dangling escape at the end of a token is
// reported through BadFormat(), which terminates the process.
token_t NextToken() {
  token_t token;
  bool in_string = false;
  bool special_symb = false;

  while (1) {
    if (data_size_ == 0) {
      // Keep the signed result: storing -1 into the unsigned data_size_ would
      // look like a huge amount of valid data.
      const auto bytes_read = read(fd_, buffer_, kBufSize);
      if (bytes_read <= 0) break;
      data_size_ = static_cast<unsigned>(bytes_read);
    }

    // Skip leading separators only while no token character was collected yet.
    if (token.empty()) {
      while ((index_ < data_size_) && (SpaceCheck() || LineEndCheck())) {
        ++index_;
      }
    }
    while ((index_ < data_size_) && (in_string || !(SpaceCheck() || LineEndCheck()))) {
      const char symb = buffer_[index_];
      bool skip_symb = false;

      switch (symb) {
        case '\\':
          if (special_symb) {
            special_symb = false;
          } else {
            special_symb = true;
            skip_symb = true;
          }
          break;
        case '"':
          if (special_symb) {
            special_symb = false;
          } else {
            in_string = !in_string;
            if (!in_string) {
              // Closing quote: turn it into a blank and revisit the same
              // position, so that it terminates the token.
              buffer_[index_] = ' ';
              --index_;
            }
            skip_symb = true;
          }
          break;
      }

      if (!skip_symb) token.push_back(symb);
      ++index_;
    }

    if (index_ == data_size_) {
      // Chunk exhausted: request a refill and keep extending the token.
      index_ = 0;
      data_size_ = 0;
    } else {
      if (special_symb || in_string) BadFormat(token);
      break;
    }
  }

  return token;
}
|
||||
|
||||
// Reports a malformed token together with the file name and the current line
// counter on stdout, then terminates the process with exit code 1.
void BadFormat(token_t token) {
  // The token is taken by value, so it can be NUL-terminated for printing.
  token.push_back('\0');
  const char* text = &token[0];
  std::cout << "Error: " << file_name_ << ", line " << file_line_ << ", bad XML token '" << text
            << "'" << std::endl;
  exit(1);
}
|
||||
|
||||
void AddLevel(const std::string& tag) {
|
||||
level_t* level = new level_t;
|
||||
level->tag = tag;
|
||||
if (level_) {
|
||||
level_->nodes.push_back(level);
|
||||
stack_.push_back(level_);
|
||||
}
|
||||
level_ = level;
|
||||
|
||||
std::string global_tag;
|
||||
for (level_t* level : stack_) {
|
||||
global_tag += level->tag + ".";
|
||||
}
|
||||
global_tag += tag;
|
||||
(*map_)[global_tag].push_back(level_);
|
||||
}
|
||||
|
||||
void UpLevel() {
|
||||
level_ = stack_.back();
|
||||
stack_.pop_back();
|
||||
}
|
||||
|
||||
// Returns the tag of the level that is currently open.
std::string CurrentLevel() const {
  return level_->tag;
}
|
||||
|
||||
// Stores 'value' under 'key' in the options of the current level,
// replacing any value stored under that key before.
void AddOption(const std::string& key, const std::string& value) {
  level_->opts[key] = value;
}
|
||||
|
||||
const std::string file_name_;
|
||||
unsigned file_line_;
|
||||
int fd_;
|
||||
|
||||
static const size_t kBufSize = 256;
|
||||
char buffer_[kBufSize];
|
||||
|
||||
unsigned data_size_;
|
||||
unsigned index_;
|
||||
unsigned state_;
|
||||
bool comment_;
|
||||
std::vector<level_t*> stack_;
|
||||
bool included_;
|
||||
level_t* level_;
|
||||
map_t* map_;
|
||||
};
|
||||
|
||||
} // namespace xml
|
||||
|
||||
#endif // TEST_UTIL_XML_H_
|
||||
@@ -0,0 +1,198 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
get_property(HSA_RUNTIME_INCLUDE_DIRECTORIES TARGET hsa-runtime64::hsa-runtime64 PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
|
||||
# Set the HIP language runtime link flags as FindHIP does not set them.
|
||||
set(CMAKE_EXECUTABLE_RUNTIME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG})
|
||||
set(CMAKE_EXECUTABLE_RUNTIME_HIP_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG_SEP})
|
||||
set(CMAKE_EXECUTABLE_RPATH_LINK_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RPATH_LINK_CXX_FLAG})
|
||||
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${ROCM_PATH}/lib/cmake/hip")
|
||||
set(CMAKE_HIP_ARCHITECTURES OFF)
|
||||
if(DEFINED ROCM_PATH)
|
||||
set(HIP_ROOT_DIR "${ROCM_PATH}/bin")
|
||||
endif()
|
||||
find_package(HIP REQUIRED MODULE)
|
||||
|
||||
find_package(Clang REQUIRED CONFIG
|
||||
PATHS "${ROCM_PATH}"
|
||||
PATH_SUFFIXES "llvm/lib/cmake/clang")
|
||||
|
||||
## Add custom targets to build and run all the tests
|
||||
add_custom_target(mytest)
|
||||
add_dependencies(mytest roctracer_tool hip_stats)
|
||||
add_custom_target(check COMMAND ${PROJECT_BINARY_DIR}/run.sh DEPENDS mytest)
|
||||
|
||||
## Build MatrixTranspose
|
||||
set_source_files_properties(hip/MatrixTranspose.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
hip_add_executable(MatrixTranspose hip/MatrixTranspose.cpp)
|
||||
## Adding generated build-id as hip_add_executable doesn't generate automatically
|
||||
target_link_options(MatrixTranspose PRIVATE "-Wl,--build-id=md5")
|
||||
target_include_directories(MatrixTranspose PRIVATE ${PROJECT_SOURCE_DIR}/inc)
|
||||
target_link_libraries(MatrixTranspose PRIVATE roctracer roctx)
|
||||
add_dependencies(mytest MatrixTranspose)
|
||||
|
||||
## Build MatrixTranspose_test, MatrixTranspose_hipaact_test and MatrixTranspose_mgpu
|
||||
set_source_files_properties(app/MatrixTranspose_test.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
## Build one variant of app/MatrixTranspose_test.cpp and attach it to 'mytest'.
##   OUTPUT_FILE - name of the executable target to create
##   DEFINITIONS - extra compile definition for this variant (may be empty)
function(build_matrix_transpose_test OUTPUT_FILE DEFINITIONS)
  hip_add_executable(${OUTPUT_FILE} app/MatrixTranspose_test.cpp)
  target_include_directories(${OUTPUT_FILE} PRIVATE ${PROJECT_SOURCE_DIR}/inc)
  target_compile_definitions(${OUTPUT_FILE} PRIVATE ITERATIONS=100 HIP_TEST=1 ${DEFINITIONS})
  ## hip_add_executable does not generate a build-id automatically, so request one
  target_link_options(${OUTPUT_FILE} PRIVATE "-Wl,--build-id=md5")
  target_link_libraries(${OUTPUT_FILE} PRIVATE roctracer roctx)
  add_dependencies(mytest ${OUTPUT_FILE})
endfunction()
|
||||
|
||||
build_matrix_transpose_test(MatrixTranspose_test "")
|
||||
build_matrix_transpose_test(MatrixTranspose_hipaact_test HIP_API_ACTIVITY_ON=1)
|
||||
build_matrix_transpose_test(MatrixTranspose_mgpu MGPU_TEST=1)
|
||||
|
||||
## Build MatrixTranspose_ctest
|
||||
add_custom_command(OUTPUT MatrixTranspose.c
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/app/MatrixTranspose_test.cpp MatrixTranspose.c)
|
||||
hip_add_executable(MatrixTranspose_ctest MatrixTranspose.c)
|
||||
## Adding generated build-id as hip_add_executable doesn't generate automatically
|
||||
target_link_options(MatrixTranspose_ctest PRIVATE "-Wl,--build-id=md5")
|
||||
target_compile_definitions(MatrixTranspose_ctest PRIVATE HIP_TEST=0 __HIP_PLATFORM_AMD__)
|
||||
target_include_directories(MatrixTranspose_ctest PRIVATE ${PROJECT_SOURCE_DIR}/inc)
|
||||
target_link_libraries(MatrixTranspose_ctest PRIVATE roctracer roctx)
|
||||
add_dependencies(mytest MatrixTranspose_ctest)
|
||||
|
||||
## Build codeobj event test
|
||||
add_library(codeobj_test SHARED app/codeobj_test.cpp)
|
||||
target_include_directories(codeobj_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${PROJECT_SOURCE_DIR} ${PROJECT_SOURCE_DIR}/inc)
|
||||
target_link_libraries(codeobj_test roctracer)
|
||||
add_dependencies(mytest codeobj_test)
|
||||
install(TARGETS codeobj_test DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/test COMPONENT tests)
|
||||
|
||||
## Build the hsa (standalone) copy test
|
||||
## Compile an OpenCL kernel source into a code object for one GPU target.
##   TARGET_ID   - value passed to clang as -mcpu
##   INPUT_FILE  - kernel source file
##   OUTPUT_FILE - name of the generated file, created in ${PROJECT_BINARY_DIR}
## The generated file is installed with the 'tests' component and appended to
## HSACO_TARGET_LIST in the caller's scope.
function(generate_hsaco TARGET_ID INPUT_FILE OUTPUT_FILE)
  separate_arguments(CLANG_ARG_LIST UNIX_COMMAND
    "-O2 -x cl -Xclang -finclude-default-header -cl-denorms-are-zero -cl-std=CL2.0 -Wl,--build-id=sha1 \
-target amdgcn-amd-amdhsa -mcpu=${TARGET_ID} -o ${OUTPUT_FILE} ${INPUT_FILE}")
  add_custom_command(
    OUTPUT ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
    COMMAND clang ${CLANG_ARG_LIST}
    WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
    DEPENDS ${INPUT_FILE} clang
    COMMENT "Building ${OUTPUT_FILE}..."
    VERBATIM)
  install(FILES ${PROJECT_BINARY_DIR}/${OUTPUT_FILE}
          DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}
          COMPONENT tests)
  set(HSACO_TARGET_LIST ${HSACO_TARGET_LIST} ${PROJECT_BINARY_DIR}/${OUTPUT_FILE} PARENT_SCOPE)
endfunction()
|
||||
|
||||
## Turn GPU_TARGETS into the list GPU_LIST. The targets may be separated by
## blanks, commas or semicolons.
separate_arguments(GPU_TARGETS)
list(LENGTH GPU_TARGETS list_count)
if (${list_count} LESS_EQUAL 1)
  string(REPLACE " " ";" GPU_LIST "${GPU_TARGETS}")
  ## Continue from the previous result; reading GPU_TARGETS again would
  ## discard the replacement made above.
  string(REPLACE "," ";" GPU_LIST "${GPU_LIST}")
else()
  set(GPU_LIST ${GPU_TARGETS})
endif()
|
||||
|
||||
foreach(target_id ${GPU_LIST})
|
||||
## generate kernel bitcodes
|
||||
generate_hsaco(${target_id} ${CMAKE_CURRENT_SOURCE_DIR}/hsa/copy.cl ${target_id}_copy.hsaco)
|
||||
endforeach(target_id)
|
||||
add_custom_target(hsaco_targets DEPENDS ${HSACO_TARGET_LIST})
|
||||
|
||||
add_executable(copy hsa/copy.cpp)
|
||||
target_link_libraries(copy hsa-runtime64::hsa-runtime64 Threads::Threads dl)
|
||||
add_dependencies(copy hsaco_targets)
|
||||
add_dependencies(mytest copy)
|
||||
|
||||
## Build the ROCTX test
|
||||
set_source_files_properties(app/roctx_test.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
hip_add_executable(roctx_test app/roctx_test.cpp)
|
||||
## Adding generated build-id as hip_add_executable doesn't generate automatically
|
||||
target_link_options(roctx_test PRIVATE "-Wl,--build-id=md5")
|
||||
target_link_libraries(roctx_test Threads::Threads roctx)
|
||||
add_dependencies(mytest roctx_test)
|
||||
|
||||
## Build the backward compatibility test
|
||||
add_executable(backward_compat_test app/backward_compat_test.cpp)
|
||||
target_link_libraries(backward_compat_test roctracer)
|
||||
add_dependencies(mytest backward_compat_test)
|
||||
|
||||
add_executable(load_unload_reload_test hsa/load_unload_reload.cpp)
|
||||
target_link_libraries(load_unload_reload_test hsa-runtime64::hsa-runtime64)
|
||||
add_dependencies(mytest load_unload_reload_test)
|
||||
|
||||
## Build the trace_buffer test
|
||||
add_executable(trace_buffer directed/trace_buffer.cpp)
|
||||
target_include_directories(trace_buffer PRIVATE ${PROJECT_SOURCE_DIR}/src/tracer_tool)
|
||||
target_link_libraries(trace_buffer Threads::Threads atomic)
|
||||
add_dependencies(mytest trace_buffer)
|
||||
|
||||
## Build the memory_pool test
|
||||
add_executable(memory_pool directed/memory_pool.cpp)
|
||||
target_include_directories(memory_pool PRIVATE ${PROJECT_SOURCE_DIR}/src/roctracer ${PROJECT_SOURCE_DIR}/inc)
|
||||
target_link_libraries(memory_pool Threads::Threads atomic)
|
||||
add_dependencies(mytest memory_pool)
|
||||
|
||||
## Build the activity_and_callback test
|
||||
set_source_files_properties(directed/activity_and_callback.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
hip_add_executable(activity_and_callback directed/activity_and_callback.cpp)
|
||||
## Adding generated build-id as hip_add_executable doesn't generate automatically
|
||||
target_link_options(activity_and_callback PRIVATE "-Wl,--build-id=md5")
|
||||
target_link_libraries(activity_and_callback roctracer)
|
||||
add_dependencies(mytest activity_and_callback)
|
||||
|
||||
## Build the multi_pool_activities test
|
||||
set_source_files_properties(directed/multi_pool_activities.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
hip_add_executable(multi_pool_activities directed/multi_pool_activities.cpp)
|
||||
## Adding generated build-id as hip_add_executable doesn't generate automatically
|
||||
target_link_options(multi_pool_activities PRIVATE "-Wl,--build-id=md5")
|
||||
target_link_libraries(multi_pool_activities roctracer)
|
||||
add_dependencies(mytest multi_pool_activities)
|
||||
|
||||
## Build the dlopen test
|
||||
add_executable(dlopen directed/dlopen.cpp)
|
||||
target_include_directories(dlopen PRIVATE ${PROJECT_SOURCE_DIR}/inc ${HSA_RUNTIME_INCLUDE_DIRECTORIES})
|
||||
target_link_libraries(dlopen dl)
|
||||
add_dependencies(mytest dlopen)
|
||||
|
||||
## Copy the golden traces and test scripts
|
||||
configure_file(run.sh ${PROJECT_BINARY_DIR} COPYONLY)
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink run.sh ${PROJECT_BINARY_DIR}/run_ci.sh)
|
||||
install(PROGRAMS ${PROJECT_BINARY_DIR}/run.sh RENAME "run_tests.sh" DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME} COMPONENT tests)
|
||||
|
||||
configure_file(${PROJECT_SOURCE_DIR}/script/check_trace.py ${PROJECT_BINARY_DIR}/test/check_trace.py COPYONLY)
|
||||
install(PROGRAMS ${PROJECT_BINARY_DIR}/test/check_trace.py DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/test COMPONENT tests)
|
||||
|
||||
file(GLOB files RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "golden_traces/tests_trace_cmp_levels.txt" "golden_traces/*_trace.txt")
|
||||
foreach(file ${files})
|
||||
configure_file(${file} ${PROJECT_BINARY_DIR}/test/${file} COPYONLY)
|
||||
endforeach()
|
||||
install(DIRECTORY ${PROJECT_BINARY_DIR}/test/golden_traces DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/test COMPONENT tests)
|
||||
|
||||
# install all executables
|
||||
set(all_executables)
|
||||
get_property(all_targets DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY BUILDSYSTEM_TARGETS)
|
||||
foreach(target IN LISTS all_targets)
|
||||
get_target_property(target_type ${target} TYPE)
|
||||
if (target_type STREQUAL "EXECUTABLE")
|
||||
list(APPEND all_executables ${target})
|
||||
endif()
|
||||
endforeach()
|
||||
install(TARGETS ${all_executables} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/test COMPONENT tests)
|
||||
@@ -0,0 +1,403 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include <cstdlib>
|
||||
using namespace std;
|
||||
#else
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
// roctx header file
|
||||
#include <roctx.h>
|
||||
// roctracer extension API
|
||||
#include <roctracer_ext.h>
|
||||
|
||||
/* Message buffer shared by SPRINT() and SFLUSH():
   msg_size - capacity of the buffer in bytes,
   msg_buf  - start of the buffer, allocated on the first SPRINT() call,
   message  - position where the next SPRINT() output is appended. */
#ifdef __cplusplus
static thread_local const size_t msg_size = 512;
static thread_local char* msg_buf = NULL;
static thread_local char* message = NULL;
#else
static const size_t msg_size = 512;
static char* msg_buf = NULL;
static char* message = NULL;
#endif

/* Appends printf-style formatted text to the message buffer.
   Output that does not fit is truncated; the buffer always stays
   NUL-terminated and 'message' never moves past the last byte of the buffer.
   Aborts if the buffer cannot be allocated. */
void SPRINT(const char* fmt, ...) {
  if (msg_buf == NULL) {
    msg_buf = (char*)calloc(msg_size, 1);
    if (msg_buf == NULL) abort();
    message = msg_buf;
  }

  /* 'message' is kept below msg_buf + msg_size, so at least one byte is free. */
  const size_t used = (size_t)(message - msg_buf);
  const size_t avail = msg_size - used;

  va_list args;
  va_start(args, fmt);
  const int written = vsnprintf(message, avail, fmt, args);
  va_end(args);

  if (written < 0) return;
  /* vsnprintf reports the length the full output would have had; advance only
     by what was actually stored, otherwise the next call would compute a
     wrapped-around size and write past the buffer. */
  message += ((size_t)written < avail) ? (size_t)written : (avail - 1);
}
|
||||
/* Writes the accumulated message buffer to stdout, flushes stdout and rewinds
   the append position to the start of the buffer. Aborts when nothing was
   ever written with SPRINT() (the buffer does not exist yet). */
void SFLUSH() {
  if (msg_buf == NULL) {
    abort();
  }
  /* Force termination in case the buffer was filled completely. */
  msg_buf[msg_size - 1] = 0;
  message = msg_buf;
  fprintf(stdout, "%s", msg_buf);
  fflush(stdout);
}
|
||||
|
||||
#if HIP_TEST
|
||||
// hip header file
|
||||
#include <hip/hip_runtime.h>
|
||||
// Macro to call HIP API
|
||||
#define CALL_HIP(call) \
|
||||
do { \
|
||||
call; \
|
||||
} while (0);
|
||||
#define CHECK_HIP(call) \
|
||||
do { \
|
||||
hipError_t err = call; \
|
||||
if (err != hipSuccess) { \
|
||||
fprintf(stderr, "%s\n", hipGetErrorString(err)); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define CALL_HIP(call) \
|
||||
do { \
|
||||
} while (0)
|
||||
#define CHECK_HIP(call) \
|
||||
do { \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#ifndef ITERATIONS
|
||||
#define ITERATIONS 101
|
||||
#endif
|
||||
#define WIDTH 1024
|
||||
#define NUM (WIDTH * WIDTH)
|
||||
#define THREADS_PER_BLOCK_X 4
|
||||
#define THREADS_PER_BLOCK_Y 4
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
#if HIP_TEST
|
||||
// Device (Kernel) function, it must be void
|
||||
// Each thread copies one element: element (col, row) of 'in' is written to
// position (row, col) of 'out'. Both matrices are 'width' elements wide.
__global__ void matrixTranspose(float* out, float* in, const int width) {
  const int col = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
  const int row = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
  const int dst = row * width + col;
  const int src = col * width + row;

  out[dst] = in[src];
}
|
||||
#endif
|
||||
|
||||
// CPU implementation of matrix transpose
|
||||
/* Host-side reference: writes the transpose of the width x width matrix
   'input' (row-major) into 'output'. */
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
  unsigned int row;
  unsigned int col;

  for (row = 0; row < width; ++row) {
    for (col = 0; col < width; ++col) {
      const float value = input[row * width + col];
      output[col * width + row] = value;
    }
  }
}
|
||||
|
||||
int iterations = ITERATIONS;
|
||||
void init_tracing();
|
||||
void start_tracing();
|
||||
void stop_tracing();
|
||||
|
||||
int main() {
|
||||
float* Matrix;
|
||||
float* TransposeMatrix;
|
||||
float* cpuTransposeMatrix;
|
||||
|
||||
#if HIP_TEST
|
||||
float* gpuMatrix;
|
||||
float* gpuTransposeMatrix;
|
||||
#endif
|
||||
|
||||
int i;
|
||||
int errors = 0;
|
||||
|
||||
init_tracing();
|
||||
|
||||
#if HIP_TEST
|
||||
int gpuCount = 1;
|
||||
#if MGPU_TEST
|
||||
hipGetDeviceCount(&gpuCount);
|
||||
fprintf(stderr, "Number of GPUs: %d\n", gpuCount);
|
||||
#endif
|
||||
iterations *= gpuCount;
|
||||
#endif
|
||||
|
||||
while (iterations-- > 0) {
|
||||
start_tracing();
|
||||
|
||||
#if HIP_TEST
|
||||
// set GPU
|
||||
const int devIndex = iterations % gpuCount;
|
||||
hipSetDevice(devIndex);
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
CHECK_HIP(hipGetDeviceProperties(&devProp, 0));
|
||||
fprintf(stderr, "Device %d name: %s\n", devIndex, devProp.name);
|
||||
#endif
|
||||
|
||||
Matrix = (float*)malloc(NUM * sizeof(float));
|
||||
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
CHECK_HIP(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
|
||||
CHECK_HIP(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
|
||||
|
||||
// correlation reagion32
|
||||
roctracer_activity_push_external_correlation_id(31);
|
||||
// correlation reagion32
|
||||
roctracer_activity_push_external_correlation_id(32);
|
||||
|
||||
// Memory transfer from host to device
|
||||
CHECK_HIP(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
// correlation reagion33
|
||||
roctracer_activity_push_external_correlation_id(33);
|
||||
|
||||
roctxMark("before hipLaunchKernel");
|
||||
roctxRangePush("hipLaunchKernel");
|
||||
|
||||
// Lauching kernel from host
|
||||
CALL_HIP(hipLaunchKernelGGL(matrixTranspose,
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
|
||||
gpuTransposeMatrix, gpuMatrix, WIDTH));
|
||||
|
||||
roctxMark("after hipLaunchKernel");
|
||||
|
||||
// correlation reagion end
|
||||
roctracer_activity_pop_external_correlation_id(NULL);
|
||||
|
||||
// Memory transfer from device to host
|
||||
roctxRangePush("hipMemcpy");
|
||||
|
||||
CHECK_HIP(
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
roctxRangePop(); // for "hipMemcpy"
|
||||
roctxRangePop(); // for "hipLaunchKernel"
|
||||
|
||||
// correlation reagion end
|
||||
roctracer_activity_pop_external_correlation_id(NULL);
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
#if HIP_TEST
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
|
||||
// verify the results
|
||||
double eps = 1.0E-6;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
if (abs((double)TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
fprintf(stderr, "FAILED: %d errors\n", errors);
|
||||
} else {
|
||||
errors = 0;
|
||||
fprintf(stderr, "PASSED!\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
// free the resources on device side
|
||||
CHECK_HIP(hipFree(gpuMatrix));
|
||||
CHECK_HIP(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// correlation reagion end
|
||||
roctracer_activity_pop_external_correlation_id(NULL);
|
||||
// correlation reagion end
|
||||
roctracer_activity_pop_external_correlation_id(NULL);
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
}
|
||||
|
||||
stop_tracing();
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// HIP Callbacks/Activity tracing
|
||||
//
|
||||
#if 1
|
||||
#include <roctracer_hip.h>
|
||||
#include <roctracer_hsa.h>
|
||||
#include <roctracer_roctx.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h> /* For SYS_xxx definitions */
|
||||
|
||||
// Macro to check ROC-tracer calls status
|
||||
#define CHECK_ROCTRACER(call) \
|
||||
do { \
|
||||
int err = call; \
|
||||
if (err != 0) { \
|
||||
fprintf(stderr, "%s\n", roctracer_error_string()); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// Kernel thread id of the calling thread, obtained with the gettid system call.
static inline uint32_t GetTid() {
  const long tid = syscall(__NR_gettid);
  return (uint32_t)tid;
}
|
||||
// Process id of the calling process, obtained with the getpid system call.
static inline uint32_t GetPid() {
  const long pid = syscall(__NR_getpid);
  return (uint32_t)pid;
}
|
||||
|
||||
|
||||
// Runtime API callback function
|
||||
void api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
|
||||
(void)arg;
|
||||
|
||||
if (domain == ACTIVITY_DOMAIN_ROCTX) {
|
||||
const roctx_api_data_t* data = (const roctx_api_data_t*)(callback_data);
|
||||
fprintf(stdout, "rocTX <\"%s pid(%d) tid(%d)\">\n", data->args.message, GetPid(), GetTid());
|
||||
return;
|
||||
}
|
||||
const hip_api_data_t* data = (const hip_api_data_t*)(callback_data);
|
||||
SPRINT("<%s id(%u)\tcorrelation_id(%lu) %s pid(%d) tid(%d)> ",
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), cid, data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit", GetPid(), GetTid());
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
switch (cid) {
|
||||
case HIP_API_ID_hipMemcpy:
|
||||
SPRINT("dst(%p) src(%p) size(0x%x) kind(%u)", data->args.hipMemcpy.dst,
|
||||
data->args.hipMemcpy.src, (uint32_t)(data->args.hipMemcpy.sizeBytes),
|
||||
(uint32_t)(data->args.hipMemcpy.kind));
|
||||
break;
|
||||
case HIP_API_ID_hipMalloc:
|
||||
SPRINT("ptr(%p) size(0x%x)", data->args.hipMalloc.ptr,
|
||||
(uint32_t)(data->args.hipMalloc.size));
|
||||
break;
|
||||
case HIP_API_ID_hipFree:
|
||||
SPRINT("ptr(%p)", data->args.hipFree.ptr);
|
||||
break;
|
||||
case HIP_API_ID_hipModuleLaunchKernel:
|
||||
SPRINT("kernel(\"%s\") stream(%p)", hipKernelNameRef(data->args.hipModuleLaunchKernel.f),
|
||||
data->args.hipModuleLaunchKernel.stream);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (cid) {
|
||||
case HIP_API_ID_hipMalloc:
|
||||
SPRINT("*ptr(0x%p)", *(data->args.hipMalloc.ptr));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
SPRINT("\n");
|
||||
SFLUSH();
|
||||
}
|
||||
// Activity tracing callback
|
||||
// hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067)
|
||||
void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
const roctracer_record_t* record = (const roctracer_record_t*)(begin);
|
||||
const roctracer_record_t* end_record = (const roctracer_record_t*)(end);
|
||||
|
||||
SPRINT("\tActivity records:\n");
|
||||
while (record < end_record) {
|
||||
const char* name = roctracer_op_string(record->domain, record->op, record->kind);
|
||||
SPRINT("\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)", name, record->correlation_id,
|
||||
record->begin_ns, record->end_ns);
|
||||
if (record->domain == ACTIVITY_DOMAIN_HIP_API) {
|
||||
SPRINT(" process_id(%u) thread_id(%u)", record->process_id, record->thread_id);
|
||||
} else if (record->domain == ACTIVITY_DOMAIN_HIP_OPS) {
|
||||
SPRINT(" device_id(%d) queue_id(%lu)", record->device_id, record->queue_id);
|
||||
if (record->op == HIP_OP_ID_COPY) SPRINT(" bytes(0x%zx)", record->bytes);
|
||||
} else if (record->domain == ACTIVITY_DOMAIN_HSA_OPS) {
|
||||
SPRINT(" se(%u) cycle(%lu) pc(%lx)", record->pc_sample.se, record->pc_sample.cycle,
|
||||
record->pc_sample.pc);
|
||||
} else if (record->domain == ACTIVITY_DOMAIN_EXT_API) {
|
||||
SPRINT(" external_id(%lu)", record->external_id);
|
||||
} else {
|
||||
fprintf(stdout, "Bad domain %d\n\n", record->domain);
|
||||
abort();
|
||||
}
|
||||
SPRINT("\n");
|
||||
SFLUSH();
|
||||
|
||||
CHECK_ROCTRACER(roctracer_next_record(record, &record));
|
||||
}
|
||||
}
|
||||
|
||||
// Init tracing routine
|
||||
void init_tracing() {
|
||||
fprintf(stderr, "# INIT #############################\n");
|
||||
// roctracer properties
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, NULL);
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties;
|
||||
memset(&properties, 0, sizeof(roctracer_properties_t));
|
||||
properties.buffer_size = 0x1000;
|
||||
properties.buffer_callback_fun = activity_callback;
|
||||
CHECK_ROCTRACER(roctracer_open_pool(&properties));
|
||||
// Enable HIP API callbacks
|
||||
CHECK_ROCTRACER(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, api_callback, NULL));
|
||||
// Enable HIP activity tracing
|
||||
#if HIP_API_ACTIVITY_ON
|
||||
CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
#endif
|
||||
CHECK_ROCTRACER(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
|
||||
// Enable PC sampling
|
||||
CHECK_ROCTRACER(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_RESERVED1));
|
||||
// Enable rocTX
|
||||
CHECK_ROCTRACER(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, NULL));
|
||||
}
|
||||
|
||||
// Start tracing routine
|
||||
void start_tracing() {
|
||||
fprintf(stderr, "# START (%d) #############################\n", iterations);
|
||||
// Start
|
||||
if ((iterations & 1) == 1)
|
||||
roctracer_start();
|
||||
else
|
||||
roctracer_stop();
|
||||
}
|
||||
|
||||
// Stop tracing routine
|
||||
// Tears tracing down: disables the callbacks and activities enabled by
// init_tracing() and flushes the activity records that are still buffered.
void stop_tracing() {
  // HIP API callbacks
  CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));

  // Activity domains
#if HIP_API_ACTIVITY_ON
  CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
#endif
  CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_OPS));
  CHECK_ROCTRACER(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));

  // rocTX callbacks
  CHECK_ROCTRACER(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));

  // Deliver whatever is still buffered
  CHECK_ROCTRACER(roctracer_flush_activity());

  fprintf(stderr, "# STOP #############################\n");
}
|
||||
#else
|
||||
// No-op stand-ins for the tracing entry points, compiled in the #else branch, i.e. when the
// tracing implementation above is preprocessed out.
void init_tracing() {}
|
||||
void start_tracing() {}
|
||||
void stop_tracing() {}
|
||||
#endif
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -0,0 +1,32 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
extern "C" int roctracer_load(); // Removed in ROCTX 4.1
|
||||
extern "C" void roctracer_unload(); // Removed in ROCTX 4.1
|
||||
extern "C" void roctracer_flush_buf(); // Removed in ROCTX 4.1
|
||||
|
||||
int main() {
|
||||
if (roctracer_load() != 1) return -1;
|
||||
roctracer_flush_buf();
|
||||
roctracer_unload();
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <cstring>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "roctracer.h"
|
||||
#include "roctracer_hsa.h"
|
||||
|
||||
namespace {
|
||||
// Check the status returned by a roctracer API call; print the error and abort on failure
|
||||
inline void CHECK(roctracer_status_t status) {
  // Success: nothing to report.
  if (status == ROCTRACER_STATUS_SUCCESS) return;

  // Failure: print the roctracer error string and terminate the process.
  fprintf(stderr, "ERROR: %s\n", roctracer_error_string());
  abort();
}
|
||||
|
||||
// codeobj callback
|
||||
void CodeObjectCallback(uint32_t domain, uint32_t cid, const void* data, void* arg) {
|
||||
const hsa_evt_data_t* evt_data = reinterpret_cast<const hsa_evt_data_t*>(data);
|
||||
fprintf(stdout,
|
||||
"codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) "
|
||||
"load_delta(0x%lx) uri(\"%s\") unload(%d)\n",
|
||||
domain, cid, evt_data->codeobj.load_base, evt_data->codeobj.load_size,
|
||||
evt_data->codeobj.load_delta, evt_data->codeobj.uri, evt_data->codeobj.unload);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
#include <hsa/hsa_api_trace.h>
|
||||
|
||||
extern "C" {
|
||||
// The HSA_AMD_TOOL_PRIORITY variable must be a constant value type initialized by the loader
|
||||
// itself, not by code during _init. 'extern const' seems to do that although that is not a
|
||||
// guarantee.
|
||||
ROCTRACER_EXPORT extern const uint32_t HSA_AMD_TOOL_PRIORITY = 1050;
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
// Registers CodeObjectCallback for the HSA_EVT_ID_CODEOBJ operation of the
// ACTIVITY_DOMAIN_HSA_EVT domain. CHECK() aborts if the registration fails, so the function
// returns true whenever it returns. None of the parameters are used.
ROCTRACER_EXPORT bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
                             uint64_t failed_tool_count, const char* const* failed_tool_names) {
  const roctracer_status_t enable_status = roctracer_enable_op_callback(
      ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, CodeObjectCallback, nullptr);
  CHECK(enable_status);
  return true;
}
|
||||
|
||||
// Disables all callbacks of the ACTIVITY_DOMAIN_HSA_EVT domain; CHECK() aborts on failure.
ROCTRACER_EXPORT void OnUnload() {
  const roctracer_status_t disable_status =
      roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_EVT);
  CHECK(disable_status);
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -0,0 +1,75 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include "roctx.h"
|
||||
|
||||
#include <thread>
|
||||
|
||||
// Evaluates a HIP runtime call once. If the result is not hipSuccess, prints the matching
// HIP error string to stderr and aborts the process.
#define HIP_CALL(call)                                                                             \
  do {                                                                                             \
    const hipError_t hip_call_status = (call);                                                     \
    if (hip_call_status != hipSuccess) {                                                           \
      fprintf(stderr, "%s\n", hipGetErrorString(hip_call_status));                                 \
      abort();                                                                                     \
    }                                                                                              \
  } while (0)
|
||||
|
||||
// Empty device kernel: it does no work and exists only so that main() has something to launch.
__global__ void kernel() {}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
HIP_CALL(hipSetDevice(0));
|
||||
|
||||
// Not in a roctx range.
|
||||
kernel<<<1, 1>>>();
|
||||
|
||||
int ret = roctxRangePush("NestedRangeA");
|
||||
|
||||
// In a simple first level roctx range.
|
||||
kernel<<<1, 1>>>();
|
||||
|
||||
if (roctxRangePop() != ret) return -1;
|
||||
|
||||
roctxRangePush("NestedRangeB");
|
||||
roctxRangePush("NestedRangeC");
|
||||
roctx_range_id_t id = roctxRangeStart("StartStopRangeA");
|
||||
|
||||
// In a nested roctx range.
|
||||
kernel<<<1, 1>>>();
|
||||
|
||||
roctxRangePop();
|
||||
roctxRangePop();
|
||||
|
||||
std::thread thread([id]() { roctxRangeStop(id); });
|
||||
thread.join();
|
||||
|
||||
roctxRangePush("NestedRangeD");
|
||||
roctxRangePush("NestedRangeE");
|
||||
roctxRangePop();
|
||||
|
||||
// In a first level roctx range, but after a nested range.
|
||||
kernel<<<1, 1>>>();
|
||||
|
||||
if (roctxRangePop() != 0) return -1;
|
||||
|
||||
HIP_CALL(hipDeviceSynchronize());
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,139 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <roctracer.h>
|
||||
#define HIP_PROF_HIP_API_STRING 1
|
||||
#include <roctracer_hip.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
// Empty device kernel: it does no work and exists only so that main() has something to launch.
__global__ void kernel() {}
|
||||
|
||||
template <typename T> inline void CHECK(T status);
|
||||
|
||||
template <> inline void CHECK(hipError_t err) {
|
||||
if (err != hipSuccess) {
|
||||
std::cerr << hipGetErrorString(err) << std::endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
template <> inline void CHECK(roctracer_status_t status) {
|
||||
if (status != ROCTRACER_STATUS_SUCCESS) {
|
||||
std::cerr << roctracer_error_string() << std::endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Returns the id of the calling process. The getpid system call is issued on the first call
// only; the result is cached in a function-local static and reused afterwards.
uint32_t GetPid() {
  static const long cached_pid = syscall(__NR_getpid);
  return static_cast<uint32_t>(cached_pid);
}
|
||||
// Returns the id of the calling thread. The gettid system call is issued once per thread;
// the result is cached in a thread_local variable and reused afterwards.
uint32_t GetTid() {
  static thread_local const long cached_tid = syscall(__NR_gettid);
  return static_cast<uint32_t>(cached_tid);
}
|
||||
|
||||
void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
|
||||
const hip_api_data_t* data = static_cast<const hip_api_data_t*>(callback_data);
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s pid(%d) tid(%d)>\n",
|
||||
roctracer_op_string(domain, cid, 0), cid, data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit", GetPid(), GetTid());
|
||||
}
|
||||
|
||||
void buffer_callback(const char* begin, const char* end, void* arg) {
|
||||
for (const roctracer_record_t* record = (const roctracer_record_t*)begin;
|
||||
record < (const roctracer_record_t*)end; CHECK(roctracer_next_record(record, &record))) {
|
||||
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)\n",
|
||||
roctracer_op_string(record->domain, record->op, record->kind), record->correlation_id,
|
||||
record->begin_ns, record->end_ns);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main() {
|
||||
CHECK(hipSetDevice(0));
|
||||
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_callback_fun = buffer_callback;
|
||||
properties.buffer_callback_arg = nullptr;
|
||||
properties.buffer_size = 1024;
|
||||
CHECK(roctracer_open_pool(&properties));
|
||||
|
||||
// 1: callbacks only
|
||||
CHECK(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 2: callbacks and activities
|
||||
CHECK(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 3: activities only
|
||||
CHECK(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 4: callbacks only
|
||||
CHECK(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr));
|
||||
CHECK(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 5: callbacks and activities
|
||||
CHECK(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 6: callbacks only
|
||||
CHECK(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 7: none
|
||||
CHECK(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
#include <cassert>
|
||||
|
||||
using get_timestamp_t = decltype(roctracer_get_timestamp);
|
||||
using hsa_init_t = decltype(hsa_init);
|
||||
using hsa_shut_down_t = decltype(hsa_shut_down);
|
||||
|
||||
int main() {
|
||||
// CASE 1: HSA is not loaded.
|
||||
//
|
||||
{
|
||||
void* tracer_library = dlopen("libroctracer64.so", RTLD_LAZY);
|
||||
assert(tracer_library != nullptr);
|
||||
|
||||
auto* get_timestamp =
|
||||
reinterpret_cast<get_timestamp_t*>(dlsym(tracer_library, "roctracer_get_timestamp"));
|
||||
assert(get_timestamp != nullptr);
|
||||
|
||||
roctracer_timestamp_t timestamp;
|
||||
(*get_timestamp)(×tamp);
|
||||
dlclose(tracer_library);
|
||||
}
|
||||
|
||||
// CASE 2 Load the roctracer after hsa_init().
|
||||
//
|
||||
void* hsa_library = dlopen("libhsa-runtime64.so.1", RTLD_LAZY);
|
||||
assert(hsa_library != nullptr);
|
||||
|
||||
auto* hsa_init = reinterpret_cast<hsa_init_t*>(dlsym(hsa_library, "hsa_init"));
|
||||
auto* hsa_shut_down = reinterpret_cast<hsa_shut_down_t*>(dlsym(hsa_library, "hsa_shut_down"));
|
||||
assert(hsa_init != nullptr && hsa_shut_down != nullptr);
|
||||
|
||||
{
|
||||
(*hsa_init)();
|
||||
|
||||
void* tracer_library = dlopen("libroctracer64.so", RTLD_LAZY);
|
||||
assert(tracer_library != nullptr);
|
||||
|
||||
auto* get_timestamp =
|
||||
reinterpret_cast<get_timestamp_t*>(dlsym(tracer_library, "roctracer_get_timestamp"));
|
||||
assert(get_timestamp != nullptr);
|
||||
|
||||
roctracer_timestamp_t timestamp;
|
||||
(*get_timestamp)(×tamp);
|
||||
|
||||
dlclose(tracer_library);
|
||||
(*hsa_shut_down)();
|
||||
}
|
||||
|
||||
// CASE 3: Load and use the roctracer before hsa_init().
|
||||
//
|
||||
{
|
||||
void* tracer_library = dlopen("libroctracer64.so", RTLD_LAZY);
|
||||
assert(tracer_library != nullptr);
|
||||
|
||||
auto* get_timestamp =
|
||||
reinterpret_cast<get_timestamp_t*>(dlsym(tracer_library, "roctracer_get_timestamp"));
|
||||
assert(get_timestamp != nullptr);
|
||||
|
||||
roctracer_timestamp_t timestamp;
|
||||
(*get_timestamp)(×tamp);
|
||||
|
||||
(*hsa_init)();
|
||||
(*hsa_shut_down)();
|
||||
dlclose(tracer_library);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,125 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "roctracer.h"
|
||||
#include "memory_pool.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cstdlib>
|
||||
#include <iterator>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
using namespace roctracer;
|
||||
|
||||
namespace {

// Number of hardware threads reported by the standard library. This replaces parsing
// /proc/cpuinfo through a namespace-scope std::ifstream, which kept the file open for the
// lifetime of the process. hardware_concurrency() may return 0 when the value cannot be
// determined; main() clamps the value into [min_num_threads, max_num_threads].
const std::size_t num_cpu_cores = std::thread::hardware_concurrency();

// Number of records each worker thread writes in the stress test.
constexpr std::size_t num_iterations = 1000;
// Lower and upper bounds for the number of worker threads in the stress test.
constexpr std::size_t min_num_threads = 10;
constexpr std::size_t max_num_threads = 50;

// Prints 'message' to stderr and aborts the process.
void fatal_error(const char* message) {
  std::cerr << message << std::endl;
  abort();
}

}  // namespace
|
||||
|
||||
int main() {
|
||||
constexpr size_t buffer_size = 10 * sizeof(roctracer_record_t);
|
||||
constexpr size_t max_data_size = buffer_size - sizeof(roctracer_record_t);
|
||||
|
||||
size_t flush_count = 0, record_count = 0;
|
||||
auto flush_callback = [&flush_count, &record_count](const char* begin, const char* end) {
|
||||
++flush_count;
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(10));
|
||||
record_count += (end - begin) / sizeof(roctracer_record_t);
|
||||
};
|
||||
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_callback_fun = [](const char* begin, const char* end, void* arg) {
|
||||
(*static_cast<decltype(flush_callback)*>(arg))(begin, end);
|
||||
};
|
||||
properties.buffer_callback_arg = &flush_callback;
|
||||
properties.buffer_size = buffer_size;
|
||||
MemoryPool pool(properties);
|
||||
|
||||
const void* original_data;
|
||||
std::atomic<int> relocation_count{0};
|
||||
auto relocate_data = [&relocation_count, &original_data](roctracer_record_t&, const void* data) {
|
||||
if (data != original_data) ++relocation_count;
|
||||
};
|
||||
|
||||
// test1: the record and data fit in the buffer: no flush, data should get relocated.
|
||||
constexpr char data_fits[max_data_size] = {0};
|
||||
original_data = data_fits;
|
||||
pool.Write(roctracer_record_t{}, data_fits, sizeof(data_fits), relocate_data); // F=0, R=1
|
||||
pool.Flush(); // F=1, R=1
|
||||
if (flush_count != 1 || relocation_count != 1) fatal_error("failed test1");
|
||||
|
||||
flush_count = record_count = relocation_count = 0;
|
||||
|
||||
// test2: the records and data do not fit in the buffer: 1 flush, data should get relocated.
|
||||
pool.Write(roctracer_record_t{}); // F=0, R=0
|
||||
pool.Write(roctracer_record_t{}, data_fits, sizeof(data_fits), relocate_data); // F=1, R=1
|
||||
pool.Flush(); // F=2, R=1
|
||||
if (flush_count != 2 || relocation_count != 1) fatal_error("failed test2");
|
||||
|
||||
flush_count = record_count = relocation_count = 0;
|
||||
|
||||
// test3: data does not fit in the buffer: 1 Flush, data is not relocated, all records should be
|
||||
// processed.
|
||||
constexpr char does_not_fit[max_data_size + 1] = {0};
|
||||
original_data = does_not_fit;
|
||||
pool.Write(roctracer_record_t{}, does_not_fit, sizeof(does_not_fit), relocate_data); // F=1, R=0
|
||||
if (flush_count != 1 || relocation_count != 0 || record_count != 1) fatal_error("failed test3");
|
||||
|
||||
flush_count = record_count = relocation_count = 0;
|
||||
|
||||
// test4: stress test writing and flushing.
|
||||
const std::size_t num_threads = std::clamp(num_cpu_cores, min_num_threads, max_num_threads);
|
||||
std::vector<std::thread> threads(num_threads);
|
||||
|
||||
// Start the worker threads. Each thread will write 'num_iterations' records in the memory
|
||||
// pool, then exit.
|
||||
for (auto&& thread : threads) {
|
||||
thread = std::thread([&pool]() {
|
||||
for (std::size_t j = 0; j < num_iterations; ++j) pool.Write(roctracer_record_t{});
|
||||
});
|
||||
}
|
||||
|
||||
// Wait for all the threads to complete, then flush the trace buffer.
|
||||
for (auto&& thread : threads) thread.join();
|
||||
pool.Flush();
|
||||
|
||||
if (record_count != num_iterations * threads.size() ||
|
||||
flush_count != (record_count / (buffer_size / sizeof(roctracer_record_t))))
|
||||
fatal_error("failed test4");
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,94 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <roctracer.h>
|
||||
#include <roctracer_hip.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
// This test checks that asynchronous activities can be enabled in distinct memory pools. It enables
|
||||
// activity reporting for HIP kernel dispatches in one memory pool, and memory copy reporting in
|
||||
// another memory pool. The output of this test to stdout should be a series of kernel dispatch
|
||||
// records (10) followed by a series of memory copy records (10). The records should not be
|
||||
// interleaved.
|
||||
|
||||
// Empty device kernel: the pointer argument is ignored and no work is done. It exists only
// so that main() has a kernel to dispatch.
__global__ void kernel(void* global_memory) {}
|
||||
|
||||
namespace {
|
||||
|
||||
template <typename T> inline void CHECK(T status);
|
||||
|
||||
template <> inline void CHECK(hipError_t err) {
|
||||
if (err != hipSuccess) {
|
||||
std::cerr << hipGetErrorString(err) << std::endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
template <> inline void CHECK(roctracer_status_t status) {
|
||||
if (status != ROCTRACER_STATUS_SUCCESS) {
|
||||
std::cerr << roctracer_error_string() << std::endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
void buffer_callback(const char* begin, const char* end, void* arg) {
|
||||
for (const roctracer_record_t* record = (const roctracer_record_t*)begin;
|
||||
record != (const roctracer_record_t*)end; CHECK(roctracer_next_record(record, &record))) {
|
||||
fprintf(stdout, "\t:%s\t: correlation_id(%lu) time_ns(%lu:%lu)\n",
|
||||
roctracer_op_string(record->domain, record->op, record->kind), record->correlation_id,
|
||||
record->begin_ns, record->end_ns);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main() {
|
||||
CHECK(hipSetDevice(0));
|
||||
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_callback_fun = buffer_callback;
|
||||
properties.buffer_callback_arg = nullptr;
|
||||
properties.buffer_size = 1024 * 1024;
|
||||
|
||||
roctracer_pool_t* pool_1;
|
||||
CHECK(roctracer_open_pool_expl(&properties, &pool_1));
|
||||
CHECK(roctracer_enable_op_activity_expl(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_DISPATCH, pool_1));
|
||||
|
||||
roctracer_pool_t* pool_2;
|
||||
CHECK(roctracer_open_pool_expl(&properties, &pool_2));
|
||||
CHECK(roctracer_enable_op_activity_expl(ACTIVITY_DOMAIN_HIP_OPS, HIP_OP_ID_COPY, pool_2));
|
||||
CHECK(roctracer_enable_op_activity_expl(ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipMemcpy, pool_2));
|
||||
|
||||
int host_array[256] = {0};
|
||||
int* device_memory;
|
||||
CHECK(hipMalloc(&device_memory, sizeof(host_array)));
|
||||
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
CHECK(hipMemcpy(device_memory, host_array, sizeof(host_array), hipMemcpyHostToDevice));
|
||||
kernel<<<1, 1>>>(device_memory);
|
||||
}
|
||||
CHECK(hipDeviceSynchronize());
|
||||
|
||||
CHECK(roctracer_flush_activity_expl(pool_1));
|
||||
CHECK(roctracer_flush_activity_expl(pool_2));
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,79 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "trace_buffer.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
|
||||
// Entry type stored in the TraceBuffer under test. It carries only the atomic state flag,
// which the worker threads in main() set to TRACE_ENTRY_COMPLETE after obtaining an entry.
struct TraceEntry {
|
||||
std::atomic<roctracer::TraceEntryState> valid;
|
||||
};
|
||||
|
||||
TRACE_BUFFER_INSTANTIATE();
|
||||
|
||||
namespace {

// Number of hardware threads reported by the standard library. This replaces parsing
// /proc/cpuinfo through a namespace-scope std::ifstream, which kept the file open for the
// lifetime of the process. hardware_concurrency() may return 0 when the value cannot be
// determined; main() clamps the value into [min_num_threads, max_num_threads].
const std::size_t num_cpu_cores = std::thread::hardware_concurrency();

// Number of entries each worker thread requests from the trace buffer.
constexpr std::size_t num_iterations = 1000;
// Lower and upper bounds for the number of worker threads.
constexpr std::size_t min_num_threads = 10;
constexpr std::size_t max_num_threads = 50;

}  // namespace
|
||||
|
||||
int main() {
|
||||
const std::size_t num_threads = std::clamp(num_cpu_cores, min_num_threads, max_num_threads);
|
||||
std::vector<std::thread> threads(num_threads);
|
||||
|
||||
std::atomic<size_t> flush_count(0); // Count the number of times the flush callback is called.
|
||||
roctracer::TraceBuffer<TraceEntry> trace_buffer("Test", 10,
|
||||
[&flush_count](auto* entry) { ++flush_count; });
|
||||
|
||||
// Start the worker threads. Each thread will request 'num_iterations' entries from the
|
||||
// 'trace_buffer', then exit.
|
||||
for (auto&& thread : threads) {
|
||||
thread = std::thread([&trace_buffer]() {
|
||||
for (std::size_t j = 0; j < num_iterations; ++j) {
|
||||
auto& entry = trace_buffer.Emplace();
|
||||
entry.valid.store(roctracer::TRACE_ENTRY_COMPLETE, std::memory_order_release);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Wait for all the threads to complete, then flush the trace buffer.
|
||||
for (auto&& thread : threads) thread.join();
|
||||
trace_buffer.Flush();
|
||||
|
||||
std::cout << "number of records flushed = " << flush_count << std::endl;
|
||||
if (flush_count != num_iterations * threads.size()) abort();
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
@@ -0,0 +1,503 @@
|
||||
+ LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose_ctest
|
||||
# INIT #############################
|
||||
# START (99) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (98) #############################
|
||||
PASSED!
|
||||
# START (97) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (96) #############################
|
||||
PASSED!
|
||||
# START (95) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (94) #############################
|
||||
PASSED!
|
||||
# START (93) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (92) #############################
|
||||
PASSED!
|
||||
# START (91) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (90) #############################
|
||||
PASSED!
|
||||
# START (89) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (88) #############################
|
||||
PASSED!
|
||||
# START (87) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (86) #############################
|
||||
PASSED!
|
||||
# START (85) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (84) #############################
|
||||
PASSED!
|
||||
# START (83) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (82) #############################
|
||||
PASSED!
|
||||
# START (81) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (80) #############################
|
||||
PASSED!
|
||||
# START (79) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (78) #############################
|
||||
PASSED!
|
||||
# START (77) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (76) #############################
|
||||
PASSED!
|
||||
# START (75) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (74) #############################
|
||||
PASSED!
|
||||
# START (73) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (72) #############################
|
||||
PASSED!
|
||||
# START (71) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (70) #############################
|
||||
PASSED!
|
||||
# START (69) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (68) #############################
|
||||
PASSED!
|
||||
# START (67) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (66) #############################
|
||||
PASSED!
|
||||
# START (65) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (64) #############################
|
||||
PASSED!
|
||||
# START (63) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (62) #############################
|
||||
PASSED!
|
||||
# START (61) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (60) #############################
|
||||
PASSED!
|
||||
# START (59) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (58) #############################
|
||||
PASSED!
|
||||
# START (57) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (56) #############################
|
||||
PASSED!
|
||||
# START (55) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (54) #############################
|
||||
PASSED!
|
||||
# START (53) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (52) #############################
|
||||
PASSED!
|
||||
# START (51) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (50) #############################
|
||||
PASSED!
|
||||
# START (49) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (48) #############################
|
||||
PASSED!
|
||||
# START (47) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (46) #############################
|
||||
PASSED!
|
||||
# START (45) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (44) #############################
|
||||
PASSED!
|
||||
# START (43) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (42) #############################
|
||||
PASSED!
|
||||
# START (41) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (40) #############################
|
||||
PASSED!
|
||||
# START (39) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (38) #############################
|
||||
PASSED!
|
||||
# START (37) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (36) #############################
|
||||
PASSED!
|
||||
# START (35) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (34) #############################
|
||||
PASSED!
|
||||
# START (33) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (32) #############################
|
||||
PASSED!
|
||||
# START (31) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (30) #############################
|
||||
PASSED!
|
||||
# START (29) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (28) #############################
|
||||
PASSED!
|
||||
# START (27) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (26) #############################
|
||||
PASSED!
|
||||
# START (25) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (24) #############################
|
||||
PASSED!
|
||||
# START (23) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (22) #############################
|
||||
PASSED!
|
||||
# START (21) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (20) #############################
|
||||
PASSED!
|
||||
# START (19) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (18) #############################
|
||||
PASSED!
|
||||
# START (17) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (16) #############################
|
||||
PASSED!
|
||||
# START (15) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (14) #############################
|
||||
PASSED!
|
||||
# START (13) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (12) #############################
|
||||
PASSED!
|
||||
# START (11) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (10) #############################
|
||||
PASSED!
|
||||
# START (9) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (8) #############################
|
||||
PASSED!
|
||||
# START (7) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (6) #############################
|
||||
PASSED!
|
||||
# START (5) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (4) #############################
|
||||
PASSED!
|
||||
# START (3) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (2) #############################
|
||||
PASSED!
|
||||
# START (1) #############################
|
||||
rocTX <"before hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"after hipLaunchKernel pid(22834) tid(22834)">
|
||||
rocTX <"hipMemcpy pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
rocTX <"(null) pid(22834) tid(22834)">
|
||||
PASSED!
|
||||
# START (0) #############################
|
||||
PASSED!
|
||||
# STOP #############################
|
||||
@@ -0,0 +1,815 @@
|
||||
0x21fde60 agent cpu
|
||||
0x21ff890 agent cpu
|
||||
0x2239bd0 agent gpu
|
||||
0x223ddf0 agent gpu
|
||||
0x2242290 agent gpu
|
||||
0x2246740 agent gpu
|
||||
6503209724059324
|
||||
ROCtracer (35331):
|
||||
ROCtracer: trace control flush rate(100000us)
|
||||
HIP-trace(*)
|
||||
6503209734529563:6503209734531917 35331:35331 hipGetDevicePropertiesR0600(props={pageableMemoryAccessUsesHostPageTables=0, pageableMemoryAccess=0, concurrentManagedAccess=1, directManagedMemAccessFromHost=0, managedMemory=1, asicRevision=1, isLargeBar=1, cooperativeMultiDeviceUnmatchedSharedMem=1, cooperativeMultiDeviceUnmatchedBlockDim=1, cooperativeMultiDeviceUnmatchedGridDim=1, cooperativeMultiDeviceUnmatchedFunc=1, tccDriver=0, ECCEnabled=0, kernelExecTimeoutEnabled=0, texturePitchAlignment=256, textureAlignment=256, memPitch=2147483647, hdpRegFlushCntl=0x7f1f3948c004, hdpMemFlushCntl=0x7f1f3948c000, maxTexture3D=0x7f1e76c31318, maxTexture2D=0x7f1e76c31310, maxTexture1D=16384, maxTexture1DLinear=2147483647, cooperativeMultiDeviceLaunch=1, cooperativeLaunch=1, integrated=0, gcnArchName="gfx90a:sramecc+:xnack-", gcnArch=910, canMapHostMemory=1, isMultiGpuBoard=0, maxSharedMemoryPerMultiProcessor=65536, pciDeviceID=0, pciBusID=74, pciDomainID=0, concurrentKernels=1, arch={}, clockInstructionRate=1000000, computeMode=0, maxThreadsPerMultiProcessor=2048, l2CacheSize=8388608, multiProcessorCount=104, minor=0, major=9, totalConstMem=2147483647, memoryBusWidth=4096, memoryClockRate=1600000, clockRate=1700000, maxGridSize=0x7f1e76c31198, maxThreadsDim=0x7f1e76c3118c, maxThreadsPerBlock=1024, warpSize=64, regsPerBlock=65536, sharedMemPerBlock=65536, totalGlobalMem=68702699520, name="AMD Instinct MI210"}, device=0) :1
|
||||
6503209735378958:6503209735406670 35331:35331 hipMalloc(ptr=0x7f19f7a00000, size=4194304) :2
|
||||
6503209735407101:6503209735423080 35331:35331 hipMalloc(ptr=0x7f19f7400000, size=4194304) :3
|
||||
6503209978814181:6503209979619458 2:0 CopyHostToDevice:4:35331
|
||||
6503209980083977:6503209980169097 2:0 matrixTranspose(float*, float*, int):7:35331
|
||||
6503209981680453:6503209984909684 2:0 CopyDeviceToHost:8:35331
|
||||
6503209994109180:6503209995685815 2:0 CopyHostToDevice:9:35331
|
||||
6503209995761855:6503209995862975 2:0 matrixTranspose(float*, float*, int):12:35331
|
||||
6503209995867135:6503209997327251 2:0 CopyDeviceToHost:13:35331
|
||||
6503210005717148:6503210007365784 2:0 CopyHostToDevice:14:35331
|
||||
6503210007436384:6503210007539904 2:0 matrixTranspose(float*, float*, int):17:35331
|
||||
6503210007543584:6503210008994420 2:0 CopyDeviceToHost:18:35331
|
||||
6503210017396637:6503210019047993 2:0 CopyHostToDevice:19:35331
|
||||
6503210019116193:6503210019222752 2:0 matrixTranspose(float*, float*, int):22:35331
|
||||
6503210019226432:6503210020670388 2:0 CopyDeviceToHost:23:35331
|
||||
6503209735433059:6503209979689030 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :4
|
||||
6503209979696614:6503209979697045 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :5
|
||||
6503209979701393:6503209979701654 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :6
|
||||
6503209979703096:6503209980073714 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :7
|
||||
6503209980087790:6503209985036979 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :8
|
||||
6503209994087224:6503209995737275 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :9
|
||||
6503209995740781:6503209995741012 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :10
|
||||
6503209995741503:6503209995741663 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :11
|
||||
6503209995741943:6503209995748686 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :12
|
||||
6503209995750359:6503209997440904 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :13
|
||||
6503210005702036:6503210007416737 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :14
|
||||
6503210007419652:6503210007419802 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :15
|
||||
6503210007420093:6503210007420233 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :16
|
||||
6503210007420464:6503210007424171 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :17
|
||||
6503210007424591:6503210009107362 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :18
|
||||
6503210017382250:6503210019098583 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :19
|
||||
6503210019099114:6503210019099254 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :20
|
||||
6503210019099474:6503210019099595 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :21
|
||||
6503210019099845:6503210019104073 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :22
|
||||
6503210019104454:6503210020779761 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :23
|
||||
6503210029076606:6503210030728121 2:0 CopyHostToDevice:24:35331
|
||||
6503210030793281:6503210030895521 2:0 matrixTranspose(float*, float*, int):27:35331
|
||||
6503210030899201:6503210032343317 2:0 CopyDeviceToHost:28:35331
|
||||
6503210040715328:6503210042368282 2:0 CopyHostToDevice:29:35331
|
||||
6503210042440800:6503210042542880 2:0 matrixTranspose(float*, float*, int):32:35331
|
||||
6503210042546560:6503210043989715 2:0 CopyDeviceToHost:33:35331
|
||||
6503210052371125:6503210054023439 2:0 CopyHostToDevice:34:35331
|
||||
6503210054093079:6503210054195158 2:0 matrixTranspose(float*, float*, int):37:35331
|
||||
6503210054198838:6503210055644233 2:0 CopyDeviceToHost:38:35331
|
||||
6503210064560521:6503210066213955 2:0 CopyHostToDevice:39:35331
|
||||
6503210066279755:6503210066381835 2:0 matrixTranspose(float*, float*, int):42:35331
|
||||
6503210066385515:6503210067829149 2:0 CopyDeviceToHost:43:35331
|
||||
6503210076195679:6503210077863833 2:0 CopyHostToDevice:44:35331
|
||||
6503210077934113:6503210078036193 2:0 matrixTranspose(float*, float*, int):47:35331
|
||||
6503210078040033:6503210079483188 2:0 CopyDeviceToHost:48:35331
|
||||
6503210088352916:6503210090004750 2:0 CopyHostToDevice:49:35331
|
||||
6503210090070070:6503210090172149 2:0 matrixTranspose(float*, float*, int):52:35331
|
||||
6503210090175669:6503210091618984 2:0 CopyDeviceToHost:53:35331
|
||||
6503210100501672:6503210102156066 2:0 CopyHostToDevice:54:35331
|
||||
6503210102222026:6503210102324106 2:0 matrixTranspose(float*, float*, int):57:35331
|
||||
6503210102327946:6503210103774141 2:0 CopyDeviceToHost:58:35331
|
||||
6503210112110867:6503210113766700 2:0 CopyHostToDevice:59:35331
|
||||
6503210113844338:6503210113947698 2:0 matrixTranspose(float*, float*, int):62:35331
|
||||
6503210113951378:6503210115396771 2:0 CopyDeviceToHost:63:35331
|
||||
6503210029060790:6503210030776071 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :24
|
||||
6503210030776722:6503210030776813 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :25
|
||||
6503210030777083:6503210030777213 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :26
|
||||
6503210030777534:6503210030780930 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :27
|
||||
6503210030783495:6503210032454133 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :28
|
||||
6503210040700588:6503210042424295 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :29
|
||||
6503210042424756:6503210042424896 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :30
|
||||
6503210042425107:6503210042425237 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :31
|
||||
6503210042425457:6503210042428863 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :32
|
||||
6503210042432991:6503210044098270 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :33
|
||||
6503210052357328:6503210054074643 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :34
|
||||
6503210054075114:6503210054075204 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :35
|
||||
6503210054075394:6503210054075505 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :36
|
||||
6503210054077799:6503210054081145 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :37
|
||||
6503210054081536:6503210055752064 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :38
|
||||
6503210064546646:6503210066261336 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :39
|
||||
6503210066261796:6503210066261907 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :40
|
||||
6503210066262137:6503210066262227 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :41
|
||||
6503210066264632:6503210066267798 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :42
|
||||
6503210066268178:6503210067936813 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :43
|
||||
6503210076181475:6503210077915942 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :44
|
||||
6503210077916312:6503210077916473 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :45
|
||||
6503210077918687:6503210077918827 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :46
|
||||
6503210077919047:6503210077922233 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :47
|
||||
6503210077922664:6503210079590527 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :48
|
||||
6503210088339324:6503210090051840 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :49
|
||||
6503210090052241:6503210090052371 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :50
|
||||
6503210090054485:6503210090054625 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :51
|
||||
6503210090054855:6503210090058121 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :52
|
||||
6503210090058462:6503210091724542 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :53
|
||||
6503210100488016:6503210102203548 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :54
|
||||
6503210102205732:6503210102205872 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :55
|
||||
6503210102206082:6503210102206213 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :56
|
||||
6503210102206463:6503210102210010 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :57
|
||||
6503210102210380:6503210103881750 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :58
|
||||
6503210112098009:6503210113825333 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :59
|
||||
6503210113828058:6503210113828198 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :60
|
||||
6503210113828388:6503210113828519 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :61
|
||||
6503210113828889:6503210113832867 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :62
|
||||
6503210113833307:6503210115506611 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :63
|
||||
6503210123754974:6503210125414806 2:0 CopyHostToDevice:64:35331
|
||||
6503210125480606:6503210125582365 2:0 matrixTranspose(float*, float*, int):67:35331
|
||||
6503210125585885:6503210127028879 2:0 CopyDeviceToHost:68:35331
|
||||
6503210135409161:6503210137061633 2:0 CopyHostToDevice:69:35331
|
||||
6503210137127913:6503210137231913 2:0 matrixTranspose(float*, float*, int):72:35331
|
||||
6503210137235753:6503210138677786 2:0 CopyDeviceToHost:73:35331
|
||||
6503210147599026:6503210149250218 2:0 CopyHostToDevice:74:35331
|
||||
6503210149317778:6503210149419538 2:0 matrixTranspose(float*, float*, int):77:35331
|
||||
6503210149423378:6503210150866211 2:0 CopyDeviceToHost:78:35331
|
||||
6503210159229213:6503210160882006 2:0 CopyHostToDevice:79:35331
|
||||
6503210160948285:6503210161050205 2:0 matrixTranspose(float*, float*, int):82:35331
|
||||
6503210161053885:6503210162499918 2:0 CopyDeviceToHost:83:35331
|
||||
6503210170875401:6503210172542113 2:0 CopyHostToDevice:84:35331
|
||||
6503210172609833:6503210172712232 2:0 matrixTranspose(float*, float*, int):87:35331
|
||||
6503210172715912:6503210174186746 2:0 CopyDeviceToHost:88:35331
|
||||
6503210182557576:6503210184208611 2:0 CopyHostToDevice:89:35331
|
||||
6503210184280809:6503210184383369 2:0 matrixTranspose(float*, float*, int):92:35331
|
||||
6503210184387049:6503210185831324 2:0 CopyDeviceToHost:93:35331
|
||||
6503210194685533:6503210196337527 2:0 CopyHostToDevice:94:35331
|
||||
6503210196406687:6503210196508767 2:0 matrixTranspose(float*, float*, int):97:35331
|
||||
6503210196512607:6503210197956242 2:0 CopyDeviceToHost:98:35331
|
||||
6503210206316372:6503210207967406 2:0 CopyHostToDevice:99:35331
|
||||
6503210208037686:6503210208139766 2:0 matrixTranspose(float*, float*, int):102:35331
|
||||
6503210208143446:6503210209588201 2:0 CopyDeviceToHost:103:35331
|
||||
6503210217934252:6503210219584806 2:0 CopyHostToDevice:104:35331
|
||||
6503210219650605:6503210219752205 2:0 matrixTranspose(float*, float*, int):107:35331
|
||||
6503210219755885:6503210221203520 2:0 CopyDeviceToHost:108:35331
|
||||
6503210123741484:6503210125463969 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :64
|
||||
6503210125464811:6503210125464901 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :65
|
||||
6503210125465201:6503210125465321 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :66
|
||||
6503210125465632:6503210125469068 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :67
|
||||
6503210125469589:6503210127137302 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :68
|
||||
6503210135395218:6503210137111722 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :69
|
||||
6503210137112112:6503210137112263 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :70
|
||||
6503210137112493:6503210137112623 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :71
|
||||
6503210137112844:6503210137116330 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :72
|
||||
6503210137118444:6503210138787510 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :73
|
||||
6503210147585037:6503210149301811 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :74
|
||||
6503210149302232:6503210149302372 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :75
|
||||
6503210149302582:6503210149302702 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :76
|
||||
6503210149302913:6503210149306179 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :77
|
||||
6503210149308704:6503210150974684 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :78
|
||||
6503210159216530:6503210160930018 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :79
|
||||
6503210160930449:6503210160930579 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :80
|
||||
6503210160930759:6503210160930869 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :81
|
||||
6503210160933294:6503210160936600 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :82
|
||||
6503210160936991:6503210162608491 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :83
|
||||
6503210170862119:6503210172590714 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :84
|
||||
6503210172591115:6503210172591235 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :85
|
||||
6503210172591436:6503210172591566 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :86
|
||||
6503210172595143:6503210172598329 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :87
|
||||
6503210172598689:6503210174298051 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :88
|
||||
6503210182544005:6503210184262582 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :89
|
||||
6503210184263033:6503210184263133 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :90
|
||||
6503210184265157:6503210184265297 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :91
|
||||
6503210184265538:6503210184268804 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :92
|
||||
6503210184269185:6503210185939693 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :93
|
||||
6503210194672109:6503210196385186 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :94
|
||||
6503210196385617:6503210196385727 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :95
|
||||
6503210196390917:6503210196391047 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :96
|
||||
6503210196391288:6503210196394534 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :97
|
||||
6503210196395065:6503210198065643 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :98
|
||||
6503210206302971:6503210208019605 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :99
|
||||
6503210208021769:6503210208021919 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :100
|
||||
6503210208022109:6503210208022230 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :101
|
||||
6503210208022480:6503210208025616 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :102
|
||||
6503210208026027:6503210209696425 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :103
|
||||
6503210217920678:6503210219631912 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :104
|
||||
6503210219634487:6503210219634607 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :105
|
||||
6503210219634857:6503210219634977 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :106
|
||||
6503210219635208:6503210219638524 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :107
|
||||
6503210219639005:6503210221311988 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :108
|
||||
6503210229575811:6503210231227325 2:0 CopyHostToDevice:109:35331
|
||||
6503210231294245:6503210231396004 2:0 matrixTranspose(float*, float*, int):112:35331
|
||||
6503210231399684:6503210232882999 2:0 CopyDeviceToHost:113:35331
|
||||
6503210241818168:6503210243474482 2:0 CopyHostToDevice:114:35331
|
||||
6503210243540762:6503210243642681 2:0 matrixTranspose(float*, float*, int):117:35331
|
||||
6503210243646361:6503210245111276 2:0 CopyDeviceToHost:118:35331
|
||||
6503210253536692:6503210255190285 2:0 CopyHostToDevice:119:35331
|
||||
6503210255263123:6503210255364883 2:0 matrixTranspose(float*, float*, int):122:35331
|
||||
6503210255368723:6503210256818757 2:0 CopyDeviceToHost:123:35331
|
||||
6503210265732641:6503210267385274 2:0 CopyHostToDevice:124:35331
|
||||
6503210267455393:6503210267557793 2:0 matrixTranspose(float*, float*, int):127:35331
|
||||
6503210267561473:6503210269015667 2:0 CopyDeviceToHost:128:35331
|
||||
6503210277598192:6503210279249545 2:0 CopyHostToDevice:129:35331
|
||||
6503210279319185:6503210279421104 2:0 matrixTranspose(float*, float*, int):132:35331
|
||||
6503210279424784:6503210280874338 2:0 CopyDeviceToHost:133:35331
|
||||
6503210289260224:6503210290912697 2:0 CopyHostToDevice:134:35331
|
||||
6503210290983137:6503210291085217 2:0 matrixTranspose(float*, float*, int):137:35331
|
||||
6503210291089057:6503210292533171 2:0 CopyDeviceToHost:138:35331
|
||||
6503210300907376:6503210302558730 2:0 CopyHostToDevice:139:35331
|
||||
6503210302628049:6503210302730129 2:0 matrixTranspose(float*, float*, int):142:35331
|
||||
6503210302733809:6503210304178563 2:0 CopyDeviceToHost:143:35331
|
||||
6503210312547809:6503210314201242 2:0 CopyHostToDevice:144:35331
|
||||
6503210314267522:6503210314369601 2:0 matrixTranspose(float*, float*, int):147:35331
|
||||
6503210314373121:6503210315816595 2:0 CopyDeviceToHost:148:35331
|
||||
6503210229562500:6503210231277591 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :109
|
||||
6503210231278423:6503210231278513 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :110
|
||||
6503210231278803:6503210231278924 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :111
|
||||
6503210231279164:6503210231282240 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :112
|
||||
6503210231282681:6503210233000296 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :113
|
||||
6503210241804135:6503210243524255 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :114
|
||||
6503210243524656:6503210243524756 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :115
|
||||
6503210243524926:6503210243525046 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :116
|
||||
6503210243525277:6503210243528703 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :117
|
||||
6503210243530877:6503210245227574 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :118
|
||||
6503210253522920:6503210255246977 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :119
|
||||
6503210255247438:6503210255247558 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :120
|
||||
6503210255247789:6503210255247909 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :121
|
||||
6503210255248169:6503210255251365 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :122
|
||||
6503210255253920:6503210256939426 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :123
|
||||
6503210265719601:6503210267437236 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :124
|
||||
6503210267437627:6503210267437747 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :125
|
||||
6503210267437968:6503210267438078 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :126
|
||||
6503210267440492:6503210267443688 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :127
|
||||
6503210267444109:6503210269129916 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :128
|
||||
6503210277584437:6503210279300810 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :129
|
||||
6503210279301261:6503210279301401 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :130
|
||||
6503210279301671:6503210279301792 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :131
|
||||
6503210279304346:6503210279307522 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :132
|
||||
6503210279307933:6503210280988640 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :133
|
||||
6503210289246847:6503210290965093 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :134
|
||||
6503210290965514:6503210290965645 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :135
|
||||
6503210290967768:6503210290967889 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :136
|
||||
6503210290968169:6503210290971425 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :137
|
||||
6503210290971806:6503210292641222 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :138
|
||||
6503210300894099:6503210302609931 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :139
|
||||
6503210302610392:6503210302610492 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :140
|
||||
6503210302612446:6503210302612586 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :141
|
||||
6503210302612857:6503210302616123 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :142
|
||||
6503210302616584:6503210304286501 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :143
|
||||
6503210312534909:6503210314249539 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :144
|
||||
6503210314251813:6503210314251934 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :145
|
||||
6503210314252204:6503210314252324 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :146
|
||||
6503210314252585:6503210314255711 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :147
|
||||
6503210314256091:6503210315925297 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :148
|
||||
6503210324183531:6503210325837284 2:0 CopyHostToDevice:149:35331
|
||||
6503210325912842:6503210326015722 2:0 matrixTranspose(float*, float*, int):152:35331
|
||||
6503210326019402:6503210327465916 2:0 CopyDeviceToHost:153:35331
|
||||
6503210335831961:6503210337492434 2:0 CopyHostToDevice:154:35331
|
||||
6503210337580473:6503210337682233 2:0 matrixTranspose(float*, float*, int):157:35331
|
||||
6503210337686073:6503210339132907 2:0 CopyDeviceToHost:158:35331
|
||||
6503210348021029:6503210349674942 2:0 CopyHostToDevice:159:35331
|
||||
6503210349740262:6503210349846021 2:0 matrixTranspose(float*, float*, int):162:35331
|
||||
6503210349849701:6503210351297815 2:0 CopyDeviceToHost:163:35331
|
||||
6503210359647380:6503210361302893 2:0 CopyHostToDevice:164:35331
|
||||
6503210361389813:6503210361491572 2:0 matrixTranspose(float*, float*, int):167:35331
|
||||
6503210361495412:6503210362940006 2:0 CopyDeviceToHost:168:35331
|
||||
6503210371314691:6503210372968284 2:0 CopyHostToDevice:169:35331
|
||||
6503210373048644:6503210373150563 2:0 matrixTranspose(float*, float*, int):172:35331
|
||||
6503210373154243:6503210374617877 2:0 CopyDeviceToHost:173:35331
|
||||
6503210383017682:6503210384692714 2:0 CopyHostToDevice:174:35331
|
||||
6503210384765234:6503210384867634 2:0 matrixTranspose(float*, float*, int):177:35331
|
||||
6503210384871474:6503210386314948 2:0 CopyDeviceToHost:178:35331
|
||||
6503210394700772:6503210396409404 2:0 CopyHostToDevice:179:35331
|
||||
6503210396484162:6503210396586242 2:0 matrixTranspose(float*, float*, int):182:35331
|
||||
6503210396589922:6503210398071155 2:0 CopyDeviceToHost:183:35331
|
||||
6503210407004395:6503210408654947 2:0 CopyHostToDevice:184:35331
|
||||
6503210408726827:6503210408829226 2:0 matrixTranspose(float*, float*, int):187:35331
|
||||
6503210408832906:6503210410293660 2:0 CopyDeviceToHost:188:35331
|
||||
6503210419220499:6503210420887211 2:0 CopyHostToDevice:189:35331
|
||||
6503210420952691:6503210421054931 2:0 matrixTranspose(float*, float*, int):192:35331
|
||||
6503210421058611:6503210422501444 2:0 CopyDeviceToHost:193:35331
|
||||
6503210324170330:6503210325893976 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :149
|
||||
6503210325896952:6503210325897082 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :150
|
||||
6503210325897282:6503210325897403 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :151
|
||||
6503210325897673:6503210325900849 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :152
|
||||
6503210325901390:6503210327573671 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :153
|
||||
6503210335818373:6503210337564121 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :154
|
||||
6503210337564561:6503210337564702 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :155
|
||||
6503210337564932:6503210337565042 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :156
|
||||
6503210337565293:6503210337568729 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :157
|
||||
6503210337569150:6503210339241812 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :158
|
||||
6503210348008101:6503210349724094 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :159
|
||||
6503210349724505:6503210349724635 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :160
|
||||
6503210349724845:6503210349724975 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :161
|
||||
6503210349725196:6503210349728512 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :162
|
||||
6503210349731698:6503210351407356 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :163
|
||||
6503210359634144:6503210361373791 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :164
|
||||
6503210361374241:6503210361374362 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :165
|
||||
6503210361374602:6503210361374712 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :166
|
||||
6503210361374983:6503210361378219 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :167
|
||||
6503210361380343:6503210363048967 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :168
|
||||
6503210371301383:6503210373030941 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :169
|
||||
6503210373031292:6503210373031402 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :170
|
||||
6503210373031572:6503210373031683 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :171
|
||||
6503210373033807:6503210373037013 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :172
|
||||
6503210373037403:6503210374734290 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :173
|
||||
6503210383004048:6503210384746339 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :174
|
||||
6503210384746710:6503210384746850 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :175
|
||||
6503210384747081:6503210384747211 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :176
|
||||
6503210384749806:6503210384752962 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :177
|
||||
6503210384753332:6503210386424672 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :178
|
||||
6503210394688249:6503210396465866 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :179
|
||||
6503210396466266:6503210396466407 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :180
|
||||
6503210396468781:6503210396468941 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :181
|
||||
6503210396469212:6503210396472748 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :182
|
||||
6503210396473209:6503210398185786 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :183
|
||||
6503210406990927:6503210408706859 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :184
|
||||
6503210408707260:6503210408707390 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :185
|
||||
6503210408711929:6503210408712049 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :186
|
||||
6503210408712299:6503210408715385 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :187
|
||||
6503210408715756:6503210410404919 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :188
|
||||
6503210419207635:6503210420934348 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :189
|
||||
6503210420937233:6503210420937413 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :190
|
||||
6503210420937654:6503210420937764 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :191
|
||||
6503210420937995:6503210420941241 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :192
|
||||
6503210420941741:6503210422609735 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :193
|
||||
6503210430876446:6503210432536598 2:0 CopyHostToDevice:194:35331
|
||||
6503210432603198:6503210432705278 2:0 matrixTranspose(float*, float*, int):197:35331
|
||||
6503210432708958:6503210434158351 2:0 CopyDeviceToHost:198:35331
|
||||
6503210443064391:6503210444714783 2:0 CopyHostToDevice:199:35331
|
||||
6503210444781223:6503210444882982 2:0 matrixTranspose(float*, float*, int):202:35331
|
||||
6503210444886662:6503210446330776 2:0 CopyDeviceToHost:203:35331
|
||||
6503210455262895:6503210456915368 2:0 CopyHostToDevice:204:35331
|
||||
6503210456979727:6503210457081807 2:0 matrixTranspose(float*, float*, int):207:35331
|
||||
6503210457085487:6503210458529120 2:0 CopyDeviceToHost:208:35331
|
||||
6503210466903764:6503210468580717 2:0 CopyHostToDevice:209:35331
|
||||
6503210468651475:6503210468753555 2:0 matrixTranspose(float*, float*, int):212:35331
|
||||
6503210468757235:6503210470211269 2:0 CopyDeviceToHost:213:35331
|
||||
6503210479096990:6503210480748022 2:0 CopyHostToDevice:214:35331
|
||||
6503210480817982:6503210480923262 2:0 matrixTranspose(float*, float*, int):217:35331
|
||||
6503210480926942:6503210482369935 2:0 CopyDeviceToHost:218:35331
|
||||
6503210491251336:6503210492903009 2:0 CopyHostToDevice:219:35331
|
||||
6503210492969289:6503210493071368 2:0 matrixTranspose(float*, float*, int):222:35331
|
||||
6503210493075048:6503210494517402 2:0 CopyDeviceToHost:223:35331
|
||||
6503210502850325:6503210504500558 2:0 CopyHostToDevice:224:35331
|
||||
6503210504566518:6503210504668597 2:0 matrixTranspose(float*, float*, int):227:35331
|
||||
6503210504672117:6503210506125191 2:0 CopyDeviceToHost:228:35331
|
||||
6503210514485314:6503210516095067 2:0 CopyHostToDevice:229:35331
|
||||
6503210516162147:6503210516264547 2:0 matrixTranspose(float*, float*, int):232:35331
|
||||
6503210516268067:6503210517714100 2:0 CopyDeviceToHost:233:35331
|
||||
6503210430863463:6503210432584675 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :194
|
||||
6503210432587661:6503210432587821 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :195
|
||||
6503210432588061:6503210432588162 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :196
|
||||
6503210432588392:6503210432591738 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :197
|
||||
6503210432592079:6503210434267126 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :198
|
||||
6503210443050897:6503210444765146 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :199
|
||||
6503210444765547:6503210444765677 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :200
|
||||
6503210444765888:6503210444765988 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :201
|
||||
6503210444766228:6503210444769665 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :202
|
||||
6503210444770025:6503210446438640 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :203
|
||||
6503210455249722:6503210456963611 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :204
|
||||
6503210456964182:6503210456964292 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :205
|
||||
6503210456964533:6503210456964643 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :206
|
||||
6503210456964873:6503210456968250 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :207
|
||||
6503210456970354:6503210458636714 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :208
|
||||
6503210466890212:6503210468635499 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :209
|
||||
6503210468635910:6503210468636070 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :210
|
||||
6503210468636301:6503210468636401 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :211
|
||||
6503210468636741:6503210468639717 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :212
|
||||
6503210468642282:6503210470320454 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :213
|
||||
6503210479083366:6503210480800040 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :214
|
||||
6503210480800641:6503210480800772 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :215
|
||||
6503210480800962:6503210480801072 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :216
|
||||
6503210480803306:6503210480806332 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :217
|
||||
6503210480806773:6503210482476800 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :218
|
||||
6503210491237789:6503210492951458 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :219
|
||||
6503210492951989:6503210492952129 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :220
|
||||
6503210492952319:6503210492952429 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :221
|
||||
6503210492954613:6503210492957589 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :222
|
||||
6503210492958020:6503210494624851 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :223
|
||||
6503210502837203:6503210504548326 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :224
|
||||
6503210504548847:6503210504549008 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :225
|
||||
6503210504551382:6503210504551502 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :226
|
||||
6503210504551753:6503210504554989 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :227
|
||||
6503210504555339:6503210506236838 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :228
|
||||
6503210514472503:6503210516142770 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :229
|
||||
6503210516143201:6503210516143371 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :230
|
||||
6503210516147078:6503210516147228 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :231
|
||||
6503210516147439:6503210516150625 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :232
|
||||
6503210516151046:6503210517823568 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :233
|
||||
6503210526084144:6503210527734376 2:0 CopyHostToDevice:234:35331
|
||||
6503210527801776:6503210527908976 2:0 matrixTranspose(float*, float*, int):237:35331
|
||||
6503210527912656:6503210529356609 2:0 CopyDeviceToHost:238:35331
|
||||
6503210537712303:6503210539363018 2:0 CopyHostToDevice:239:35331
|
||||
6503210539438256:6503210539540016 2:0 matrixTranspose(float*, float*, int):242:35331
|
||||
6503210539543696:6503210540989091 2:0 CopyDeviceToHost:243:35331
|
||||
6503210549357383:6503210551008578 2:0 CopyHostToDevice:244:35331
|
||||
6503210551075178:6503210551177417 2:0 matrixTranspose(float*, float*, int):247:35331
|
||||
6503210551181257:6503210552624572 2:0 CopyDeviceToHost:248:35331
|
||||
6503210560964704:6503210562615579 2:0 CopyHostToDevice:249:35331
|
||||
6503210562680419:6503210562782498 2:0 matrixTranspose(float*, float*, int):252:35331
|
||||
6503210562786178:6503210564229654 2:0 CopyDeviceToHost:253:35331
|
||||
6503210572594266:6503210574246260 2:0 CopyHostToDevice:254:35331
|
||||
6503210574317340:6503210574419259 2:0 matrixTranspose(float*, float*, int):257:35331
|
||||
6503210574423099:6503210575865295 2:0 CopyDeviceToHost:258:35331
|
||||
6503210584215347:6503210585868301 2:0 CopyHostToDevice:259:35331
|
||||
6503210585935061:6503210586038421 2:0 matrixTranspose(float*, float*, int):262:35331
|
||||
6503210586042101:6503210587484936 2:0 CopyDeviceToHost:263:35331
|
||||
6503210596359946:6503210598009861 2:0 CopyHostToDevice:264:35331
|
||||
6503210598075660:6503210598177740 2:0 matrixTranspose(float*, float*, int):267:35331
|
||||
6503210598181260:6503210599623615 2:0 CopyDeviceToHost:268:35331
|
||||
6503210608497171:6503210610150604 2:0 CopyHostToDevice:269:35331
|
||||
6503210610222482:6503210610324562 2:0 matrixTranspose(float*, float*, int):272:35331
|
||||
6503210610328242:6503210611771396 2:0 CopyDeviceToHost:273:35331
|
||||
6503210620107038:6503210621757751 2:0 CopyHostToDevice:274:35331
|
||||
6503210621824831:6503210621928830 2:0 matrixTranspose(float*, float*, int):277:35331
|
||||
6503210621932670:6503210623375664 2:0 CopyDeviceToHost:278:35331
|
||||
6503210526069862:6503210527781697 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :234
|
||||
6503210527785755:6503210527785895 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :235
|
||||
6503210527786145:6503210527786256 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :236
|
||||
6503210527786516:6503210527790173 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :237
|
||||
6503210527790564:6503210529465250 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :238
|
||||
6503210537697307:6503210539418851 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :239
|
||||
6503210539421175:6503210539421345 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :240
|
||||
6503210539421556:6503210539421666 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :241
|
||||
6503210539421916:6503210539425713 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :242
|
||||
6503210539426124:6503210541097394 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :243
|
||||
6503210549342877:6503210551058419 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :244
|
||||
6503210551058990:6503210551059160 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :245
|
||||
6503210551059340:6503210551059450 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :246
|
||||
6503210551059671:6503210551062797 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :247
|
||||
6503210551063137:6503210552732353 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :248
|
||||
6503210560950676:6503210562663613 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :249
|
||||
6503210562664013:6503210562664154 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :250
|
||||
6503210562664334:6503210562664444 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :251
|
||||
6503210562664664:6503210562667860 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :252
|
||||
6503210562669824:6503210564336495 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :253
|
||||
6503210572580205:6503210574298452 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :254
|
||||
6503210574298953:6503210574299083 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :255
|
||||
6503210574299384:6503210574299464 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :256
|
||||
6503210574301558:6503210574304834 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :257
|
||||
6503210574305515:6503210575972046 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :258
|
||||
6503210584201108:6503210585916069 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :259
|
||||
6503210585916470:6503210585916620 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :260
|
||||
6503210585916820:6503210585916920 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :261
|
||||
6503210585919225:6503210585922541 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :262
|
||||
6503210585922962:6503210587592057 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :263
|
||||
6503210596345753:6503210598056696 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :264
|
||||
6503210598057127:6503210598057227 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :265
|
||||
6503210598059521:6503210598059652 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :266
|
||||
6503210598059892:6503210598063068 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :267
|
||||
6503210598063459:6503210599729980 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :268
|
||||
6503210608484356:6503210610204246 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :269
|
||||
6503210610204707:6503210610204847 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :270
|
||||
6503210610207372:6503210610207492 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :271
|
||||
6503210610207743:6503210610210929 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :272
|
||||
6503210610211239:6503210611879683 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :273
|
||||
6503210620094470:6503210621806725 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :274
|
||||
6503210621809471:6503210621809591 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :275
|
||||
6503210621809811:6503210621809921 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :276
|
||||
6503210621810162:6503210621813348 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :277
|
||||
6503210621813749:6503210623483445 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :278
|
||||
6503210631751947:6503210633402499 2:0 CopyHostToDevice:279:35331
|
||||
6503210633468619:6503210633571339 2:0 matrixTranspose(float*, float*, int):282:35331
|
||||
6503210633575179:6503210635028572 2:0 CopyDeviceToHost:283:35331
|
||||
6503210643384375:6503210645036048 2:0 CopyHostToDevice:284:35331
|
||||
6503210645100887:6503210645202967 2:0 matrixTranspose(float*, float*, int):287:35331
|
||||
6503210645206807:6503210646653001 2:0 CopyDeviceToHost:288:35331
|
||||
6503210655533601:6503210657184314 2:0 CopyHostToDevice:289:35331
|
||||
6503210657249793:6503210657351073 2:0 matrixTranspose(float*, float*, int):292:35331
|
||||
6503210657354753:6503210658798867 2:0 CopyDeviceToHost:293:35331
|
||||
6503210667164749:6503210668815462 2:0 CopyHostToDevice:294:35331
|
||||
6503210668883822:6503210668995821 2:0 matrixTranspose(float*, float*, int):297:35331
|
||||
6503210668999501:6503210670442655 2:0 CopyDeviceToHost:298:35331
|
||||
6503210678783507:6503210680434380 2:0 CopyHostToDevice:299:35331
|
||||
6503210680506738:6503210680608978 2:0 matrixTranspose(float*, float*, int):302:35331
|
||||
6503210680612658:6503210682055652 2:0 CopyDeviceToHost:303:35331
|
||||
6503210690383137:6503210692034010 2:0 CopyHostToDevice:304:35331
|
||||
6503210692101890:6503210692204129 2:0 matrixTranspose(float*, float*, int):307:35331
|
||||
6503210692207809:6503210693651923 2:0 CopyDeviceToHost:308:35331
|
||||
6503210701982929:6503210703634282 2:0 CopyHostToDevice:309:35331
|
||||
6503210703703121:6503210703804881 2:0 matrixTranspose(float*, float*, int):312:35331
|
||||
6503210703808561:6503210705252995 2:0 CopyDeviceToHost:313:35331
|
||||
6503210713597280:6503210715247353 2:0 CopyHostToDevice:314:35331
|
||||
6503210715314433:6503210715415872 2:0 matrixTranspose(float*, float*, int):317:35331
|
||||
6503210715419552:6503210716863506 2:0 CopyDeviceToHost:318:35331
|
||||
6503210631738005:6503210633449850 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :279
|
||||
6503210633452975:6503210633453126 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :280
|
||||
6503210633453326:6503210633453436 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :281
|
||||
6503210633453677:6503210633457203 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :282
|
||||
6503210633457704:6503210635138311 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :283
|
||||
6503210643371151:6503210645085070 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :284
|
||||
6503210645085581:6503210645085691 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :285
|
||||
6503210645085921:6503210645086021 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :286
|
||||
6503210645086262:6503210645089438 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :287
|
||||
6503210645089768:6503210646760868 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :288
|
||||
6503210655519883:6503210657233431 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :289
|
||||
6503210657233942:6503210657234082 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :290
|
||||
6503210657234293:6503210657234403 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :291
|
||||
6503210657234644:6503210657237799 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :292
|
||||
6503210657240424:6503210658906494 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :293
|
||||
6503210667151617:6503210668867469 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :294
|
||||
6503210668868070:6503210668868200 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :295
|
||||
6503210668868421:6503210668868521 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :296
|
||||
6503210668868752:6503210668872318 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :297
|
||||
6503210668875023:6503210670550541 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :298
|
||||
6503210678770185:6503210680488482 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :299
|
||||
6503210680489033:6503210680489163 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :300
|
||||
6503210680489344:6503210680489454 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :301
|
||||
6503210680491758:6503210680494954 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :302
|
||||
6503210680495325:6503210682164791 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :303
|
||||
6503210690369849:6503210692083578 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :304
|
||||
6503210692084099:6503210692084229 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :305
|
||||
6503210692084459:6503210692084570 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :306
|
||||
6503210692087074:6503210692089970 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :307
|
||||
6503210692090330:6503210693760588 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :308
|
||||
6503210701969663:6503210703684755 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :309
|
||||
6503210703685275:6503210703685386 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :310
|
||||
6503210703687610:6503210703687740 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :311
|
||||
6503210703687950:6503210703691447 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :312
|
||||
6503210703691798:6503210705361635 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :313
|
||||
6503210713583945:6503210715295008 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :314
|
||||
6503210715295750:6503210715296010 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :315
|
||||
6503210715298414:6503210715298545 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :316
|
||||
6503210715298785:6503210715302362 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :317
|
||||
6503210715302783:6503210716972550 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :318
|
||||
6503210725761229:6503210727411622 2:0 CopyHostToDevice:319:35331
|
||||
6503210727481582:6503210727583821 2:0 matrixTranspose(float*, float*, int):322:35331
|
||||
6503210727587501:6503210729031295 2:0 CopyDeviceToHost:323:35331
|
||||
6503210737379900:6503210739031893 2:0 CopyHostToDevice:324:35331
|
||||
6503210739103133:6503210739205213 2:0 matrixTranspose(float*, float*, int):327:35331
|
||||
6503210739208893:6503210740651887 2:0 CopyDeviceToHost:328:35331
|
||||
6503210749014368:6503210750667322 2:0 CopyHostToDevice:329:35331
|
||||
6503210750738080:6503210750841440 2:0 matrixTranspose(float*, float*, int):332:35331
|
||||
6503210750845120:6503210752288594 2:0 CopyDeviceToHost:333:35331
|
||||
6503210760655122:6503210762323595 2:0 CopyHostToDevice:334:35331
|
||||
6503210762391475:6503210762493234 2:0 matrixTranspose(float*, float*, int):337:35331
|
||||
6503210762497074:6503210763941669 2:0 CopyDeviceToHost:338:35331
|
||||
6503210772825794:6503210774477627 2:0 CopyHostToDevice:339:35331
|
||||
6503210774541987:6503210774644067 2:0 matrixTranspose(float*, float*, int):342:35331
|
||||
6503210774647747:6503210776090901 2:0 CopyDeviceToHost:343:35331
|
||||
6503210784455188:6503210786106382 2:0 CopyHostToDevice:344:35331
|
||||
6503210786174582:6503210786276981 2:0 matrixTranspose(float*, float*, int):347:35331
|
||||
6503210786280661:6503210787727176 2:0 CopyDeviceToHost:348:35331
|
||||
6503210796076583:6503210797745696 2:0 CopyHostToDevice:349:35331
|
||||
6503210797815176:6503210797922536 2:0 matrixTranspose(float*, float*, int):352:35331
|
||||
6503210797926216:6503210799369530 2:0 CopyDeviceToHost:353:35331
|
||||
6503210807752537:6503210809406451 2:0 CopyHostToDevice:354:35331
|
||||
6503210809473370:6503210809575610 2:0 matrixTranspose(float*, float*, int):357:35331
|
||||
6503210809579290:6503210811021324 2:0 CopyDeviceToHost:358:35331
|
||||
6503210819961955:6503210821613308 2:0 CopyHostToDevice:359:35331
|
||||
6503210821687586:6503210821789666 2:0 matrixTranspose(float*, float*, int):362:35331
|
||||
6503210821793506:6503210823265459 2:0 CopyDeviceToHost:363:35331
|
||||
6503210725746452:6503210727461493 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :319
|
||||
6503210727464579:6503210727464719 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :320
|
||||
6503210727464919:6503210727465020 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :321
|
||||
6503210727465260:6503210727469738 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :322
|
||||
6503210727470340:6503210729141118 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :323
|
||||
6503210737366233:6503210739084721 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :324
|
||||
6503210739087175:6503210739087315 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :325
|
||||
6503210739087516:6503210739087626 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :326
|
||||
6503210739087866:6503210739091373 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :327
|
||||
6503210739091714:6503210740761150 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :328
|
||||
6503210749000702:6503210750721674 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :329
|
||||
6503210750722245:6503210750722365 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :330
|
||||
6503210750722596:6503210750722696 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :331
|
||||
6503210750722976:6503210750726162 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :332
|
||||
6503210750726533:6503210752396500 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :333
|
||||
6503210760641773:6503210762374967 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :334
|
||||
6503210762375528:6503210762375649 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :335
|
||||
6503210762375839:6503210762375929 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :336
|
||||
6503210762376180:6503210762379365 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :337
|
||||
6503210762381499:6503210764049994 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :338
|
||||
6503210772812756:6503210774525202 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :339
|
||||
6503210774526044:6503210774526184 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :340
|
||||
6503210774526364:6503210774526475 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :341
|
||||
6503210774526715:6503210774529871 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :342
|
||||
6503210774532075:6503210776198065 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :343
|
||||
6503210784440993:6503210786156064 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :344
|
||||
6503210786156565:6503210786156725 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :345
|
||||
6503210786156925:6503210786157026 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :346
|
||||
6503210786159190:6503210786162536 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :347
|
||||
6503210786162957:6503210787834447 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :348
|
||||
6503210796063088:6503210797796694 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :349
|
||||
6503210797797265:6503210797797415 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :350
|
||||
6503210797797605:6503210797797726 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :351
|
||||
6503210797799920:6503210797803066 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :352
|
||||
6503210797803526:6503210799476900 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :353
|
||||
6503210807737842:6503210809452582 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :354
|
||||
6503210809453163:6503210809453333 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :355
|
||||
6503210809457191:6503210809457301 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :356
|
||||
6503210809457541:6503210809461328 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :357
|
||||
6503210809461769:6503210811128370 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :358
|
||||
6503210819949250:6503210821668940 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :359
|
||||
6503210821669381:6503210821669501 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :360
|
||||
6503210821671805:6503210821671925 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :361
|
||||
6503210821672176:6503210821675782 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :362
|
||||
6503210821676213:6503210823374924 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :363
|
||||
6503210831639182:6503210833290854 2:0 CopyHostToDevice:364:35331
|
||||
6503210833359214:6503210833460974 2:0 matrixTranspose(float*, float*, int):367:35331
|
||||
6503210833464654:6503210834907167 2:0 CopyDeviceToHost:368:35331
|
||||
6503210843838007:6503210845490000 2:0 CopyHostToDevice:369:35331
|
||||
6503210845559320:6503210845661239 2:0 matrixTranspose(float*, float*, int):372:35331
|
||||
6503210845664759:6503210847140072 2:0 CopyDeviceToHost:373:35331
|
||||
6503210856092192:6503210857746265 2:0 CopyHostToDevice:374:35331
|
||||
6503210857815585:6503210857922944 2:0 matrixTranspose(float*, float*, int):377:35331
|
||||
6503210857926624:6503210859369458 2:0 CopyDeviceToHost:378:35331
|
||||
6503210867730540:6503210869382533 2:0 CopyHostToDevice:379:35331
|
||||
6503210869445933:6503210869548332 2:0 matrixTranspose(float*, float*, int):382:35331
|
||||
6503210869552012:6503210870995166 2:0 CopyDeviceToHost:383:35331
|
||||
6503210879894966:6503210881556878 2:0 CopyHostToDevice:384:35331
|
||||
6503210881622998:6503210881724918 2:0 matrixTranspose(float*, float*, int):387:35331
|
||||
6503210881728438:6503210883172871 2:0 CopyDeviceToHost:388:35331
|
||||
6503210892050211:6503210893700764 2:0 CopyHostToDevice:389:35331
|
||||
6503210893772642:6503210893874562 2:0 matrixTranspose(float*, float*, int):392:35331
|
||||
6503210893878402:6503210895320596 2:0 CopyDeviceToHost:393:35331
|
||||
6503210903654322:6503210905304715 2:0 CopyHostToDevice:394:35331
|
||||
6503210905371635:6503210905473394 2:0 matrixTranspose(float*, float*, int):397:35331
|
||||
6503210905477234:6503210906920228 2:0 CopyDeviceToHost:398:35331
|
||||
6503210915260834:6503210916913307 2:0 CopyHostToDevice:399:35331
|
||||
6503210916982627:6503210917084546 2:0 matrixTranspose(float*, float*, int):402:35331
|
||||
6503210917088226:6503210918532980 2:0 CopyDeviceToHost:403:35331
|
||||
6503210831626428:6503210833341068 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :364
|
||||
6503210833343813:6503210833343983 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :365
|
||||
6503210833344173:6503210833344274 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :366
|
||||
6503210833344524:6503210833347790 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :367
|
||||
6503210833348171:6503210835016245 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :368
|
||||
6503210843824752:6503210845541526 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :369
|
||||
6503210845543991:6503210845544131 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :370
|
||||
6503210845544331:6503210845544431 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :371
|
||||
6503210845544662:6503210845547748 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :372
|
||||
6503210845548128:6503210847251848 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :373
|
||||
6503210856079371:6503210857799601 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :374
|
||||
6503210857800082:6503210857800212 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :375
|
||||
6503210857800463:6503210857800563 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :376
|
||||
6503210857800803:6503210857804099 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :377
|
||||
6503210857804530:6503210859477092 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :378
|
||||
6503210867717666:6503210869429962 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :379
|
||||
6503210869430403:6503210869430503 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :380
|
||||
6503210869430704:6503210869430804 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :381
|
||||
6503210869431044:6503210869434511 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :382
|
||||
6503210869436925:6503210871103376 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :383
|
||||
6503210879881967:6503210881607237 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :384
|
||||
6503210881607648:6503210881607798 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :385
|
||||
6503210881607988:6503210881608089 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :386
|
||||
6503210881608389:6503210881611625 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :387
|
||||
6503210881613889:6503210883281583 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :388
|
||||
6503210892036641:6503210893754166 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :389
|
||||
6503210893754617:6503210893754757 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :390
|
||||
6503210893755058:6503210893755168 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :391
|
||||
6503210893757572:6503210893760788 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :392
|
||||
6503210893761279:6503210895428351 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :393
|
||||
6503210903641534:6503210905353099 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :394
|
||||
6503210905353479:6503210905353590 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :395
|
||||
6503210905353810:6503210905353920 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :396
|
||||
6503210905356325:6503210905359491 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :397
|
||||
6503210905359951:6503210907027193 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :398
|
||||
6503210915247620:6503210916964555 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :399
|
||||
6503210916964975:6503210916965096 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :400
|
||||
6503210916967220:6503210916967330 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :401
|
||||
6503210916967570:6503210916970786 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :402
|
||||
6503210916971327:6503210918641334 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :403
|
||||
6503210926882706:6503210928535339 2:0 CopyHostToDevice:404:35331
|
||||
6503210928604339:6503210928706579 2:0 matrixTranspose(float*, float*, int):407:35331
|
||||
6503210928710258:6503210930183332 2:0 CopyDeviceToHost:408:35331
|
||||
6503210939076416:6503210940698649 2:0 CopyHostToDevice:409:35331
|
||||
6503210940766209:6503210940869888 2:0 matrixTranspose(float*, float*, int):412:35331
|
||||
6503210940873568:6503210942322002 2:0 CopyDeviceToHost:413:35331
|
||||
6503210950657488:6503210952308681 2:0 CopyHostToDevice:414:35331
|
||||
6503210952378001:6503210952479761 2:0 matrixTranspose(float*, float*, int):417:35331
|
||||
6503210952483441:6503210953926915 2:0 CopyDeviceToHost:418:35331
|
||||
6503210962245221:6503210963897215 2:0 CopyHostToDevice:419:35331
|
||||
6503210963971653:6503210964073733 2:0 matrixTranspose(float*, float*, int):422:35331
|
||||
6503210964077413:6503210965523607 2:0 CopyDeviceToHost:423:35331
|
||||
6503210973867413:6503210975517967 2:0 CopyHostToDevice:424:35331
|
||||
6503210975583127:6503210975684726 2:0 matrixTranspose(float*, float*, int):427:35331
|
||||
6503210975688406:6503210977155560 2:0 CopyDeviceToHost:428:35331
|
||||
6503210985723206:6503210987374559 2:0 CopyHostToDevice:429:35331
|
||||
6503210987439079:6503210987540998 2:0 matrixTranspose(float*, float*, int):432:35331
|
||||
6503210987544678:6503210988989112 2:0 CopyDeviceToHost:433:35331
|
||||
6503210997346359:6503210998996752 2:0 CopyHostToDevice:434:35331
|
||||
6503210999065112:6503210999167191 2:0 matrixTranspose(float*, float*, int):437:35331
|
||||
6503210999170871:6503211000613385 2:0 CopyDeviceToHost:438:35331
|
||||
6503211008987272:6503211010637665 2:0 CopyHostToDevice:439:35331
|
||||
6503211010703145:6503211010805384 2:0 matrixTranspose(float*, float*, int):442:35331
|
||||
6503211010809064:6503211012253658 2:0 CopyDeviceToHost:443:35331
|
||||
6503211021150582:6503211022801776 2:0 CopyHostToDevice:444:35331
|
||||
6503211022868536:6503211022971415 2:0 matrixTranspose(float*, float*, int):447:35331
|
||||
6503211022975255:6503211024420329 2:0 CopyDeviceToHost:448:35331
|
||||
6503210926869886:6503210928584566 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :404
|
||||
6503210928585858:6503210928586099 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :405
|
||||
6503210928588794:6503210928588924 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :406
|
||||
6503210928589204:6503210928592420 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :407
|
||||
6503210928593132:6503210930294828 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :408
|
||||
6503210939062670:6503210940748146 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :409
|
||||
6503210940750480:6503210940750600 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :410
|
||||
6503210940750791:6503210940750901 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :411
|
||||
6503210940751151:6503210940754367 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :412
|
||||
6503210940754788:6503210942430186 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :413
|
||||
6503210950644370:6503210952359491 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :414
|
||||
6503210952361846:6503210952361956 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :415
|
||||
6503210952362156:6503210952362267 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :416
|
||||
6503210952362487:6503210952365613 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :417
|
||||
6503210952366003:6503210954036371 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :418
|
||||
6503210962230730:6503210963954958 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :419
|
||||
6503210963955459:6503210963955629 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :420
|
||||
6503210963955879:6503210963955990 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :421
|
||||
6503210963956240:6503210963959747 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :422
|
||||
6503210963960197:6503210965633361 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :423
|
||||
6503210973853176:6503210975566133 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :424
|
||||
6503210975566644:6503210975566804 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :425
|
||||
6503210975567064:6503210975567175 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :426
|
||||
6503210975567415:6503210975571122 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :427
|
||||
6503210975573707:6503210977264983 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :428
|
||||
6503210985709475:6503210987422643 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :429
|
||||
6503210987423164:6503210987423284 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :430
|
||||
6503210987423575:6503210987423685 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :431
|
||||
6503210987424015:6503210987427141 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :432
|
||||
6503210987429796:6503210989097720 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :433
|
||||
6503210997332883:6503210999046261 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :434
|
||||
6503210999047083:6503210999047243 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :435
|
||||
6503210999047453:6503210999047564 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :436
|
||||
6503210999050189:6503210999053144 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :437
|
||||
6503210999053505:6503211000719795 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :438
|
||||
6503211008973273:6503211010685108 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :439
|
||||
6503211010685529:6503211010685669 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :440
|
||||
6503211010685929:6503211010686040 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :441
|
||||
6503211010688204:6503211010691119 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :442
|
||||
6503211010691510:6503211012360636 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :443
|
||||
6503211021136903:6503211022849770 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :444
|
||||
6503211022850160:6503211022850291 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :445
|
||||
6503211022853206:6503211022853326 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :446
|
||||
6503211022853537:6503211022856632 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :447
|
||||
6503211022857003:6503211024527271 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :448
|
||||
6503211033332820:6503211034986574 2:0 CopyHostToDevice:449:35331
|
||||
6503211035063412:6503211035165812 2:0 matrixTranspose(float*, float*, int):452:35331
|
||||
6503211035169492:6503211036612966 2:0 CopyDeviceToHost:453:35331
|
||||
6503211044941734:6503211046592928 2:0 CopyHostToDevice:454:35331
|
||||
6503211046660167:6503211046762407 2:0 matrixTranspose(float*, float*, int):457:35331
|
||||
6503211046766087:6503211048208921 2:0 CopyDeviceToHost:458:35331
|
||||
6503211056554489:6503211058224883 2:0 CopyHostToDevice:459:35331
|
||||
6503211058295322:6503211058397242 2:0 matrixTranspose(float*, float*, int):462:35331
|
||||
6503211058400922:6503211059843116 2:0 CopyDeviceToHost:463:35331
|
||||
6503211068187244:6503211069838117 2:0 CopyHostToDevice:464:35331
|
||||
6503211069901357:6503211070004077 2:0 matrixTranspose(float*, float*, int):467:35331
|
||||
6503211070007757:6503211071451071 2:0 CopyDeviceToHost:468:35331
|
||||
6503211080319837:6503211081970710 2:0 CopyHostToDevice:469:35331
|
||||
6503211082034430:6503211082136510 2:0 matrixTranspose(float*, float*, int):472:35331
|
||||
6503211082140190:6503211083587344 2:0 CopyDeviceToHost:473:35331
|
||||
6503211091935152:6503211093586025 2:0 CopyHostToDevice:474:35331
|
||||
6503211093654705:6503211093756625 2:0 matrixTranspose(float*, float*, int):477:35331
|
||||
6503211093760465:6503211095204419 2:0 CopyDeviceToHost:478:35331
|
||||
6503211103543170:6503211105195483 2:0 CopyHostToDevice:479:35331
|
||||
6503211105267041:6503211105368961 2:0 matrixTranspose(float*, float*, int):482:35331
|
||||
6503211105372641:6503211106818035 2:0 CopyDeviceToHost:483:35331
|
||||
6503211115179760:6503211116830793 2:0 CopyHostToDevice:484:35331
|
||||
6503211116900433:6503211117012272 2:0 matrixTranspose(float*, float*, int):487:35331
|
||||
6503211117015952:6503211118458626 2:0 CopyDeviceToHost:488:35331
|
||||
6503211033319297:6503211035044196 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :449
|
||||
6503211035045098:6503211035045208 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :450
|
||||
6503211035047342:6503211035047473 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :451
|
||||
6503211035047693:6503211035050849 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :452
|
||||
6503211035051240:6503211036720235 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :453
|
||||
6503211044928008:6503211046641827 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :454
|
||||
6503211046644271:6503211046644401 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :455
|
||||
6503211046644592:6503211046644692 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :456
|
||||
6503211046644922:6503211046648078 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :457
|
||||
6503211046648519:6503211048315691 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :458
|
||||
6503211056541107:6503211058274482 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :459
|
||||
6503211058279040:6503211058279171 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :460
|
||||
6503211058279361:6503211058279471 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :461
|
||||
6503211058279672:6503211058282968 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :462
|
||||
6503211058283699:6503211059949549 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :463
|
||||
6503211068173462:6503211069884565 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :464
|
||||
6503211069885096:6503211069885226 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :465
|
||||
6503211069885487:6503211069885607 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :466
|
||||
6503211069885817:6503211069889134 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :467
|
||||
6503211069889584:6503211071557778 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :468
|
||||
6503211080305903:6503211082017999 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :469
|
||||
6503211082018409:6503211082018550 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :470
|
||||
6503211082018750:6503211082018860 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :471
|
||||
6503211082019111:6503211082022247 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :472
|
||||
6503211082024421:6503211083694618 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :473
|
||||
6503211091920876:6503211093637910 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :474
|
||||
6503211093638371:6503211093638501 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :475
|
||||
6503211093638752:6503211093638852 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :476
|
||||
6503211093639072:6503211093642469 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :477
|
||||
6503211093644693:6503211095310793 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :478
|
||||
6503211103529326:6503211105248805 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :479
|
||||
6503211105249226:6503211105249376 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :480
|
||||
6503211105249607:6503211105249717 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :481
|
||||
6503211105251971:6503211105255207 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :482
|
||||
6503211105255578:6503211106925876 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :483
|
||||
6503211115166119:6503211116881490 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :484
|
||||
6503211116881941:6503211116882061 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :485
|
||||
6503211116882262:6503211116882372 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :486
|
||||
6503211116885498:6503211116888724 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :487
|
||||
6503211116889145:6503211118565954 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :488
|
||||
6503211127375709:6503211129029622 2:0 CopyHostToDevice:489:35331
|
||||
6503211129101662:6503211129204061 2:0 matrixTranspose(float*, float*, int):492:35331
|
||||
6503211129207741:6503211130650415 2:0 CopyDeviceToHost:493:35331
|
||||
6503211139021580:6503211140676133 2:0 CopyHostToDevice:494:35331
|
||||
6503211140742573:6503211140844653 2:0 matrixTranspose(float*, float*, int):497:35331
|
||||
6503211140848333:6503211142290686 2:0 CopyDeviceToHost:498:35331
|
||||
6503211150658811:6503211152310805 2:0 CopyHostToDevice:499:35331
|
||||
6503211152381564:6503211152483644 2:0 matrixTranspose(float*, float*, int):502:35331
|
||||
6503211152487164:6503211153938958 2:0 CopyDeviceToHost:503:35331
|
||||
6503211127362319:6503211129082529 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :489
|
||||
6503211129083321:6503211129083441 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :490
|
||||
6503211129086256:6503211129086386 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :491
|
||||
6503211129086627:6503211129089923 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :492
|
||||
6503211129090444:6503211130757556 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :493
|
||||
6503211139008319:6503211140724522 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :494
|
||||
6503211140725013:6503211140725163 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :495
|
||||
6503211140727227:6503211140727357 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :496
|
||||
6503211140727628:6503211140730843 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :497
|
||||
6503211140731445:6503211142398517 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :498
|
||||
6503211150645472:6503211152362817 35331:35331 hipMemcpy(dst=0x7f19f7a00000, src=0x7f19fc9ff010, sizeBytes=4194304, kind=1) :499
|
||||
6503211152365633:6503211152365803 35331:35331 __hipPushCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :500
|
||||
6503211152366043:6503211152366144 35331:35331 __hipPopCallConfiguration(gridDim={z=1, y=256, x=256}, blockDim={z=1, y=4, x=4}, sharedMem=0, stream=0) :501
|
||||
6503211152366384:6503211152369500 35331:35331 hipLaunchKernel(function_address=0x201010, numBlocks={z=1, y=256, x=256}, dimBlocks={z=1, y=4, x=4}, args=0x7ffc8155e688, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int) :502
|
||||
6503211152369901:6503211154050868 35331:35331 hipMemcpy(dst=0x7f19fc5fe010, src=0x7f19f7400000, sizeBytes=4194304, kind=2) :503
|
||||
6503211162290811:6503211162359739 35331:35331 hipFree(ptr=0x7f19f7a00000) :504
|
||||
6503211162362394:6503211162387610 35331:35331 hipFree(ptr=0x7f19f7400000) :505
|
||||
@@ -0,0 +1,611 @@
|
||||
ROCTracer (pid=880592): input from "input.xml"
|
||||
0x1e17410 agent cpu
|
||||
0x1e4f580 agent gpu
|
||||
0x1e51e80 agent gpu
|
||||
4496524891130970
|
||||
HIP-trace()
|
||||
hipFree hipMalloc hipMemcpyDevice name AMD Radeon VII
|
||||
## Iteration (99) #################
|
||||
PASSED!
|
||||
## Iteration (98) #################
|
||||
PASSED!
|
||||
## Iteration (97) #################
|
||||
PASSED!
|
||||
## Iteration (96) #################
|
||||
PASSED!
|
||||
## Iteration (95) #################
|
||||
PASSED!
|
||||
## Iteration (94) #################
|
||||
PASSED!
|
||||
## Iteration (93) #################
|
||||
PASSED!
|
||||
## Iteration (92) #################
|
||||
PASSED!
|
||||
## Iteration (91) #################
|
||||
PASSED!
|
||||
## Iteration (90) #################
|
||||
PASSED!
|
||||
## Iteration (89) #################
|
||||
PASSED!
|
||||
## Iteration (88) #################
|
||||
PASSED!
|
||||
## Iteration (87) #################
|
||||
PASSED!
|
||||
## Iteration (86) #################
|
||||
PASSED!
|
||||
## Iteration (85) #################
|
||||
PASSED!
|
||||
## Iteration (84) #################
|
||||
PASSED!
|
||||
## Iteration (83) #################
|
||||
PASSED!
|
||||
## Iteration (82) #################
|
||||
PASSED!
|
||||
## Iteration (81) #################
|
||||
PASSED!
|
||||
## Iteration (80) #################
|
||||
PASSED!
|
||||
## Iteration (79) #################
|
||||
PASSED!
|
||||
## Iteration (78) #################
|
||||
PASSED!
|
||||
## Iteration (77) #################
|
||||
PASSED!
|
||||
## Iteration (76) #################
|
||||
PASSED!
|
||||
## Iteration (75) #################
|
||||
PASSED!
|
||||
## Iteration (74) #################
|
||||
PASSED!
|
||||
## Iteration (73) #################
|
||||
PASSED!
|
||||
## Iteration (72) #################
|
||||
PASSED!
|
||||
## Iteration (71) #################
|
||||
PASSED!
|
||||
## Iteration (70) #################
|
||||
PASSED!
|
||||
## Iteration (69) #################
|
||||
PASSED!
|
||||
## Iteration (68) #################
|
||||
PASSED!
|
||||
## Iteration (67) #################
|
||||
PASSED!
|
||||
## Iteration (66) #################
|
||||
PASSED!
|
||||
## Iteration (65) #################
|
||||
PASSED!
|
||||
## Iteration (64) #################
|
||||
PASSED!
|
||||
## Iteration (63) #################
|
||||
PASSED!
|
||||
## Iteration (62) #################
|
||||
PASSED!
|
||||
## Iteration (61) #################
|
||||
PASSED!
|
||||
## Iteration (60) #################
|
||||
PASSED!
|
||||
## Iteration (59) #################
|
||||
PASSED!
|
||||
## Iteration (58) #################
|
||||
PASSED!
|
||||
## Iteration (57) #################
|
||||
PASSED!
|
||||
## Iteration (56) #################
|
||||
PASSED!
|
||||
## Iteration (55) #################
|
||||
PASSED!
|
||||
## Iteration (54) #################
|
||||
PASSED!
|
||||
## Iteration (53) #################
|
||||
PASSED!
|
||||
## Iteration (52) #################
|
||||
PASSED!
|
||||
## Iteration (51) #################
|
||||
PASSED!
|
||||
## Iteration (50) #################
|
||||
PASSED!
|
||||
## Iteration (49) #################
|
||||
PASSED!
|
||||
## Iteration (48) #################
|
||||
PASSED!
|
||||
## Iteration (47) #################
|
||||
PASSED!
|
||||
## Iteration (46) #################
|
||||
PASSED!
|
||||
## Iteration (45) #################
|
||||
PASSED!
|
||||
## Iteration (44) #################
|
||||
PASSED!
|
||||
## Iteration (43) #################
|
||||
PASSED!
|
||||
## Iteration (42) #################
|
||||
PASSED!
|
||||
## Iteration (41) #################
|
||||
PASSED!
|
||||
## Iteration (40) #################
|
||||
PASSED!
|
||||
## Iteration (39) #################
|
||||
PASSED!
|
||||
## Iteration (38) #################
|
||||
PASSED!
|
||||
## Iteration (37) #################
|
||||
PASSED!
|
||||
## Iteration (36) #################
|
||||
PASSED!
|
||||
## Iteration (35) #################
|
||||
PASSED!
|
||||
## Iteration (34) #################
|
||||
PASSED!
|
||||
## Iteration (33) #################
|
||||
PASSED!
|
||||
## Iteration (32) #################
|
||||
PASSED!
|
||||
## Iteration (31) #################
|
||||
PASSED!
|
||||
## Iteration (30) #################
|
||||
PASSED!
|
||||
## Iteration (29) #################
|
||||
PASSED!
|
||||
## Iteration (28) #################
|
||||
PASSED!
|
||||
## Iteration (27) #################
|
||||
PASSED!
|
||||
## Iteration (26) #################
|
||||
PASSED!
|
||||
## Iteration (25) #################
|
||||
PASSED!
|
||||
## Iteration (24) #################
|
||||
PASSED!
|
||||
## Iteration (23) #################
|
||||
PASSED!
|
||||
## Iteration (22) #################
|
||||
PASSED!
|
||||
## Iteration (21) #################
|
||||
PASSED!
|
||||
## Iteration (20) #################
|
||||
PASSED!
|
||||
## Iteration (19) #################
|
||||
PASSED!
|
||||
## Iteration (18) #################
|
||||
PASSED!
|
||||
## Iteration (17) #################
|
||||
PASSED!
|
||||
## Iteration (16) #################
|
||||
PASSED!
|
||||
## Iteration (15) #################
|
||||
PASSED!
|
||||
## Iteration (14) #################
|
||||
PASSED!
|
||||
## Iteration (13) #################
|
||||
PASSED!
|
||||
## Iteration (12) #################
|
||||
PASSED!
|
||||
## Iteration (11) #################
|
||||
PASSED!
|
||||
## Iteration (10) #################
|
||||
PASSED!
|
||||
## Iteration (9) #################
|
||||
PASSED!
|
||||
## Iteration (8) #################
|
||||
PASSED!
|
||||
## Iteration (7) #################
|
||||
PASSED!
|
||||
## Iteration (6) #################
|
||||
PASSED!
|
||||
## Iteration (5) #################
|
||||
PASSED!
|
||||
## Iteration (4) #################
|
||||
PASSED!
|
||||
## Iteration (3) #################
|
||||
PASSED!
|
||||
## Iteration (2) #################
|
||||
PASSED!
|
||||
## Iteration (1) #################
|
||||
PASSED!
|
||||
## Iteration (0) #################
|
||||
PASSED!
|
||||
4496525133195468:4496525133500748 0:0 CopyHostToDevice:3:880592
|
||||
4496525139346056:4496525140494974 0:0 CopyDeviceToHost:6:880592
|
||||
4496525158136061:4496525158439581 0:0 CopyHostToDevice:7:880592
|
||||
4496525162884573:4496525163225012 0:0 CopyDeviceToHost:10:880592
|
||||
4496525179113943:4496525179416502 0:0 CopyHostToDevice:11:880592
|
||||
4496525183856054:4496525184201293 0:0 CopyDeviceToHost:14:880592
|
||||
4496525199788429:4496525200090509 0:0 CopyHostToDevice:15:880592
|
||||
4496525204497581:4496525204837540 0:0 CopyDeviceToHost:18:880592
|
||||
4496525220040077:4496525220341836 0:0 CopyHostToDevice:19:880592
|
||||
4496525224734029:4496525225073989 0:0 CopyDeviceToHost:22:880592
|
||||
4496525240233165:4496525240535565 0:0 CopyHostToDevice:23:880592
|
||||
4496525244924718:4496525245265318 0:0 CopyDeviceToHost:26:880592
|
||||
4496525260367854:4496525260669454 0:0 CopyHostToDevice:27:880592
|
||||
4496525265052841:4496525265424319 0:0 CopyDeviceToHost:30:880592
|
||||
4496525281955173:4496525282256773 0:0 CopyHostToDevice:31:880592
|
||||
4496525286638246:4496525286978205 0:0 CopyDeviceToHost:34:880592
|
||||
4496525301587783:4496525301890022 0:0 CopyHostToDevice:35:880592
|
||||
4496525306259335:4496525306599935 0:0 CopyDeviceToHost:38:880592
|
||||
4496525322760230:4496525323062629 0:0 CopyHostToDevice:39:880592
|
||||
4496525327434982:4496525327775409 0:0 CopyDeviceToHost:42:880592
|
||||
4496525343578745:4496525343881145 0:0 CopyHostToDevice:43:880592
|
||||
4496525348304218:4496525348644498 0:0 CopyDeviceToHost:46:880592
|
||||
4496525364012316:4496525364314556 0:0 CopyHostToDevice:47:880592
|
||||
4496525368704989:4496525369045109 0:0 CopyDeviceToHost:50:880592
|
||||
4496525384281567:4496525384583167 0:0 CopyHostToDevice:51:880592
|
||||
4496525388966240:4496525389338040 0:0 CopyDeviceToHost:54:880592
|
||||
4496525404545748:4496525404848308 0:0 CopyHostToDevice:55:880592
|
||||
4496525409873459:4496525410212298 0:0 CopyDeviceToHost:58:880592
|
||||
4496525426854993:4496525427157073 0:0 CopyHostToDevice:59:880592
|
||||
4496525431525586:4496525431865866 0:0 CopyDeviceToHost:62:880592
|
||||
4496525448226961:4496525448529360 0:0 CopyHostToDevice:63:880592
|
||||
4496525452895154:4496525453237033 0:0 CopyDeviceToHost:66:880592
|
||||
4496525468131725:4496525468433645 0:0 CopyHostToDevice:67:880592
|
||||
4496525472798637:4496525473139397 0:0 CopyDeviceToHost:70:880592
|
||||
4496525488059216:4496525488361616 0:0 CopyHostToDevice:71:880592
|
||||
4496525492706930:4496525493047049 0:0 CopyDeviceToHost:74:880592
|
||||
4496525508011988:4496525508314228 0:0 CopyHostToDevice:75:880592
|
||||
4496525512687542:4496525513027661 0:0 CopyDeviceToHost:78:880592
|
||||
4496525528000600:4496525528302200 0:0 CopyHostToDevice:79:880592
|
||||
4496525532665206:4496525533005644 0:0 CopyDeviceToHost:82:880592
|
||||
4496525548020504:4496525548322744 0:0 CopyHostToDevice:83:880592
|
||||
4496525552686778:4496525553026738 0:0 CopyDeviceToHost:86:880592
|
||||
4496525568050718:4496525568352477 0:0 CopyHostToDevice:87:880592
|
||||
4496525572724831:4496525573064951 0:0 CopyDeviceToHost:90:880592
|
||||
4496525588056611:4496525588358371 0:0 CopyHostToDevice:91:880592
|
||||
4496525592722405:4496525593062524 0:0 CopyDeviceToHost:94:880592
|
||||
4496525608239172:4496525608541412 0:0 CopyHostToDevice:95:880592
|
||||
4496525612912965:4496525613254204 0:0 CopyDeviceToHost:98:880592
|
||||
4496525628224585:4496525628526984 0:0 CopyHostToDevice:99:880592
|
||||
4496525632901099:4496525633241698 0:0 CopyDeviceToHost:102:880592
|
||||
4496525648284239:4496525648586638 0:0 CopyHostToDevice:103:880592
|
||||
4496525652965552:4496525653334472 0:0 CopyDeviceToHost:106:880592
|
||||
4496525668288212:4496525668590132 0:0 CopyHostToDevice:107:880592
|
||||
4496525672962004:4496525673332842 0:0 CopyDeviceToHost:110:880592
|
||||
4496525688287542:4496525688589622 0:0 CopyHostToDevice:111:880592
|
||||
4496525692956536:4496525693329136 0:0 CopyDeviceToHost:114:880592
|
||||
4496525708328796:4496525708630396 0:0 CopyHostToDevice:115:880592
|
||||
4496525712994110:4496525713363350 0:0 CopyDeviceToHost:118:880592
|
||||
4496525728330370:4496525728631810 0:0 CopyHostToDevice:119:880592
|
||||
4496525732992804:4496525733365723 0:0 CopyDeviceToHost:122:880592
|
||||
4496525748343184:4496525748645104 0:0 CopyHostToDevice:123:880592
|
||||
4496525753010257:4496525753383337 0:0 CopyDeviceToHost:126:880592
|
||||
4496525768361559:4496525768663159 0:0 CopyHostToDevice:127:880592
|
||||
4496525773024154:4496525773395793 0:0 CopyDeviceToHost:130:880592
|
||||
4496525788520096:4496525788822335 0:0 CopyHostToDevice:131:880592
|
||||
4496525793199010:4496525793539930 0:0 CopyDeviceToHost:134:880592
|
||||
4496525810008550:4496525810310310 0:0 CopyHostToDevice:135:880592
|
||||
4496525814653983:4496525814993461 0:0 CopyDeviceToHost:138:880592
|
||||
4496525832283678:4496525832585598 0:0 CopyHostToDevice:139:880592
|
||||
4496525837594912:4496525837935351 0:0 CopyDeviceToHost:142:880592
|
||||
4496525854707649:4496525855010049 0:0 CopyHostToDevice:143:880592
|
||||
4496525859362403:4496525859701883 0:0 CopyDeviceToHost:146:880592
|
||||
4496525876603621:4496525876905220 0:0 CopyHostToDevice:147:880592
|
||||
4496525881306229:4496525881646187 0:0 CopyDeviceToHost:150:880592
|
||||
4496525897973207:4496525898274807 0:0 CopyHostToDevice:151:880592
|
||||
4496525902618202:4496525902957521 0:0 CopyDeviceToHost:154:880592
|
||||
4496525917333824:4496525917635744 0:0 CopyHostToDevice:155:880592
|
||||
4496525921975459:4496525922350138 0:0 CopyDeviceToHost:158:880592
|
||||
4496525936492201:4496525936794441 0:0 CopyHostToDevice:159:880592
|
||||
4496525941157356:4496525941500515 0:0 CopyDeviceToHost:162:880592
|
||||
4496525955677517:4496525955979757 0:0 CopyHostToDevice:163:880592
|
||||
4496525960341710:4496525960681510 0:0 CopyDeviceToHost:166:880592
|
||||
4496525976745491:4496525977047571 0:0 CopyHostToDevice:167:880592
|
||||
4496525981389366:4496525981729005 0:0 CopyDeviceToHost:170:880592
|
||||
4496525999625945:4496525999927864 0:0 CopyHostToDevice:171:880592
|
||||
4496526004313339:4496526004653619 0:0 CopyDeviceToHost:174:880592
|
||||
4496526019801138:4496526020102578 0:0 CopyHostToDevice:175:880592
|
||||
4496526024481811:4496526024821771 0:0 CopyDeviceToHost:178:880592
|
||||
4496526040023673:4496526040325593 0:0 CopyHostToDevice:179:880592
|
||||
4496526044704828:4496526045044948 0:0 CopyDeviceToHost:182:880592
|
||||
4496526060018210:4496526060319810 0:0 CopyHostToDevice:183:880592
|
||||
4496526064683365:4496526065023164 0:0 CopyDeviceToHost:186:880592
|
||||
4496526080078987:4496526080380427 0:0 CopyHostToDevice:187:880592
|
||||
4496526084744673:4496526085104791 0:0 CopyDeviceToHost:190:880592
|
||||
4496526100065734:4496526100367494 0:0 CopyHostToDevice:191:880592
|
||||
4496526104740009:4496526105080608 0:0 CopyDeviceToHost:194:880592
|
||||
4496526120132111:4496526120434351 0:0 CopyHostToDevice:195:880592
|
||||
4496526124795826:4496526125136266 0:0 CopyDeviceToHost:198:880592
|
||||
4496526140122969:4496526140424888 0:0 CopyHostToDevice:199:880592
|
||||
4496526144798843:4496526145139283 0:0 CopyDeviceToHost:202:880592
|
||||
4496526160131675:4496526160434075 0:0 CopyHostToDevice:203:880592
|
||||
4496526164805309:4496526165146708 0:0 CopyDeviceToHost:206:880592
|
||||
4496526180137732:4496526180439812 0:0 CopyHostToDevice:207:880592
|
||||
4496526184806087:4496526185146207 0:0 CopyDeviceToHost:210:880592
|
||||
4496526200143310:4496526200444910 0:0 CopyHostToDevice:211:880592
|
||||
4496526204807345:4496526205146985 0:0 CopyDeviceToHost:214:880592
|
||||
4496526220164409:4496526220466328 0:0 CopyHostToDevice:215:880592
|
||||
4496526224827377:4496526225167815 0:0 CopyDeviceToHost:218:880592
|
||||
4496526240237879:4496526240539639 0:0 CopyHostToDevice:219:880592
|
||||
4496526244914074:4496526245255794 0:0 CopyDeviceToHost:222:880592
|
||||
4496526260329698:4496526260631617 0:0 CopyHostToDevice:223:880592
|
||||
4496526265641092:4496526265982492 0:0 CopyDeviceToHost:226:880592
|
||||
4496526282735434:4496526283037674 0:0 CopyHostToDevice:227:880592
|
||||
4496526287389972:4496526287729770 0:0 CopyDeviceToHost:230:880592
|
||||
4496526304640792:4496526304943031 0:0 CopyHostToDevice:231:880592
|
||||
4496526309322907:4496526309663346 0:0 CopyDeviceToHost:234:880592
|
||||
4496526326129888:4496526326432128 0:0 CopyHostToDevice:235:880592
|
||||
4496526330838403:4496526331178843 0:0 CopyDeviceToHost:238:880592
|
||||
4496526343522029:4496526343824269 0:0 CopyHostToDevice:239:880592
|
||||
4496526348310864:4496526348650504 0:0 CopyDeviceToHost:242:880592
|
||||
4496526363968080:4496526364270160 0:0 CopyHostToDevice:243:880592
|
||||
4496526368652274:4496526368992073 0:0 CopyDeviceToHost:246:880592
|
||||
4496526384185337:4496526384486936 0:0 CopyHostToDevice:247:880592
|
||||
4496526388854172:4496526389198131 0:0 CopyDeviceToHost:250:880592
|
||||
4496526404335235:4496526404637314 0:0 CopyHostToDevice:251:880592
|
||||
4496526408997830:4496526409365789 0:0 CopyDeviceToHost:254:880592
|
||||
4496526424568493:4496526424870412 0:0 CopyHostToDevice:255:880592
|
||||
4496526429213789:4496526429553747 0:0 CopyDeviceToHost:258:880592
|
||||
4496526446042850:4496526446345090 0:0 CopyHostToDevice:259:880592
|
||||
4496526450809605:4496526451149405 0:0 CopyDeviceToHost:262:880592
|
||||
4496526467002509:4496526467304588 0:0 CopyHostToDevice:263:880592
|
||||
4496526471659664:4496526471999943 0:0 CopyDeviceToHost:266:880592
|
||||
4496526487288888:4496526487590807 0:0 CopyHostToDevice:267:880592
|
||||
4496526491942249:4496526492283807 0:0 CopyDeviceToHost:270:880592
|
||||
4496526507571795:4496526507873555 0:0 CopyHostToDevice:271:880592
|
||||
4496526512311031:4496526512651311 0:0 CopyDeviceToHost:274:880592
|
||||
4496526527812739:4496526528114819 0:0 CopyHostToDevice:275:880592
|
||||
4496526532491175:4496526532831135 0:0 CopyDeviceToHost:278:880592
|
||||
4496526547850803:4496526548153203 0:0 CopyHostToDevice:279:880592
|
||||
4496526552533079:4496526552873199 0:0 CopyDeviceToHost:282:880592
|
||||
4496526567925672:4496526568228072 0:0 CopyHostToDevice:283:880592
|
||||
4496526572602186:4496526572942306 0:0 CopyDeviceToHost:286:880592
|
||||
4496526587965332:4496526588267412 0:0 CopyHostToDevice:287:880592
|
||||
4496526592643288:4496526592983887 0:0 CopyDeviceToHost:290:880592
|
||||
4496526607978913:4496526608280673 0:0 CopyHostToDevice:291:880592
|
||||
4496526612645349:4496526612985629 0:0 CopyDeviceToHost:294:880592
|
||||
4496526628000655:4496526628302414 0:0 CopyHostToDevice:295:880592
|
||||
4496526632663325:4496526633003763 0:0 CopyDeviceToHost:298:880592
|
||||
4496526648017837:4496526648320077 0:0 CopyHostToDevice:299:880592
|
||||
4496526652678035:4496526653018155 0:0 CopyDeviceToHost:302:880592
|
||||
4496526668055108:4496526668356868 0:0 CopyHostToDevice:303:880592
|
||||
4496526672724106:4496526673064066 0:0 CopyDeviceToHost:306:880592
|
||||
4496526688755419:4496526689057339 0:0 CopyHostToDevice:307:880592
|
||||
4496526693419457:4496526693759417 0:0 CopyDeviceToHost:310:880592
|
||||
4496526710295327:4496526710597247 0:0 CopyHostToDevice:311:880592
|
||||
4496526714950082:4496526715290361 0:0 CopyDeviceToHost:314:880592
|
||||
4496526732128906:4496526732430665 0:0 CopyHostToDevice:315:880592
|
||||
4496526736777741:4496526737117701 0:0 CopyDeviceToHost:318:880592
|
||||
4496526752148407:4496526752450167 0:0 CopyHostToDevice:319:880592
|
||||
4496526756824443:4496526757165043 0:0 CopyDeviceToHost:322:880592
|
||||
4496526772241509:4496526772543908 0:0 CopyHostToDevice:323:880592
|
||||
4496526776924789:4496526777266187 0:0 CopyDeviceToHost:326:880592
|
||||
4496526792332421:4496526792634501 0:0 CopyHostToDevice:327:880592
|
||||
4496526797007499:4496526797378499 0:0 CopyDeviceToHost:330:880592
|
||||
4496526812350653:4496526812652573 0:0 CopyHostToDevice:331:880592
|
||||
4496526817024291:4496526817396411 0:0 CopyDeviceToHost:334:880592
|
||||
4496526832511765:4496526832813525 0:0 CopyHostToDevice:335:880592
|
||||
4496526837210523:4496526837551603 0:0 CopyDeviceToHost:338:880592
|
||||
4496526852574486:4496526852876246 0:0 CopyHostToDevice:339:880592
|
||||
4496526857232600:4496526857571760 0:0 CopyDeviceToHost:342:880592
|
||||
4496526872568066:4496526872869986 0:0 CopyHostToDevice:343:880592
|
||||
4496526877873701:4496526878217980 0:0 CopyDeviceToHost:346:880592
|
||||
4496526894948845:4496526895250604 0:0 CopyHostToDevice:347:880592
|
||||
4496526899604560:4496526899944040 0:0 CopyDeviceToHost:350:880592
|
||||
4496526916800780:4496526917103020 0:0 CopyHostToDevice:351:880592
|
||||
4496526921465614:4496526921806534 0:0 CopyDeviceToHost:354:880592
|
||||
4496526938344278:4496526938646198 0:0 CopyHostToDevice:355:880592
|
||||
4496526942999834:4496526943373873 0:0 CopyDeviceToHost:358:880592
|
||||
4496526960757056:4496526961059296 0:0 CopyHostToDevice:359:880592
|
||||
4496526965400292:4496526965740572 0:0 CopyDeviceToHost:362:880592
|
||||
4496526982246291:4496526982548371 0:0 CopyHostToDevice:363:880592
|
||||
4496526986906166:4496526987245965 0:0 CopyDeviceToHost:366:880592
|
||||
4496527004066590:4496527004368030 0:0 CopyHostToDevice:367:880592
|
||||
4496527008741665:4496527009081145 0:0 CopyDeviceToHost:370:880592
|
||||
4496527024023531:4496527024325611 0:0 CopyHostToDevice:371:880592
|
||||
4496527028707247:4496527029047527 0:0 CopyDeviceToHost:374:880592
|
||||
4496527043983033:4496527044285593 0:0 CopyHostToDevice:375:880592
|
||||
4496527048638701:4496527048978659 0:0 CopyDeviceToHost:378:880592
|
||||
4496527063937205:4496527064239125 0:0 CopyHostToDevice:379:880592
|
||||
4496527068606361:4496527068946161 0:0 CopyDeviceToHost:382:880592
|
||||
4496527083919907:4496527084221506 0:0 CopyHostToDevice:383:880592
|
||||
4496527088585542:4496527088926302 0:0 CopyDeviceToHost:386:880592
|
||||
4496527103947568:4496527104249487 0:0 CopyHostToDevice:387:880592
|
||||
4496527108615443:4496527108955883 0:0 CopyDeviceToHost:390:880592
|
||||
4496527124628049:4496527124930289 0:0 CopyHostToDevice:391:880592
|
||||
4496527129311123:4496527129651403 0:0 CopyDeviceToHost:394:880592
|
||||
4496527146344668:4496527146647227 0:0 CopyHostToDevice:395:880592
|
||||
4496527151012863:4496527151385303 0:0 CopyDeviceToHost:398:880592
|
||||
4496527166565609:4496527166867209 0:0 CopyHostToDevice:399:880592
|
||||
4496527171210765:4496527171550564 0:0 CopyDeviceToHost:402:880592
|
||||
4496524903280142:4496524903426608 880592:880592 hipMalloc(ptr=0x7f14c3000000, size=4194304) :1
|
||||
4496524903446365:4496524903573365 880592:880592 hipMalloc(ptr=0x7f14c2800000, size=4194304) :2
|
||||
4496524903588203:4496525133627902 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :3
|
||||
4496525134207305:4496525140607184 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :6
|
||||
4496525154755917:4496525158532879 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :7
|
||||
4496525158552125:4496525163335997 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :10
|
||||
4496525175814741:4496525179506102 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :11
|
||||
4496525179519266:4496525184300123 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :14
|
||||
4496525196393148:4496525200179318 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :15
|
||||
4496525200189638:4496525204936449 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :18
|
||||
4496525216744046:4496525220425409 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :19
|
||||
4496525220438995:4496525225172542 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :22
|
||||
4496525236900238:4496525240619832 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :23
|
||||
4496525240633478:4496525245363489 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :26
|
||||
4496525257076899:4496525260752009 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :27
|
||||
4496525260765474:4496525265528037 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :30
|
||||
4496525278601381:4496525282344690 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :31
|
||||
4496525282356923:4496525287062988 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :34
|
||||
4496525298199228:4496525301976453 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :35
|
||||
4496525301989357:4496525306701493 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :38
|
||||
4496525319442590:4496525323149401 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :39
|
||||
4496525323159049:4496525327878419 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :42
|
||||
4496525340212129:4496525343964345 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :43
|
||||
4496525343973583:4496525348741845 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :46
|
||||
4496525360729852:4496525364398150 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :47
|
||||
4496525364412076:4496525369144271 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :50
|
||||
4496525381014837:4496525384667765 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :51
|
||||
4496525384680900:4496525389438431 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :54
|
||||
4496525401191155:4496525404934986 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :55
|
||||
4496525404946919:4496525410298471 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :58
|
||||
4496525423502872:4496525427227196 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :59
|
||||
4496525427242074:4496525431931367 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :62
|
||||
4496525444940650:4496525448596826 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :63
|
||||
4496525448609119:4496525453319692 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :66
|
||||
4496525464825952:4496525468507253 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :67
|
||||
4496525468516811:4496525473221545 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :70
|
||||
4496525484776165:4496525488428212 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :71
|
||||
4496525488438561:4496525493128415 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :74
|
||||
4496525504724173:4496525508381501 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :75
|
||||
4496525508391049:4496525513111039 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :78
|
||||
4496525524703772:4496525528368253 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :79
|
||||
4496525528381448:4496525533087222 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :82
|
||||
4496525544743344:4496525548393326 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :83
|
||||
4496525548405820:4496525553108910 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :86
|
||||
4496525564776353:4496525568419633 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :87
|
||||
4496525568431526:4496525573146797 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :90
|
||||
4496525584773363:4496525588428076 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :91
|
||||
4496525588441251:4496525593151694 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :94
|
||||
4496525604951276:4496525608612772 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :95
|
||||
4496525608626318:4496525613335408 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :98
|
||||
4496525624925244:4496525628593993 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :99
|
||||
4496525628604623:4496525633324024 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :102
|
||||
4496525644955148:4496525648653793 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :103
|
||||
4496525648663752:4496525653416343 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :106
|
||||
4496525665022200:4496525668655764 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :107
|
||||
4496525668669820:4496525673415239 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :110
|
||||
4496525685021526:4496525688656242 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :111
|
||||
4496525688669547:4496525693410117 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :114
|
||||
4496525705048605:4496525708697446 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :115
|
||||
4496525708710921:4496525713450741 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :118
|
||||
4496525725057529:4496525728701911 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :119
|
||||
4496525728714956:4496525733453982 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :122
|
||||
4496525745101558:4496525748716245 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :123
|
||||
4496525748728729:4496525753469559 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :126
|
||||
4496525765097578:4496525768729748 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :127
|
||||
4496525768739346:4496525773486869 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :130
|
||||
4496525785153981:4496525788889836 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :131
|
||||
4496525788899334:4496525793625478 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :134
|
||||
4496525806694594:4496525810379663 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :135
|
||||
4496525810389762:4496525815062083 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :138
|
||||
4496525829025717:4496525832652416 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :139
|
||||
4496525832666232:4496525838020960 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :142
|
||||
4496525851335449:4496525855079079 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :143
|
||||
4496525855095120:4496525859768263 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :146
|
||||
4496525873262060:4496525876972367 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :147
|
||||
4496525876986173:4496525881732493 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :150
|
||||
4496525894813432:4496525898344842 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :151
|
||||
4496525898357967:4496525903025769 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :154
|
||||
4496525914049899:4496525917708138 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :155
|
||||
4496525917721533:4496525922419974 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :158
|
||||
4496525933083213:4496525936861418 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :159
|
||||
4496525936871808:4496525941569528 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :162
|
||||
4496525952271099:4496525956051338 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :163
|
||||
4496525956061647:4496525960747143 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :166
|
||||
4496525973345872:4496525977113719 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :167
|
||||
4496525977127134:4496525981829511 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :170
|
||||
4496525996273891:4496525999995089 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :171
|
||||
4496526000008564:4496526004736400 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :174
|
||||
4496526016480127:4496526020175927 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :175
|
||||
4496526020189152:4496526024907620 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :178
|
||||
4496526036737941:4496526040392402 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :179
|
||||
4496526040406088:4496526045126310 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :182
|
||||
4496526056750972:4496526060387640 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :183
|
||||
4496526060400274:4496526065106138 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :186
|
||||
4496526076767389:4496526080447340 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :187
|
||||
4496526080456357:4496526085186276 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :190
|
||||
4496526096786342:4496526100434311 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :191
|
||||
4496526100443629:4496526105161156 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :194
|
||||
4496526116801218:4496526120501776 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :195
|
||||
4496526120511514:4496526125217650 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :198
|
||||
4496526136832464:4496526140494920 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :199
|
||||
4496526140509047:4496526145220241 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :202
|
||||
4496526156843100:4496526160502872 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :203
|
||||
4496526160515847:4496526165229165 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :206
|
||||
4496526176867433:4496526180510052 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :207
|
||||
4496526180522345:4496526185229233 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :210
|
||||
4496526196882068:4496526200511312 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :211
|
||||
4496526200522904:4496526205229030 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :214
|
||||
4496526216885692:4496526220532459 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :215
|
||||
4496526220544051:4496526225248913 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :218
|
||||
4496526236953526:4496526240608640 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :219
|
||||
4496526240618608:4496526245336946 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :222
|
||||
4496526257032562:4496526260697764 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :223
|
||||
4496526260706901:4496526266068673 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :226
|
||||
4496526279332747:4496526283108167 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :227
|
||||
4496526283124067:4496526287796549 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :230
|
||||
4496526301304112:4496526305009459 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :231
|
||||
4496526305023315:4496526309749930 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :234
|
||||
4496526322801743:4496526326516899 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :235
|
||||
4496526326534903:4496526331245847 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :238
|
||||
4496526340107579:4496526343906984 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :239
|
||||
4496526343924247:4496526348732695 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :242
|
||||
4496526360672632:4496526364342794 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :243
|
||||
4496526364356309:4496526369073956 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :246
|
||||
4496526380921729:4496526384554339 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :247
|
||||
4496526384564297:4496526389280813 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :250
|
||||
4496526401055448:4496526404706814 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :251
|
||||
4496526404716291:4496526409453775 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :254
|
||||
4496526421231786:4496526424938237 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :255
|
||||
4496526424947284:4496526429644452 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :258
|
||||
4496526442708689:4496526446413777 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :259
|
||||
4496526446426842:4496526451217265 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :262
|
||||
4496526463698402:4496526467373874 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :263
|
||||
4496526467389634:4496526472081051 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :266
|
||||
4496526484017822:4496526487658527 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :267
|
||||
4496526487672484:4496526492365545 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :270
|
||||
4496526504217596:4496526507940206 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :271
|
||||
4496526507952269:4496526512732904 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :274
|
||||
4496526524481750:4496526528181919 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :275
|
||||
4496526528194643:4496526532912630 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :278
|
||||
4496526544545337:4496526548219016 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :279
|
||||
4496526548228644:4496526552953164 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :282
|
||||
4496526564603244:4496526568298141 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :283
|
||||
4496526568308411:4496526573022752 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :286
|
||||
4496526584675307:4496526588335099 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :287
|
||||
4496526588344907:4496526593063886 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :290
|
||||
4496526604685893:4496526608347148 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :291
|
||||
4496526608360543:4496526613066458 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :294
|
||||
4496526624712120:4496526628367043 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :295
|
||||
4496526628379787:4496526633084449 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :298
|
||||
4496526644736553:4496526648383901 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :299
|
||||
4496526648395974:4496526653098974 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :302
|
||||
4496526664769242:4496526668420808 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :303
|
||||
4496526668433191:4496526673144235 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :306
|
||||
4496526684807961:4496526689128249 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :307
|
||||
4496526689138038:4496526693842629 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :310
|
||||
4496526706935721:4496526710671858 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :311
|
||||
4496526710682528:4496526715357624 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :314
|
||||
4496526728844507:4496526732497026 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :315
|
||||
4496526732507246:4496526737198843 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :318
|
||||
4496526748886113:4496526752517561 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :319
|
||||
4496526752530505:4496526757248855 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :322
|
||||
4496526768940533:4496526772608911 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :323
|
||||
4496526772622196:4496526777347416 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :326
|
||||
4496526789025531:4496526792698958 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :327
|
||||
4496526792712093:4496526797464554 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :330
|
||||
4496526809125575:4496526812718051 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :331
|
||||
4496526812731336:4496526817481033 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :334
|
||||
4496526829138567:4496526832879692 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :335
|
||||
4496526832891444:4496526837636884 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :338
|
||||
4496526849206473:4496526852946255 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :339
|
||||
4496526852954871:4496526857658531 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :342
|
||||
4496526869245494:4496526873593022 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :343
|
||||
4496526873604183:4496526878299818 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :346
|
||||
4496526891597135:4496526895318503 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :347
|
||||
4496526895329664:4496526900009158 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :350
|
||||
4496526913485803:4496526917173899 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :351
|
||||
4496526917187725:4496526921890463 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :354
|
||||
4496526934966763:4496526938716654 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :355
|
||||
4496526938729629:4496526943439361 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :358
|
||||
4496526957411730:4496526961125205 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :359
|
||||
4496526961138059:4496526965826390 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :362
|
||||
4496526978879125:4496526982620470 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :363
|
||||
4496526982632643:4496526987311768 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :366
|
||||
4496527000778785:4496527004436191 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :367
|
||||
4496527004448955:4496527009161343 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :370
|
||||
4496527020723638:4496527024395191 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :371
|
||||
4496527024404639:4496527029129138 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :374
|
||||
4496527040699580:4496527044353630 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :375
|
||||
4496527044363048:4496527049060306 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :378
|
||||
4496527060632609:4496527064307921 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :379
|
||||
4496527064321907:4496527069027702 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :382
|
||||
4496527080621436:4496527084288380 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :383
|
||||
4496527084301195:4496527089011037 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :386
|
||||
4496527100657080:4496527104315719 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :387
|
||||
4496527104328433:4496527109036020 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :390
|
||||
4496527120665231:4496527125002391 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :391
|
||||
4496527125016488:4496527129737942 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :394
|
||||
4496527143046309:4496527146717213 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :395
|
||||
4496527146730868:4496527151451590 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :398
|
||||
4496527163132549:4496527166937384 880592:880592 hipMemcpy(dst=0x7f14c3000000, src=0x7f14c3cff010, sizeBytes=4194304, kind=1) :399
|
||||
4496527166947223:4496527171616068 880592:880592 hipMemcpy(dst=0x7f14c38fe010, src=0x7f14c2800000, sizeBytes=4194304, kind=2) :402
|
||||
4496527185123250:4496527185153196 880592:880592 hipFree(ptr=0x7f14c3000000) :403
|
||||
4496527185154519:4496527185168716 880592:880592 hipFree(ptr=0x7f14c2800000) :404
|
||||
@@ -0,0 +1,274 @@
|
||||
+ ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose
|
||||
ROCTracer (pid=1983):
|
||||
ROCTracer: trace control: delay(10us), length(100000us), rate(1000000us)
|
||||
3802699747119708
|
||||
HIP-trace()
|
||||
Device name Device 687f
|
||||
## Iteration (99) #################
|
||||
PASSED!
|
||||
## Iteration (98) #################
|
||||
PASSED!
|
||||
## Iteration (97) #################
|
||||
PASSED!
|
||||
## Iteration (96) #################
|
||||
PASSED!
|
||||
## Iteration (95) #################
|
||||
PASSED!
|
||||
## Iteration (94) #################
|
||||
PASSED!
|
||||
## Iteration (93) #################
|
||||
PASSED!
|
||||
## Iteration (92) #################
|
||||
PASSED!
|
||||
## Iteration (91) #################
|
||||
PASSED!
|
||||
## Iteration (90) #################
|
||||
PASSED!
|
||||
## Iteration (89) #################
|
||||
PASSED!
|
||||
## Iteration (88) #################
|
||||
PASSED!
|
||||
## Iteration (87) #################
|
||||
PASSED!
|
||||
## Iteration (86) #################
|
||||
PASSED!
|
||||
## Iteration (85) #################
|
||||
PASSED!
|
||||
## Iteration (84) #################
|
||||
PASSED!
|
||||
## Iteration (83) #################
|
||||
PASSED!
|
||||
## Iteration (82) #################
|
||||
PASSED!
|
||||
## Iteration (81) #################
|
||||
PASSED!
|
||||
## Iteration (80) #################
|
||||
PASSED!
|
||||
## Iteration (79) #################
|
||||
PASSED!
|
||||
## Iteration (78) #################
|
||||
PASSED!
|
||||
## Iteration (77) #################
|
||||
PASSED!
|
||||
## Iteration (76) #################
|
||||
PASSED!
|
||||
## Iteration (75) #################
|
||||
PASSED!
|
||||
## Iteration (74) #################
|
||||
PASSED!
|
||||
## Iteration (73) #################
|
||||
PASSED!
|
||||
## Iteration (72) #################
|
||||
PASSED!
|
||||
## Iteration (71) #################
|
||||
PASSED!
|
||||
## Iteration (70) #################
|
||||
PASSED!
|
||||
## Iteration (69) #################
|
||||
PASSED!
|
||||
## Iteration (68) #################
|
||||
PASSED!
|
||||
## Iteration (67) #################
|
||||
PASSED!
|
||||
## Iteration (66) #################
|
||||
PASSED!
|
||||
## Iteration (65) #################
|
||||
PASSED!
|
||||
## Iteration (64) #################
|
||||
PASSED!
|
||||
## Iteration (63) #################
|
||||
PASSED!
|
||||
## Iteration (62) #################
|
||||
PASSED!
|
||||
## Iteration (61) #################
|
||||
PASSED!
|
||||
## Iteration (60) #################
|
||||
PASSED!
|
||||
## Iteration (59) #################
|
||||
PASSED!
|
||||
## Iteration (58) #################
|
||||
PASSED!
|
||||
## Iteration (57) #################
|
||||
PASSED!
|
||||
## Iteration (56) #################
|
||||
PASSED!
|
||||
## Iteration (55) #################
|
||||
PASSED!
|
||||
## Iteration (54) #################
|
||||
PASSED!
|
||||
## Iteration (53) #################
|
||||
PASSED!
|
||||
## Iteration (52) #################
|
||||
PASSED!
|
||||
## Iteration (51) #################
|
||||
PASSED!
|
||||
## Iteration (50) #################
|
||||
PASSED!
|
||||
## Iteration (49) #################
|
||||
PASSED!
|
||||
## Iteration (48) #################
|
||||
PASSED!
|
||||
## Iteration (47) #################
|
||||
PASSED!
|
||||
## Iteration (46) #################
|
||||
PASSED!
|
||||
## Iteration (45) #################
|
||||
PASSED!
|
||||
## Iteration (44) #################
|
||||
PASSED!
|
||||
## Iteration (43) #################
|
||||
PASSED!
|
||||
## Iteration (42) #################
|
||||
PASSED!
|
||||
## Iteration (41) #################
|
||||
PASSED!
|
||||
## Iteration (40) #################
|
||||
PASSED!
|
||||
## Iteration (39) #################
|
||||
PASSED!
|
||||
## Iteration (38) #################
|
||||
PASSED!
|
||||
## Iteration (37) #################
|
||||
PASSED!
|
||||
## Iteration (36) #################
|
||||
PASSED!
|
||||
## Iteration (35) #################
|
||||
PASSED!
|
||||
## Iteration (34) #################
|
||||
PASSED!
|
||||
## Iteration (33) #################
|
||||
PASSED!
|
||||
## Iteration (32) #################
|
||||
PASSED!
|
||||
## Iteration (31) #################
|
||||
PASSED!
|
||||
## Iteration (30) #################
|
||||
PASSED!
|
||||
## Iteration (29) #################
|
||||
PASSED!
|
||||
## Iteration (28) #################
|
||||
PASSED!
|
||||
## Iteration (27) #################
|
||||
PASSED!
|
||||
## Iteration (26) #################
|
||||
PASSED!
|
||||
## Iteration (25) #################
|
||||
PASSED!
|
||||
## Iteration (24) #################
|
||||
PASSED!
|
||||
## Iteration (23) #################
|
||||
PASSED!
|
||||
## Iteration (22) #################
|
||||
PASSED!
|
||||
## Iteration (21) #################
|
||||
PASSED!
|
||||
## Iteration (20) #################
|
||||
PASSED!
|
||||
## Iteration (19) #################
|
||||
PASSED!
|
||||
## Iteration (18) #################
|
||||
PASSED!
|
||||
## Iteration (17) #################
|
||||
PASSED!
|
||||
## Iteration (16) #################
|
||||
PASSED!
|
||||
## Iteration (15) #################
|
||||
PASSED!
|
||||
## Iteration (14) #################
|
||||
PASSED!
|
||||
## Iteration (13) #################
|
||||
PASSED!
|
||||
## Iteration (12) #################
|
||||
PASSED!
|
||||
## Iteration (11) #################
|
||||
PASSED!
|
||||
## Iteration (10) #################
|
||||
PASSED!
|
||||
## Iteration (9) #################
|
||||
PASSED!
|
||||
## Iteration (8) #################
|
||||
PASSED!
|
||||
## Iteration (7) #################
|
||||
PASSED!
|
||||
## Iteration (6) #################
|
||||
PASSED!
|
||||
## Iteration (5) #################
|
||||
PASSED!
|
||||
## Iteration (4) #################
|
||||
PASSED!
|
||||
## Iteration (3) #################
|
||||
PASSED!
|
||||
## Iteration (2) #################
|
||||
PASSED!
|
||||
## Iteration (1) #################
|
||||
PASSED!
|
||||
## Iteration (0) #################
|
||||
PASSED!
|
||||
3802699751533941:3802699751541991 1983:1983 hipGetDevicePropertiesR0600(props=, device=0)
|
||||
3802699752571489:3802699752686289 1983:1983 hipMalloc(ptr=0x7f6c121ff010, size=4194304)
|
||||
3802699752688639:3802699752749390 1983:1983 hipMalloc(ptr=0x7fffefcadf28, size=4194304)
|
||||
3802699752763840:3802700027958750 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
|
||||
3802700932447414:3802700934135107 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
|
||||
3802700934143817:3802700934144527 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
|
||||
3802700934146607:3802700934147267 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e03f3099)
|
||||
3802700934158787:3802700934164967 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
|
||||
3802700934192847:3802700936775947 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
|
||||
3802700943795998:3802700945501111 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
|
||||
3802700945517031:3802700945517901 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
|
||||
3802700945519841:3802700945520521 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e0ecbb86)
|
||||
3802700945522671:3802700945530171 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
|
||||
3802700945534701:3802700948131020 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
|
||||
3802700955136442:3802700956839355 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
|
||||
3802700956847725:3802700956848495 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
|
||||
3802700956850235:3802700956850825 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e1999f61)
|
||||
3802700956860545:3802700956868795 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
|
||||
3802700956872065:3802700959479235 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
|
||||
3802700966505397:3802700968203670 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
|
||||
3802700968219030:3802700968219770 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
|
||||
3802700968221700:3802700968222280 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e247222e)
|
||||
3802700968225090:3802700968233560 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
|
||||
3802700968241120:3802700970853059 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
|
||||
3802700977859821:3802700979559833 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
|
||||
3802700979567803:3802700979568553 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
|
||||
3802700979570433:3802700979571073 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e2f44d18)
|
||||
3802700979581243:3802700979589274 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
|
||||
3802700979592044:3802700982222943 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
|
||||
3802700989239045:3802700990944838 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
|
||||
3802700990960008:3802700990960828 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
|
||||
3802700990963068:3802700990963638 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e3a221d9)
|
||||
3802700990966328:3802700990975628 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
|
||||
3802700990978718:3802700993694078 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
|
||||
3802701000919212:3802701002625515 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
|
||||
3802701002633405:3802701002634215 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
|
||||
3802701002635935:3802701002636515 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e45440c4)
|
||||
3802701002649885:3802701002657855 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
|
||||
3802701002660835:3802701005267024 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
|
||||
3802701012322026:3802701014008789 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
|
||||
3802701014023469:3802701014024239 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
|
||||
3802701014028089:3802701014028669 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e5020cc5)
|
||||
3802701014031569:3802701014039849 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
|
||||
3802701014042919:3802701016640288 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
|
||||
3802701023688501:3802701025398903 1983:1983 hipMemcpy(dst=0x7f6c11400000, src=0x7f6c121ff010, sizeBytes=4194304, kind=1)
|
||||
3802701025407454:3802701025408214 1983:1983 __hipPushCallConfiguration(gridDim=, blockDim=, sharedMem=0, stream=0)
|
||||
3802701025410224:3802701025411104 1983:1983 __hipPopCallConfiguration(gridDim=, blockDim=, sharedMem=140106682958042, stream=0xd8282e5afc125)
|
||||
3802701025412944:3802701025420534 1983:1983 hipLaunchKernel(function_address=0x401030, numBlocks=, dimBlocks=, args=0x3b9aca00, sharedMemBytes=0, stream=0) kernel=matrixTranspose(float*, float*, int)
|
||||
3802701025431374:3802701028050563 1983:1983 hipMemcpy(dst=0x7f6c11dfe010, src=0x7f6c10e00000, sizeBytes=4194304, kind=2)
|
||||
3802700025923715:3802700027953920 0:0 CopyHostToDevice:4:1983
|
||||
3802700932468645:3802700934131397 0:0 CopyHostToDevice:159:1983
|
||||
3802700934202858:3802700936764597 0:0 CopyDeviceToHost:165:1983
|
||||
3802700943841248:3802700945497221 0:0 CopyHostToDevice:166:1983
|
||||
3802700945569841:3802700948120440 0:0 CopyDeviceToHost:172:1983
|
||||
3802700955175473:3802700956835555 0:0 CopyHostToDevice:173:1983
|
||||
3802700956907066:3802700959467615 0:0 CopyDeviceToHost:179:1983
|
||||
3802700966543517:3802700968200020 0:0 CopyHostToDevice:180:1983
|
||||
3802700968270720:3802700970841439 0:0 CopyDeviceToHost:186:1983
|
||||
3802700977897221:3802700979556403 0:0 CopyHostToDevice:187:1983
|
||||
3802700979628944:3802700982210583 0:0 CopyDeviceToHost:193:1983
|
||||
3802700989276246:3802700990941188 0:0 CopyHostToDevice:194:1983
|
||||
3802700991012848:3802700993682128 0:0 CopyDeviceToHost:200:1983
|
||||
3802701000959152:3802701002622075 0:0 CopyHostToDevice:201:1983
|
||||
3802701002693645:3802701005254464 0:0 CopyDeviceToHost:207:1983
|
||||
3802701012346926:3802701014005359 0:0 CopyHostToDevice:208:1983
|
||||
3802701014077439:3802701016629358 0:0 CopyDeviceToHost:214:1983
|
||||
3802701023726221:3802701025394963 0:0 CopyHostToDevice:215:1983
|
||||
3802701025467214:3802701028039843 0:0 CopyDeviceToHost:221:1983
|
||||
+2769
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór
Difríocht Luchtaigh
@@ -0,0 +1,205 @@
|
||||
+ LD_PRELOAD=libkfdwrapper64.so ./test/MatrixTranspose
|
||||
ROCTracer (pid=1963):
|
||||
KFD-trace()
|
||||
3802694735152956
|
||||
Device name Device 687f
|
||||
## Iteration (99) #################
|
||||
PASSED!
|
||||
## Iteration (98) #################
|
||||
PASSED!
|
||||
## Iteration (97) #################
|
||||
PASSED!
|
||||
## Iteration (96) #################
|
||||
PASSED!
|
||||
## Iteration (95) #################
|
||||
PASSED!
|
||||
## Iteration (94) #################
|
||||
PASSED!
|
||||
## Iteration (93) #################
|
||||
PASSED!
|
||||
## Iteration (92) #################
|
||||
PASSED!
|
||||
## Iteration (91) #################
|
||||
PASSED!
|
||||
## Iteration (90) #################
|
||||
PASSED!
|
||||
## Iteration (89) #################
|
||||
PASSED!
|
||||
## Iteration (88) #################
|
||||
PASSED!
|
||||
## Iteration (87) #################
|
||||
PASSED!
|
||||
## Iteration (86) #################
|
||||
PASSED!
|
||||
## Iteration (85) #################
|
||||
PASSED!
|
||||
## Iteration (84) #################
|
||||
PASSED!
|
||||
## Iteration (83) #################
|
||||
PASSED!
|
||||
## Iteration (82) #################
|
||||
PASSED!
|
||||
## Iteration (81) #################
|
||||
PASSED!
|
||||
## Iteration (80) #################
|
||||
PASSED!
|
||||
## Iteration (79) #################
|
||||
PASSED!
|
||||
## Iteration (78) #################
|
||||
PASSED!
|
||||
## Iteration (77) #################
|
||||
PASSED!
|
||||
## Iteration (76) #################
|
||||
PASSED!
|
||||
## Iteration (75) #################
|
||||
PASSED!
|
||||
## Iteration (74) #################
|
||||
PASSED!
|
||||
## Iteration (73) #################
|
||||
PASSED!
|
||||
## Iteration (72) #################
|
||||
PASSED!
|
||||
## Iteration (71) #################
|
||||
PASSED!
|
||||
## Iteration (70) #################
|
||||
PASSED!
|
||||
## Iteration (69) #################
|
||||
PASSED!
|
||||
## Iteration (68) #################
|
||||
PASSED!
|
||||
## Iteration (67) #################
|
||||
PASSED!
|
||||
## Iteration (66) #################
|
||||
PASSED!
|
||||
## Iteration (65) #################
|
||||
PASSED!
|
||||
## Iteration (64) #################
|
||||
PASSED!
|
||||
## Iteration (63) #################
|
||||
PASSED!
|
||||
## Iteration (62) #################
|
||||
PASSED!
|
||||
## Iteration (61) #################
|
||||
PASSED!
|
||||
## Iteration (60) #################
|
||||
PASSED!
|
||||
## Iteration (59) #################
|
||||
PASSED!
|
||||
## Iteration (58) #################
|
||||
PASSED!
|
||||
## Iteration (57) #################
|
||||
PASSED!
|
||||
## Iteration (56) #################
|
||||
PASSED!
|
||||
## Iteration (55) #################
|
||||
PASSED!
|
||||
## Iteration (54) #################
|
||||
PASSED!
|
||||
## Iteration (53) #################
|
||||
PASSED!
|
||||
## Iteration (52) #################
|
||||
PASSED!
|
||||
## Iteration (51) #################
|
||||
PASSED!
|
||||
## Iteration (50) #################
|
||||
PASSED!
|
||||
## Iteration (49) #################
|
||||
PASSED!
|
||||
## Iteration (48) #################
|
||||
PASSED!
|
||||
## Iteration (47) #################
|
||||
PASSED!
|
||||
## Iteration (46) #################
|
||||
PASSED!
|
||||
## Iteration (45) #################
|
||||
PASSED!
|
||||
## Iteration (44) #################
|
||||
PASSED!
|
||||
## Iteration (43) #################
|
||||
PASSED!
|
||||
## Iteration (42) #################
|
||||
PASSED!
|
||||
## Iteration (41) #################
|
||||
PASSED!
|
||||
## Iteration (40) #################
|
||||
PASSED!
|
||||
## Iteration (39) #################
|
||||
PASSED!
|
||||
## Iteration (38) #################
|
||||
PASSED!
|
||||
## Iteration (37) #################
|
||||
PASSED!
|
||||
## Iteration (36) #################
|
||||
PASSED!
|
||||
## Iteration (35) #################
|
||||
PASSED!
|
||||
## Iteration (34) #################
|
||||
PASSED!
|
||||
## Iteration (33) #################
|
||||
PASSED!
|
||||
## Iteration (32) #################
|
||||
PASSED!
|
||||
## Iteration (31) #################
|
||||
PASSED!
|
||||
## Iteration (30) #################
|
||||
PASSED!
|
||||
## Iteration (29) #################
|
||||
PASSED!
|
||||
## Iteration (28) #################
|
||||
PASSED!
|
||||
## Iteration (27) #################
|
||||
PASSED!
|
||||
## Iteration (26) #################
|
||||
PASSED!
|
||||
## Iteration (25) #################
|
||||
PASSED!
|
||||
## Iteration (24) #################
|
||||
PASSED!
|
||||
## Iteration (23) #################
|
||||
PASSED!
|
||||
## Iteration (22) #################
|
||||
PASSED!
|
||||
## Iteration (21) #################
|
||||
PASSED!
|
||||
## Iteration (20) #################
|
||||
PASSED!
|
||||
## Iteration (19) #################
|
||||
PASSED!
|
||||
## Iteration (18) #################
|
||||
PASSED!
|
||||
## Iteration (17) #################
|
||||
PASSED!
|
||||
## Iteration (16) #################
|
||||
PASSED!
|
||||
## Iteration (15) #################
|
||||
PASSED!
|
||||
## Iteration (14) #################
|
||||
PASSED!
|
||||
## Iteration (13) #################
|
||||
PASSED!
|
||||
## Iteration (12) #################
|
||||
PASSED!
|
||||
## Iteration (11) #################
|
||||
PASSED!
|
||||
## Iteration (10) #################
|
||||
PASSED!
|
||||
## Iteration (9) #################
|
||||
PASSED!
|
||||
## Iteration (8) #################
|
||||
PASSED!
|
||||
## Iteration (7) #################
|
||||
PASSED!
|
||||
## Iteration (6) #################
|
||||
PASSED!
|
||||
## Iteration (5) #################
|
||||
PASSED!
|
||||
## Iteration (4) #################
|
||||
PASSED!
|
||||
## Iteration (3) #################
|
||||
PASSED!
|
||||
## Iteration (2) #################
|
||||
PASSED!
|
||||
## Iteration (1) #################
|
||||
PASSED!
|
||||
## Iteration (0) #################
|
||||
PASSED!
|
||||
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór
Difríocht Luchtaigh
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór
Difríocht Luchtaigh
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór
Difríocht Luchtaigh
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór
Difríocht Luchtaigh
@@ -0,0 +1,65 @@
|
||||
<hipSetDevice id(186) correlation_id(1) on-enter pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(1) on-exit pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(2) on-enter pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(2) on-exit pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(3) on-enter pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(3) on-exit pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(4) on-enter pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(4) on-exit pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(5) on-enter pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(5) on-exit pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(6) on-enter pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(6) on-exit pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(7) on-enter pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(7) on-exit pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(8) on-enter pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(8) on-exit pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(9) on-enter pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(9) on-exit pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(10) on-enter pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(10) on-exit pid(877336) tid(877336)>
|
||||
hipSetDevice correlation_id(6) time_ns(861794298279896:861794298283613)
|
||||
__hipPushCallConfiguration correlation_id(7) time_ns(861794298290125:861794298293211)
|
||||
__hipPopCallConfiguration correlation_id(8) time_ns(861794298293903:861794298295325)
|
||||
hipLaunchKernel correlation_id(9) time_ns(861794298296377:861794298313029)
|
||||
hipDeviceSynchronize correlation_id(10) time_ns(861794298313470:861794298331113)
|
||||
hipSetDevice correlation_id(11) time_ns(861794298565986:861794298566277)
|
||||
__hipPushCallConfiguration correlation_id(12) time_ns(861794298566738:861794298567148)
|
||||
__hipPopCallConfiguration correlation_id(13) time_ns(861794298567569:861794298568010)
|
||||
hipLaunchKernel correlation_id(14) time_ns(861794298568391:861794298577638)
|
||||
hipDeviceSynchronize correlation_id(15) time_ns(861794298578069:861794298594841)
|
||||
<hipSetDevice id(186) correlation_id(16) on-enter pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(16) on-exit pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(17) on-enter pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(17) on-exit pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(18) on-enter pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(18) on-exit pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(19) on-enter pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(19) on-exit pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(20) on-enter pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(20) on-exit pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(21) on-enter pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(21) on-exit pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(22) on-enter pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(22) on-exit pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(23) on-enter pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(23) on-exit pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(24) on-enter pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(24) on-exit pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(25) on-enter pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(25) on-exit pid(877336) tid(877336)>
|
||||
hipSetDevice correlation_id(21) time_ns(861794299364583:861794299365585)
|
||||
__hipPushCallConfiguration correlation_id(22) time_ns(861794299366106:861794299367329)
|
||||
__hipPopCallConfiguration correlation_id(23) time_ns(861794299367830:861794299369082)
|
||||
hipLaunchKernel correlation_id(24) time_ns(861794299369523:861794299377227)
|
||||
hipDeviceSynchronize correlation_id(25) time_ns(861794299377748:861794299394730)
|
||||
<hipSetDevice id(186) correlation_id(26) on-enter pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(26) on-exit pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(27) on-enter pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(27) on-exit pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(28) on-enter pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(28) on-exit pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(29) on-enter pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(29) on-exit pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(30) on-enter pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(30) on-exit pid(877336) tid(877336)>
|
||||
@@ -0,0 +1,16 @@
|
||||
ROCTracer (pid=882619): input from "test/input.xml"
|
||||
0x560905682a90 agent cpu
|
||||
0x560905685470 agent gpu
|
||||
0x5609056bd500 agent gpu
|
||||
538167097046266
|
||||
HSA-trace( hsa_agent_get_info hsa_amd_memory_pool_allocate)
|
||||
HSA-activity-trace()
|
||||
538167097315584:538167097318359 882619:882619 hsa_agent_get_info({handle=94596745407120}, 0, 0x7fffa4fa77a8) = 0
|
||||
538167097322808:538167097323088 882619:882619 hsa_agent_get_info({handle=94596745407120}, 17, 0x7fffa4fa7790) = 0
|
||||
538167097333378:538167097334620 882619:882619 hsa_agent_get_info({handle=94596745417840}, 0, 0x7fffa4fa77a8) = 0
|
||||
538167097335141:538167097335411 882619:882619 hsa_agent_get_info({handle=94596745417840}, 17, 0x7fffa4fa7790) = 0
|
||||
538167097338377:538167097338798 882619:882619 hsa_agent_get_info({handle=94596745647360}, 0, 0x7fffa4fa77a8) = 0
|
||||
538167097339319:538167097339569 882619:882619 hsa_agent_get_info({handle=94596745647360}, 17, 0x7fffa4fa7790) = 0
|
||||
538167119333344:538167119348423 882619:882622 hsa_amd_memory_pool_allocate({handle=94596745407920}, 72, 0, 0x7f7d8f7f9c30) = 0
|
||||
538167119471866:538167119484039 882619:882622 hsa_amd_memory_pool_allocate({handle=94596745407920}, 256, 0, 0x7f7d8f7f9c30) = 0
|
||||
538167119592754:538167119603945 882619:882622 hsa_amd_memory_pool_allocate({handle=94596745407920}, 256, 0, 0x7f7d8f7f9c30) = 0
|
||||
@@ -0,0 +1,68 @@
|
||||
ROCTracer (pid=566828):
|
||||
0x55e1b9d507c0 agent cpu
|
||||
0x55e1b9d4eeb0 agent gpu
|
||||
0x55e1b9d8b540 agent gpu
|
||||
975779239309496
|
||||
HSA-trace()
|
||||
HSA-activity-trace()
|
||||
975779240024464:975779240024815 566828:566828 hsa_agent_get_info(, 0, 0x7ffdd25cc9a8) = 0 :6
|
||||
975779240029173:975779240029274 566828:566828 hsa_agent_get_info(, 17, 0x7ffdd25cc990) = 0 :7
|
||||
975779240035816:975779240036187 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc6bc) = 0 :9
|
||||
975779240036667:975779240036768 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc6c0) = 0 :10
|
||||
975779240037219:975779240037319 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc6e0) = 0 :11
|
||||
975779240037760:975779240037860 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc6e8) = 0 :12
|
||||
975779240039823:975779240039914 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc6bc) = 0 :13
|
||||
975779240040455:975779240040555 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc6c0) = 0 :14
|
||||
975779240041076:975779240041156 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc6e0) = 0 :15
|
||||
975779240041697:975779240041777 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc6e8) = 0 :16
|
||||
975779240042619:975779240042709 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc6bc) = 0 :17
|
||||
975779240043250:975779240043350 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc6c0) = 0 :18
|
||||
975779240043871:975779240043961 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc6e0) = 0 :19
|
||||
975779240044482:975779240044562 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc6e8) = 0 :20
|
||||
975779240044482:975779240045264 566828:566828 hsa_amd_agent_iterate_memory_pools(, 1, 0x7ffdd25cc9e8) = 0 :8
|
||||
975779240048430:975779240049341 566828:566828 hsa_agent_get_info(, 0, 0x7ffdd25cc9a8) = 0 :21
|
||||
975779240049822:975779240049932 566828:566828 hsa_agent_get_info(, 17, 0x7ffdd25cc990) = 0 :22
|
||||
975779240050654:975779240050744 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :24
|
||||
975779240051275:975779240051365 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc630) = 0 :25
|
||||
975779240051786:975779240051866 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc650) = 0 :26
|
||||
975779240052287:975779240052377 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc658) = 0 :27
|
||||
975779240053048:975779240053159 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :28
|
||||
975779240053048:975779240053609 566828:566828 hsa_amd_agent_iterate_memory_pools(, 1, 0x7ffdd25cc9e8) = 0 :23
|
||||
975779240055373:975779240055663 566828:566828 hsa_agent_get_info(, 0, 0x7ffdd25cc9a8) = 0 :29
|
||||
975779240056144:975779240056234 566828:566828 hsa_agent_get_info(, 17, 0x7ffdd25cc990) = 0 :30
|
||||
975779240056986:975779240057076 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :32
|
||||
975779240057507:975779240057587 566828:566828 hsa_amd_memory_pool_get_info(, 1, 0x7ffdd25cc630) = 0 :33
|
||||
975779240058008:975779240058088 566828:566828 hsa_amd_memory_pool_get_info(, 2, 0x7ffdd25cc650) = 0 :34
|
||||
975779240058509:975779240058589 566828:566828 hsa_amd_memory_pool_get_info(, 6, 0x7ffdd25cc658) = 0 :35
|
||||
975779240061504:975779240061605 566828:566828 hsa_amd_memory_pool_get_info(, 0, 0x7ffdd25cc62c) = 0 :36
|
||||
975779240061504:975779240062035 566828:566828 hsa_amd_agent_iterate_memory_pools(, 1, 0x7ffdd25cc9e8) = 0 :31
|
||||
975779240061504:975779240063528 566828:566828 hsa_iterate_agents(1, 0) = 0 :5
|
||||
975779240167184:975779249865797 566828:566834 hsa_queue_create(, 1024, 1, 0, 0, 0, 0, 0x7f70535fdbc8) = 0 :37
|
||||
975779249901595:975779249990022 566828:566834 hsa_code_object_reader_create_from_file(8, 0x7f70535fdbf8) = 0 :38
|
||||
975779249990984:975779250001383 566828:566834 hsa_executable_create_alt(1, 0, 0x0, 0x7f70535fdc00) = 0 :27
|
||||
975779250002345:975779250430202 566828:566834 hsa_executable_load_agent_code_object(, , , 0x0, 0) = 0 :28
|
||||
975779250432296:975779250926909 566828:566834 hsa_executable_freeze(, 0x0) = 0 :29
|
||||
975779250929755:975779250931207 566828:566834 hsa_executable_get_symbol_by_name(, 0x7f7054002950, 0x7f70535fdb28, 0x7f70535fdb48) = 1013 :2a
|
||||
975779250931979:975779250932690 566828:566834 hsa_executable_get_symbol_by_name(, 0x7f7054003850, 0x7f70535fdb28, 0x7f70535fdb48) = 0 :2b
|
||||
975779250934403:975779250934854 566828:566834 hsa_executable_symbol_get_info(, 22, 0x7f70535fdc10) = 0 :44
|
||||
975779250935425:975779250935536 566828:566834 hsa_executable_symbol_get_info(, 14, 0x7f70535fdc18) = 0 :45
|
||||
975779250936097:975779250936177 566828:566834 hsa_executable_symbol_get_info(, 13, 0x7f70535fdc1c) = 0 :46
|
||||
975779250936728:975779250936798 566828:566834 hsa_executable_symbol_get_info(, 11, 0x7f70535fdc20) = 0 :47
|
||||
975779250937349:975779250937419 566828:566834 hsa_executable_symbol_get_info(, 12, 0x7f70535fdc24) = 0 :48
|
||||
975779250938321:975779250956876 566828:566834 hsa_amd_memory_pool_allocate(, 72, 0, 0x7f70535fdb70) = 0 :49
|
||||
975779250958098:975779251048298 566828:566834 hsa_amd_agents_allow_access(3, 0x55e1b9df4c30, 0, 0x7f7261070000) = 0 :50
|
||||
975779251049150:975779251065531 566828:566834 hsa_amd_memory_pool_allocate(, 256, 0, 0x7f70535fdb70) = 0 :51
|
||||
975779251066232:975779251149319 566828:566834 hsa_amd_agents_allow_access(3, 0x55e1b9df4c30, 0, 0x7f726106e000) = 0 :52
|
||||
975779251150000:975779251165960 566828:566834 hsa_amd_memory_pool_allocate(, 256, 0, 0x7f70535fdb70) = 0 :53
|
||||
975779251166531:975779251256912 566828:566834 hsa_amd_agents_allow_access(3, 0x55e1b9df4c30, 0, 0x7f726106c000) = 0 :54
|
||||
975779251258114:975779251261601 566828:566834 hsa_amd_signal_create(1, 0, 0, 0, 0x7f70535fdbd0) = 0 :55
|
||||
975779251262923:975779251263204 566828:566834 hsa_queue_load_write_index_relaxed(0x7f726109e000) = 0 :56
|
||||
975779251264065:975779251264276 566828:566834 hsa_queue_load_read_index_relaxed(0x7f726109e000) = 0 :57
|
||||
975779251264937:975779251265178 566828:566834 hsa_queue_store_write_index_screlease(0x7f726109e000, 1) = void :58
|
||||
975779251265969:975779251266951 566828:566834 hsa_signal_store_screlease(, 0) = void :59
|
||||
975779251267472:975779251283773 566828:566834 hsa_signal_wait_scacquire(, 0, 0, 18446744073709551615, 0) = 0 :60
|
||||
975779251284654:975779251286848 566828:566834 hsa_signal_destroy() = 0 :61
|
||||
975779251290806:975779251322035 566828:566834 hsa_memory_free(0x7f726106e000) = 0 :62
|
||||
975779251322646:975779251341261 566828:566834 hsa_memory_free(0x7f726106c000) = 0 :63
|
||||
975779251342043:975779251389061 566828:566834 hsa_executable_destroy() = 0 :64
|
||||
975779251389843:975779251392488 566828:566834 hsa_code_object_reader_destroy() = 0 :65
|
||||
@@ -0,0 +1,22 @@
|
||||
ROCTracer (pid=566858):
|
||||
0x55ae2fa607c0 agent cpu
|
||||
0x55ae2fa5eeb0 agent gpu
|
||||
0x55ae2fa9b540 agent gpu
|
||||
975785718853775
|
||||
HSA-trace()
|
||||
HSA-activity-trace()
|
||||
975785719398623:975785719398824 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :6
|
||||
975785719403482:975785719403643 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :7
|
||||
975785719404274:975785719404364 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :8
|
||||
975785719404274:975785719404885 566858:566858 hsa_iterate_agents(1, 0) = 0 :5
|
||||
ROCTracer (pid=566858):
|
||||
0x55ae2fa607c0 agent cpu
|
||||
0x55ae2fb02cc0 agent gpu
|
||||
0x55ae2fa62970 agent gpu
|
||||
975785742239830
|
||||
HSA-trace()
|
||||
HSA-activity-trace()
|
||||
975785742436120:975785742436310 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :14
|
||||
975785742437352:975785742437472 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :15
|
||||
975785742437963:975785742438053 566858:566858 hsa_agent_get_info(, 17, 0x7ffe30b97814) = 0 :16
|
||||
975785742437963:975785742438464 566858:566858 hsa_iterate_agents(1, 0) = 0 :13
|
||||
@@ -0,0 +1,20 @@
|
||||
:CopyHostToDevice : correlation_id(1) time_ns(109660008446578:109660008452178)
|
||||
:hipMemcpy : correlation_id(1) time_ns(109659777462237:109660008474607)
|
||||
:CopyHostToDevice : correlation_id(2) time_ns(109660011646881:109660011651041)
|
||||
:hipMemcpy : correlation_id(2) time_ns(109660011115400:109660011817555)
|
||||
:CopyHostToDevice : correlation_id(3) time_ns(109660011942080:109660011946240)
|
||||
:hipMemcpy : correlation_id(3) time_ns(109660011846359:109660011951538)
|
||||
:CopyHostToDevice : correlation_id(4) time_ns(109660011985759:109660011989919)
|
||||
:hipMemcpy : correlation_id(4) time_ns(109660011961286:109660011994288)
|
||||
:CopyHostToDevice : correlation_id(5) time_ns(109660012053439:109660012057599)
|
||||
:hipMemcpy : correlation_id(5) time_ns(109660012029645:109660012062688)
|
||||
:CopyHostToDevice : correlation_id(6) time_ns(109660012096639:109660012100799)
|
||||
:hipMemcpy : correlation_id(6) time_ns(109660012073037:109660012105278)
|
||||
:CopyHostToDevice : correlation_id(7) time_ns(109660012138239:109660012142879)
|
||||
:hipMemcpy : correlation_id(7) time_ns(109660012114796:109660012147087)
|
||||
:CopyHostToDevice : correlation_id(8) time_ns(109660012180158:109660012184478)
|
||||
:hipMemcpy : correlation_id(8) time_ns(109660012156274:109660012188795)
|
||||
:CopyHostToDevice : correlation_id(9) time_ns(109660012221438:109660012225758)
|
||||
:hipMemcpy : correlation_id(9) time_ns(109660012198213:109660012230234)
|
||||
:CopyHostToDevice : correlation_id(10) time_ns(109660012262398:109660012266878)
|
||||
:hipMemcpy : correlation_id(10) time_ns(109660012239211:109660012271171)
|
||||
@@ -0,0 +1,18 @@
|
||||
ROCTracer (pid=993231):
|
||||
rocTX-trace()
|
||||
0xce5450 agent cpu
|
||||
0xd1d520 agent gpu
|
||||
0xd1fe80 agent gpu
|
||||
628584618590744
|
||||
628584859661999 993231:993231 1:0:"NestedRangeA"
|
||||
628584859674021 993231:993231 2:0:""
|
||||
628584859674693 993231:993231 1:0:"NestedRangeB"
|
||||
628584859675344 993231:993231 1:0:"NestedRangeC"
|
||||
628584859676115 993231:993231 3:1:"StartStopRangeA"
|
||||
628584859678390 993231:993231 2:0:""
|
||||
628584859678921 993231:993231 2:0:""
|
||||
628584859755545 993231:993233 4:1:""
|
||||
628584859819756 993231:993231 1:0:"NestedRangeD"
|
||||
628584859820708 993231:993231 1:0:"NestedRangeE"
|
||||
628584859821219 993231:993231 2:0:""
|
||||
628584859824095 993231:993231 2:0:""
|
||||
@@ -0,0 +1,24 @@
|
||||
# dummy
|
||||
MatrixTranspose_dryrun_trace --check-none
|
||||
copy_dryrun_trace --check-none
|
||||
MatrixTranspose_ctest_trace --check-count .*
|
||||
MatrixTranspose_test_trace --check-count .* --ignore-count KernelExecution
|
||||
MatrixTranspose_hipaact_test_trace --check-count .* --ignore-count KernelExecution|hipMemcpy|__hipPushCallConfiguration|hipLaunchKernel|__hipPopCallConfiguration
|
||||
MatrixTranspose_mgpu_trace --check-events .*
|
||||
MatrixTranspose_sys_trace --check-count .* --ignore-count matrixTranspose|hsa_.*
|
||||
MatrixTranspose_sys_hsa_trace --check-count .* --ignore-count hsa_.*
|
||||
MatrixTranspose_hip_period_trace --check-events .* --ignore-event hipMalloc|hipFree
|
||||
MatrixTranspose_hip_flush_trace --check-count .* --ignore-count matrixTranspose
|
||||
MatrixTranspose_kfd_trace --check-events .*
|
||||
MatrixTranspose_hip_input_trace --check-events .*
|
||||
copy_hsa_trace --check-events .*
|
||||
copy_hsa_input_trace --check-events .*
|
||||
load_unload_reload_trace --check-order .* --ignore-count hsa_agent_get_info
|
||||
code_obj_trace --check-none
|
||||
trace_buffer --check-none
|
||||
memory_pool --check-none
|
||||
activity_and_callback_trace --check-order .*
|
||||
multi_pool_activities_trace --check-order .*
|
||||
roctx_test_trace --check-count .*
|
||||
backward_compat_test_trace --check-none
|
||||
dlopen --check-none
|
||||
@@ -0,0 +1,147 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <iostream>
|
||||
|
||||
// hip header file
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "roctracer_ext.h"
|
||||
// roctx header file
|
||||
#include <roctx.h>
|
||||
|
||||
// Evaluates a HIP runtime call exactly once. If the returned status is not hipSuccess,
// prints the HIP error string to stderr and aborts the process.
// The argument is parenthesised, and the status local has a name unlikely to clash with
// caller variables, so an argument expression that mentions a variable called `err` is
// not captured by the macro.
#define HIP_CALL(call)                                                \
  do {                                                                \
    const hipError_t hip_call_status_ = (call);                       \
    if (hip_call_status_ != hipSuccess) {                             \
      fprintf(stderr, "%s\n", hipGetErrorString(hip_call_status_));   \
      abort();                                                        \
    }                                                                 \
  } while (0)
|
||||
|
||||
#define WIDTH 1024
|
||||
|
||||
|
||||
#define NUM (WIDTH * WIDTH)
|
||||
|
||||
#define THREADS_PER_BLOCK_X 4
|
||||
#define THREADS_PER_BLOCK_Y 4
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
__global__ void matrixTranspose(float* out, float* in, const int width) {
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
out[y * width + x] = in[x * width + y];
|
||||
}
|
||||
|
||||
// Host-side reference transpose of a square, row-major width x width matrix.
// Element (row, col) of `input` is written to element (col, row) of `output`.
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
  for (unsigned int row = 0; row < width; ++row) {
    const float* source_row = input + row * width;
    for (unsigned int col = 0; col < width; ++col) {
      output[col * width + row] = source_row[col];
    }
  }
}
|
||||
|
||||
int main() {
|
||||
float* Matrix;
|
||||
float* TransposeMatrix;
|
||||
float* cpuTransposeMatrix;
|
||||
|
||||
float* gpuMatrix;
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
HIP_CALL(hipGetDeviceProperties(&devProp, 0));
|
||||
|
||||
std::cerr << "Device name " << devProp.name << std::endl;
|
||||
|
||||
int i;
|
||||
int errors;
|
||||
|
||||
Matrix = (float*)malloc(NUM * sizeof(float));
|
||||
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
HIP_CALL(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));
|
||||
HIP_CALL(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));
|
||||
|
||||
uint32_t iterations = 100;
|
||||
while (iterations-- > 0) {
|
||||
std::cerr << "## Iteration (" << iterations << ") #################" << std::endl;
|
||||
|
||||
// Memory transfer from host to device
|
||||
HIP_CALL(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));
|
||||
|
||||
roctxMark("before hipLaunchKernel");
|
||||
int rangeId = roctxRangeStart("hipLaunchKernel range");
|
||||
roctxRangePush("hipLaunchKernel");
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(
|
||||
matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
roctxMark("after hipLaunchKernel");
|
||||
|
||||
// Memory transfer from device to host
|
||||
roctxRangePush("hipMemcpy");
|
||||
|
||||
HIP_CALL(
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
roctxRangePop(); // for "hipMemcpy"
|
||||
roctxRangePop(); // for "hipLaunchKernel"
|
||||
roctxRangeStop(rangeId);
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
|
||||
// verify the results
|
||||
errors = 0;
|
||||
double eps = 1.0E-6;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
fprintf(stderr, "FAILED: %d errors\n", errors);
|
||||
} else {
|
||||
fprintf(stderr, "PASSED!\n");
|
||||
}
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
HIP_CALL(hipFree(gpuMatrix));
|
||||
HIP_CALL(hipFree(gpuTransposeMatrix));
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
|
||||
return errors;
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
// Copies one 32-bit element per work-item: the work-item with global id `gid` in
// dimension 0 performs a[gid] = b[gid].
__kernel void copy(__global unsigned int* a, __global unsigned int* b) {
  const uint gid = get_global_id(0);
  a[gid] = b[gid];
}
|
||||
@@ -0,0 +1,454 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
#include <hsa/hsa_ext_image.h>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
// Terminates the process when an HSA call does not return HSA_STATUS_SUCCESS.
// The argument is evaluated exactly once. assert(false) reports the failing location in
// builds with assertions enabled; abort() still terminates the process when NDEBUG is set.
// There is deliberately no trailing semicolon after `while (false)`: `CHECK(x);` is then a
// single statement and stays valid as the body of an unbraced if/else.
#define CHECK(x)                       \
  do {                                 \
    if ((x) != HSA_STATUS_SUCCESS) {   \
      assert(false);                   \
      abort();                         \
    }                                  \
  } while (false)
|
||||
|
||||
struct Device {
|
||||
struct Memory {
|
||||
hsa_amd_memory_pool_t pool;
|
||||
bool fine;
|
||||
bool kernarg;
|
||||
size_t size;
|
||||
size_t granule;
|
||||
};
|
||||
|
||||
hsa_agent_t agent;
|
||||
char name[64];
|
||||
std::vector<Memory> pools;
|
||||
uint32_t fine;
|
||||
uint32_t coarse;
|
||||
static std::vector<hsa_agent_t> all_devices;
|
||||
};
|
||||
std::vector<hsa_agent_t> Device::all_devices;
|
||||
|
||||
// Dispatch parameters of one kernel symbol, filled by GetKernel() from
// hsa_executable_symbol_get_info. All members default to zero so a Kernel that has not
// been filled yet never holds indeterminate values.
struct Kernel {
  uint64_t handle = 0;         // HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT
  uint32_t scratch = 0;        // HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE
  uint32_t group = 0;          // HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE
  uint32_t kernarg_size = 0;   // HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE
  uint32_t kernarg_align = 0;  // HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT
};
|
||||
|
||||
// Two views of the 16-bit AQL packet header: named bitfields and the raw word.
// Assumes bitfield layout is little endian.
// Assumes std::atomic<uint16_t> is binary compatible with uint16_t and uses HW atomics.
union AqlHeader {
  struct {
    uint16_t type : 8;
    uint16_t barrier : 1;
    uint16_t acquire : 2;
    uint16_t release : 2;
    uint16_t reserved : 3;
  };
  uint16_t raw;
};
// The bitfields must pack into exactly the raw 16-bit word; fail the build otherwise.
static_assert(sizeof(AqlHeader) == sizeof(uint16_t), "AqlHeader must be exactly 16 bits");
|
||||
|
||||
struct BarrierValue {
|
||||
AqlHeader header;
|
||||
uint8_t AmdFormat;
|
||||
uint8_t reserved;
|
||||
uint32_t reserved1;
|
||||
hsa_signal_t signal;
|
||||
hsa_signal_value_t value;
|
||||
hsa_signal_value_t mask;
|
||||
uint32_t cond;
|
||||
uint32_t reserved2;
|
||||
uint64_t reserved3;
|
||||
uint64_t reserved4;
|
||||
hsa_signal_t completion_signal;
|
||||
};
|
||||
|
||||
union Aql {
|
||||
AqlHeader header;
|
||||
hsa_kernel_dispatch_packet_t dispatch;
|
||||
hsa_barrier_and_packet_t barrier_and;
|
||||
hsa_barrier_or_packet_t barrier_or;
|
||||
BarrierValue barrier_value;
|
||||
};
|
||||
|
||||
// Trailing block of the kernel argument buffer built in test_func(): three 64-bit offsets
// followed by four pointers. test_func() zero-fills the whole buffer.
struct OCLHiddenArgs {
  uint64_t offset_x, offset_y, offset_z;
  void *printf_buffer, *enqueue, *enqueue2, *multi_grid;
};
|
||||
|
||||
// Seven consecutive 64-bit slots: three offsets, three unnamed placeholders and a
// multi-grid sync value.
struct hip_hiddens {
  uint64_t offset_x, offset_y, offset_z;
  uint64_t _, _2, _3;
  uint64_t multi_grid_sync;
};
|
||||
|
||||
|
||||
std::vector<Device> cpu, gpu;
|
||||
Device::Memory kernarg;
|
||||
|
||||
struct CodeObject {
|
||||
hsa_file_t file;
|
||||
hsa_code_object_reader_t code_obj_rdr;
|
||||
hsa_executable_t executable;
|
||||
};
|
||||
|
||||
bool DeviceDiscovery() {
|
||||
hsa_status_t err;
|
||||
|
||||
err = hsa_iterate_agents(
|
||||
[](hsa_agent_t agent, void*) {
|
||||
hsa_status_t err;
|
||||
|
||||
Device dev;
|
||||
dev.agent = agent;
|
||||
|
||||
dev.fine = -1u;
|
||||
dev.coarse = -1u;
|
||||
|
||||
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, dev.name);
|
||||
CHECK(err);
|
||||
|
||||
hsa_device_type_t type;
|
||||
err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
CHECK(err);
|
||||
|
||||
err = hsa_amd_agent_iterate_memory_pools(
|
||||
agent,
|
||||
[](hsa_amd_memory_pool_t pool, void* data) {
|
||||
std::vector<Device::Memory>& pools =
|
||||
*reinterpret_cast<std::vector<Device::Memory>*>(data);
|
||||
hsa_status_t err;
|
||||
|
||||
hsa_amd_segment_t segment;
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
CHECK(err);
|
||||
|
||||
if (segment != HSA_AMD_SEGMENT_GLOBAL) return HSA_STATUS_SUCCESS;
|
||||
|
||||
uint32_t flags;
|
||||
err =
|
||||
hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flags);
|
||||
CHECK(err);
|
||||
|
||||
Device::Memory mem;
|
||||
mem.pool = pool;
|
||||
mem.fine = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED);
|
||||
mem.kernarg = (flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &mem.size);
|
||||
CHECK(err);
|
||||
|
||||
err = hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &mem.granule);
|
||||
CHECK(err);
|
||||
|
||||
pools.push_back(mem);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
(void*)&dev.pools);
|
||||
|
||||
if (!dev.pools.empty()) {
|
||||
for (size_t i = 0; i < dev.pools.size(); i++) {
|
||||
if (dev.pools[i].fine && dev.pools[i].kernarg && dev.fine == -1u) dev.fine = i;
|
||||
if (dev.pools[i].fine && !dev.pools[i].kernarg) dev.fine = i;
|
||||
if (!dev.pools[i].fine) dev.coarse = i;
|
||||
}
|
||||
|
||||
if (type == HSA_DEVICE_TYPE_CPU)
|
||||
cpu.push_back(dev);
|
||||
else
|
||||
gpu.push_back(dev);
|
||||
|
||||
Device::all_devices.push_back(dev.agent);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
},
|
||||
nullptr);
|
||||
|
||||
[]() {
|
||||
for (auto& dev : cpu) {
|
||||
for (auto& mem : dev.pools) {
|
||||
if (mem.fine && mem.kernarg) {
|
||||
kernarg = mem;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}();
|
||||
CHECK(err);
|
||||
|
||||
if (cpu.empty() || gpu.empty() || kernarg.pool.handle == 0) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LoadCodeObject(std::string filename, hsa_agent_t agent, CodeObject& code_object) {
|
||||
hsa_status_t err;
|
||||
|
||||
code_object.file = open(filename.c_str(), O_RDONLY);
|
||||
if (code_object.file == -1) return false;
|
||||
|
||||
err = hsa_code_object_reader_create_from_file(code_object.file, &code_object.code_obj_rdr);
|
||||
CHECK(err);
|
||||
|
||||
err = hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
nullptr, &code_object.executable);
|
||||
CHECK(err);
|
||||
|
||||
err = hsa_executable_load_agent_code_object(code_object.executable, agent,
|
||||
code_object.code_obj_rdr, nullptr, nullptr);
|
||||
if (err != HSA_STATUS_SUCCESS) return false;
|
||||
|
||||
err = hsa_executable_freeze(code_object.executable, nullptr);
|
||||
CHECK(err);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool GetKernel(const CodeObject& code_object, std::string kernel, hsa_agent_t agent, Kernel& kern) {
|
||||
hsa_executable_symbol_t symbol;
|
||||
hsa_status_t err =
|
||||
hsa_executable_get_symbol_by_name(code_object.executable, kernel.c_str(), &agent, &symbol);
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
err = hsa_executable_get_symbol_by_name(code_object.executable, (kernel + ".kd").c_str(),
|
||||
&agent, &symbol);
|
||||
if (err != HSA_STATUS_SUCCESS) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
|
||||
&kern.handle);
|
||||
CHECK(err);
|
||||
|
||||
err = hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, &kern.scratch);
|
||||
CHECK(err);
|
||||
// printf("Scratch: %d\n", kern.scratch);
|
||||
|
||||
err = hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
|
||||
&kern.group);
|
||||
CHECK(err);
|
||||
// printf("LDS: %d\n", kern.group);
|
||||
|
||||
// Remaining needs code object v2 or comgr.
|
||||
err = hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kern.kernarg_size);
|
||||
CHECK(err);
|
||||
// printf("Kernarg Size: %d\n", kern.kernarg_size);
|
||||
|
||||
err = hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_ALIGNMENT, &kern.kernarg_align);
|
||||
CHECK(err);
|
||||
// printf("Kernarg Align: %d\n", kern.kernarg_align);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Not for parallel insertion.
|
||||
bool SubmitPacket(hsa_queue_t* queue, Aql& pkt) {
|
||||
size_t mask = queue->size - 1;
|
||||
Aql* ring = (Aql*)queue->base_address;
|
||||
|
||||
uint64_t write = hsa_queue_load_write_index_relaxed(queue);
|
||||
uint64_t read = hsa_queue_load_read_index_relaxed(queue);
|
||||
if (write - read + 1 > queue->size) return false;
|
||||
|
||||
Aql& dst = ring[write & mask];
|
||||
|
||||
uint16_t header = pkt.header.raw;
|
||||
pkt.header.raw = dst.header.raw;
|
||||
dst = pkt;
|
||||
__atomic_store_n(&dst.header.raw, header, __ATOMIC_RELEASE);
|
||||
pkt.header.raw = header;
|
||||
|
||||
hsa_queue_store_write_index_release(queue, write + 1);
|
||||
hsa_signal_store_screlease(queue->doorbell_signal, write);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Allocates `size` bytes from the pool `mem.pool` and grants access to the allocation to
// every agent in Device::all_devices. Aborts through CHECK if either HSA call fails.
void* hsaMalloc(size_t size, const Device::Memory& mem) {
  void* allocation;
  CHECK(hsa_amd_memory_pool_allocate(mem.pool, size, 0, &allocation));
  CHECK(hsa_amd_agents_allow_access(Device::all_devices.size(), Device::all_devices.data(),
                                    nullptr, allocation));
  return allocation;
}
|
||||
|
||||
void* hsaMalloc(size_t size, const Device& dev, bool fine) {
|
||||
uint32_t index = fine ? dev.fine : dev.coarse;
|
||||
assert(index != -1u && "Memory type unavailable.");
|
||||
return hsaMalloc(size, dev.pools[index]);
|
||||
}
|
||||
|
||||
void test_func(int kiter, int diter, int agents) {
|
||||
for (int device_index = 0; device_index < agents; ++device_index) {
|
||||
hsa_status_t err;
|
||||
|
||||
hsa_queue_t* queue;
|
||||
err = hsa_queue_create(gpu[device_index].agent, 1024, HSA_QUEUE_TYPE_SINGLE, nullptr, nullptr,
|
||||
0, 0, &queue);
|
||||
CHECK(err);
|
||||
|
||||
CodeObject code_object;
|
||||
if (!LoadCodeObject(std::string(gpu[device_index].name) + "_copy.hsaco",
|
||||
gpu[device_index].agent, code_object)) {
|
||||
printf("Kernel file not found or not usable with given agent.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
Kernel copy;
|
||||
if (!GetKernel(code_object, "copy", gpu[device_index].agent, copy)) {
|
||||
printf("Test kernel not found.\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
for (int i = 0; i < kiter; ++i) {
|
||||
struct args_t {
|
||||
uint32_t* a;
|
||||
uint32_t* b;
|
||||
OCLHiddenArgs hidden;
|
||||
};
|
||||
|
||||
args_t* args;
|
||||
args = (args_t*)hsaMalloc(sizeof(args_t), kernarg);
|
||||
memset(args, 0, sizeof(args_t));
|
||||
|
||||
uint32_t* a = (uint32_t*)hsaMalloc(64 * sizeof(uint32_t), kernarg);
|
||||
uint32_t* b = (uint32_t*)hsaMalloc(64 * sizeof(uint32_t), kernarg);
|
||||
|
||||
memset(a, 0, 64 * sizeof(uint32_t));
|
||||
memset(b, 1, 64 * sizeof(uint32_t));
|
||||
|
||||
hsa_signal_t signal;
|
||||
// Use interrupts.
|
||||
err = hsa_amd_signal_create(1, 0, nullptr, 0, &signal);
|
||||
CHECK(err);
|
||||
|
||||
for (int j = 1; j <= diter; ++j) {
|
||||
Aql packet{};
|
||||
packet.header.type = HSA_PACKET_TYPE_KERNEL_DISPATCH;
|
||||
packet.header.barrier = 1;
|
||||
packet.header.acquire = HSA_FENCE_SCOPE_SYSTEM;
|
||||
packet.header.release = HSA_FENCE_SCOPE_SYSTEM;
|
||||
|
||||
packet.dispatch.setup = 1;
|
||||
packet.dispatch.workgroup_size_x = 64;
|
||||
packet.dispatch.workgroup_size_y = 1;
|
||||
packet.dispatch.workgroup_size_z = 1;
|
||||
packet.dispatch.grid_size_x = 64;
|
||||
packet.dispatch.grid_size_y = 1;
|
||||
packet.dispatch.grid_size_z = 1;
|
||||
|
||||
packet.dispatch.group_segment_size = copy.group;
|
||||
packet.dispatch.private_segment_size = copy.scratch;
|
||||
packet.dispatch.kernel_object = copy.handle;
|
||||
|
||||
packet.dispatch.kernarg_address = args;
|
||||
if (j == diter) packet.dispatch.completion_signal = signal;
|
||||
|
||||
args->a = a;
|
||||
args->b = b;
|
||||
SubmitPacket(queue, packet);
|
||||
}
|
||||
hsa_signal_wait_acquire(signal, HSA_SIGNAL_CONDITION_EQ, 0, -1, HSA_WAIT_STATE_BLOCKED);
|
||||
err = hsa_signal_destroy(signal);
|
||||
CHECK(err);
|
||||
|
||||
for (int i = 0; i < 64; i++) {
|
||||
if (a[i] != b[i]) {
|
||||
printf("error at %d: expected %d, got %d\n", i, b[i], a[i]);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
err = hsa_memory_free(a);
|
||||
CHECK(err);
|
||||
err = hsa_memory_free(b);
|
||||
CHECK(err);
|
||||
}
|
||||
|
||||
err = hsa_executable_destroy(code_object.executable);
|
||||
CHECK(err);
|
||||
err = hsa_code_object_reader_destroy(code_object.code_obj_rdr);
|
||||
CHECK(err);
|
||||
close(code_object.file);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
const char* kiter_s = getenv("ROCP_KITER");
|
||||
const char* diter_s = getenv("ROCP_DITER");
|
||||
const char* agents_s = getenv("ROCP_AGENTS");
|
||||
const char* threads_s = getenv("ROCP_THRS");
|
||||
|
||||
int kiter = (kiter_s != nullptr) ? atoi(kiter_s) : 1;
|
||||
int diter = (diter_s != nullptr) ? atoi(diter_s) : 1;
|
||||
int agents = (agents_s != nullptr) ? atoi(agents_s) : 1;
|
||||
int threads = (threads_s != nullptr) ? atoi(threads_s) : 1;
|
||||
|
||||
hsa_status_t err;
|
||||
err = hsa_init();
|
||||
CHECK(err);
|
||||
|
||||
if (!DeviceDiscovery()) {
|
||||
printf("Usable devices not found.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::vector<std::thread> t(threads);
|
||||
for (int n = 0; n < threads; ++n)
|
||||
t[n] = std::thread(test_func, kiter, diter, std::min(agents, (int)gpu.size()));
|
||||
for (int n = 0; n < threads; ++n) t[n].join();
|
||||
|
||||
err = hsa_shut_down();
|
||||
CHECK(err);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
|
||||
// Terminates the process when an HSA call does not return HSA_STATUS_SUCCESS.
// The argument is evaluated exactly once. assert(false) reports the failing location in
// builds with assertions enabled; abort() still terminates the process when NDEBUG is set.
// There is deliberately no trailing semicolon after `while (false)`: `CHECK(x);` is then a
// single statement and stays valid as the body of an unbraced if/else.
#define CHECK(x)                       \
  do {                                 \
    if ((x) != HSA_STATUS_SUCCESS) {   \
      assert(false);                   \
      abort();                         \
    }                                  \
  } while (false)
|
||||
|
||||
int main() {
|
||||
// Run 2 loops of {hsa_init(); hsa_iterate_agents(); hsa_shut_down()} to test that the
|
||||
// tracer tool correctly unloaded after the 1st iteration and then reloaded for the 2nd
|
||||
// iteration.
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
hsa_init();
|
||||
|
||||
CHECK(hsa_iterate_agents(
|
||||
[](hsa_agent_t agent, void*) {
|
||||
hsa_device_type_t type;
|
||||
return hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &type);
|
||||
},
|
||||
nullptr));
|
||||
|
||||
hsa_shut_down();
|
||||
}
|
||||
}
|
||||
+206
@@ -0,0 +1,206 @@
|
||||
#!/bin/sh
|
||||
|
||||
################################################################################
|
||||
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in
|
||||
# all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
# THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
# cd to build directory
# BIN_DIR is resolved to an absolute path first, so the "${BIN_DIR}/../../lib" probes
# below still refer to the intended location after the cd, even when the script was
# started through a relative path.
BIN_NAME=$(basename "$0")
BIN_DIR=$(cd "$(dirname "$0")" && pwd) || exit 1

# To enable symbol lookup in .dynsym section after llvm-strip
export LOADER_USE_DYNSYM=1

cd "$BIN_DIR" || exit 1

if [ -z "$ROCTRACER_LIB_PATH" ] ; then
  if test -f "${BIN_DIR}/../../lib/libroctracer64.so" ; then
    ROCTRACER_LIB_PATH="${BIN_DIR}/../../lib"
  fi
fi

# enable tools load failure reporting
export HSA_TOOLS_REPORT_LOAD_FAILURE=1
# paths to ROC profiler and other libraries
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PWD"
if [ -n "$ROCTRACER_LIB_PATH" ] ; then
  export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$ROCTRACER_LIB_PATH"
fi
# fall back to the current directory when no library path was given or detected
if [ -z "$ROCTRACER_LIB_PATH" ] ; then
  ROCTRACER_LIB_PATH="."
fi
if [ -z "$ROCTRACER_TOOL_PATH" ] ; then
  if test -f "${BIN_DIR}/../../lib/roctracer/libroctracer_tool.so" ; then
    ROCTRACER_TOOL_PATH="${BIN_DIR}/../../lib/roctracer"
  else
    ROCTRACER_TOOL_PATH="."
  fi
fi
|
||||
|
||||
# test filter input
# First argument (optional): number of the single test to run; -1 runs every test.
# A following "-n" sets check_trace_flag to 0.
test_filter=-1
check_trace_flag=1
if [ -n "$1" ] ; then
  test_filter=$1
  shift
fi
# After the shift above, a "-n" written directly after the filter is in $1, so testing
# only $2 never matched it. $2 is still accepted for callers that relied on the old
# position.
if [ "$1" = "-n" ] || [ "$2" = "-n" ] ; then
  check_trace_flag=0
fi
|
||||
|
||||
# test check routine
test_status=0                # number of failed tests
test_runnum=0                # number of tests actually executed
test_number=0                # index of the next test
failed_tests="Failed tests:" # list of failed tests, extended by eval_test

# Placeholder with the same name shape as eval_test: it runs nothing and leaves
# test_number as it is.
xeval_test() {
  test_number=${test_number}
}
|
||||
|
||||
# Enable coloured PASSED/FAILED output only when the terminal reports at least 8 colours.
# Errors from tput (no terminal, unknown TERM, tput not installed) and from a
# non-numeric colour count are silenced; in those cases the colour variables stay unset
# and the output is plain.
ncolors=$(tput colors 2>/dev/null || echo 0)
if [ -n "$ncolors" ] && [ "$ncolors" -ge 8 ] 2>/dev/null; then
  bright="$(tput bold || echo)"
  red="$(tput setaf 1 || echo)"
  green="$(tput setaf 2 || echo)"
  blue="$(tput setaf 4 || echo)"
  normal="$(tput sgr0 || echo)"
fi
|
||||
|
||||
# Runs one test and records the result.
#   $1 - human readable label
#   $2 - command line, executed with eval
#   $3 - test name; used for the output file names and passed to check_trace.py
# The test runs only when test_filter is -1 or equals the current test_number.
# stdout/stderr are captured under /tmp/test/out. A non-zero exit status fails the test
# and prints both captured streams; otherwise check_trace.py decides the result.
# test_number is advanced on every call, whether or not the test ran.
eval_test() {
  label=$1
  cmdline=$2
  test_name=$3
  eval_test_out_dir=/tmp/test/out

  if [ "$test_filter" = -1 ] || [ "$test_filter" = "$test_number" ] ; then
    echo "test $test_number: $test_name \"$label\""
    echo "CMD: \"$cmdline\""
    mkdir -p "$eval_test_out_dir"
    test_runnum=$((test_runnum + 1))
    eval "$cmdline" 1>"$eval_test_out_dir/$test_name.out" 2>"$eval_test_out_dir/$test_name.err"
    is_failed=$?
    if [ $is_failed != 0 ] ; then
      echo "--- stdout ---"
      cat "$eval_test_out_dir/$test_name.out"
      echo "--- stderr ---"
      cat "$eval_test_out_dir/$test_name.err"
    else
      python3 ./test/check_trace.py -in "$test_name" -ck "$check_trace_flag"
      is_failed=$?
      if [ $is_failed != 0 ] ; then
        echo "Trace checker error:"
        # run the checker again in verbose mode to show what differed
        python3 ./test/check_trace.py -v -in "$test_name" -ck "$check_trace_flag"
      fi
    fi
    if [ $is_failed = 0 ] ; then
      echo "${bright:-}${blue:-}$test_name: ${green:-}PASSED${normal:-}"
    else
      echo "${bright:-}${blue:-}$test_name: ${red:-}FAILED${normal:-}"
      failed_tests="$failed_tests\n  $test_number: $test_name - \"$label\""
      test_status=$(($test_status + 1))
    fi
  fi

  test_number=$((test_number + 1))
}
|
||||
|
||||
# Tests dry run
|
||||
eval_test "MatrixTranspose dry run" ./test/MatrixTranspose MatrixTranspose_dryrun_trace
|
||||
eval_test "copy dry run" ./test/copy copy_dryrun_trace
|
||||
|
||||
# Standalone test
|
||||
# ROCtracer is used explicitly by the test
|
||||
eval_test "standalone C test" "./test/MatrixTranspose_ctest" MatrixTranspose_ctest_trace
|
||||
eval_test "standalone HIP test" "./test/MatrixTranspose_test" MatrixTranspose_test_trace
|
||||
eval_test "standalone HIP hipaact test" "./test/MatrixTranspose_hipaact_test" MatrixTranspose_hipaact_test_trace
|
||||
eval_test "standalone HIP MGPU test" "./test/MatrixTranspose_mgpu" MatrixTranspose_mgpu_trace
|
||||
|
||||
# Tool test
|
||||
# ROCtracer/tool is loaded by HSA runtime
|
||||
export LD_PRELOAD="$ROCTRACER_TOOL_PATH/libroctracer_tool.so"
|
||||
|
||||
# ROCTX test
|
||||
export ROCTRACER_DOMAIN="roctx"
|
||||
eval_test "roctx test" ./test/roctx_test roctx_test_trace
|
||||
|
||||
# SYS test
|
||||
export ROCTRACER_DOMAIN="sys:roctx"
|
||||
eval_test "tool SYS test" ./test/MatrixTranspose MatrixTranspose_sys_trace
|
||||
export ROCTRACER_DOMAIN="sys:hsa:roctx"
|
||||
eval_test "tool SYS/HSA test" ./test/MatrixTranspose MatrixTranspose_sys_hsa_trace
|
||||
# Tracing control <delay:length:rate>
|
||||
export ROCTRACER_DOMAIN="hip"
|
||||
eval_test "tool period test" "ROCP_CTRL_RATE=10:50000:500000 ./test/MatrixTranspose" MatrixTranspose_hip_period_trace
|
||||
eval_test "tool flushing test" "ROCP_FLUSH_RATE=100000 ./test/MatrixTranspose" MatrixTranspose_hip_flush_trace
|
||||
|
||||
#API records filtering
|
||||
echo "<trace name=\"HIP\"><parameters api=\"hipFree, hipMalloc, hipMemcpy\"></parameters></trace>" > /tmp/input.xml
|
||||
export ROCP_INPUT=/tmp/input.xml
|
||||
eval_test "tool HIP test input" ./test/MatrixTranspose MatrixTranspose_hip_input_trace
|
||||
unset ROCP_INPUT
|
||||
|
||||
# HSA test
|
||||
export ROCTRACER_DOMAIN="hsa"
|
||||
# test trace
|
||||
export ROC_TEST_TRACE=1
|
||||
# kernels loading iterations
|
||||
export ROCP_KITER=1
|
||||
# kernels dispatching iterations per kernel load
|
||||
# dispatching to the same queue
|
||||
export ROCP_DITER=1
|
||||
# GPU agents number
|
||||
export ROCP_AGENTS=1
|
||||
# host threads number
|
||||
# each thread creates a queue pre GPU agent
|
||||
export ROCP_THRS=1
|
||||
|
||||
eval_test "tool HSA test" ./test/copy copy_hsa_trace
|
||||
|
||||
echo "<trace name=\"HSA\"><parameters api=\"hsa_agent_get_info, hsa_amd_memory_pool_allocate\"></parameters></trace>" > /tmp/input.xml
|
||||
export ROCP_INPUT=/tmp/input.xml
|
||||
eval_test "tool HSA test input" ./test/copy copy_hsa_input_trace
|
||||
unset ROCP_INPUT
|
||||
|
||||
# Check that the tracer tool can be unloaded and then reloaded.
|
||||
eval_test "Load/Unload/Reload the tracer tool" ./test/load_unload_reload_test load_unload_reload_trace
|
||||
|
||||
export LD_PRELOAD=${BIN_DIR}/test/libcodeobj_test.so
|
||||
eval_test "tool tracer codeobj" ./test/MatrixTranspose code_obj_trace
|
||||
|
||||
unset LD_PRELOAD
|
||||
#valgrind --leak-check=full $tbin
|
||||
#valgrind --tool=massif $tbin
|
||||
#ms_print massif.out.<N>
|
||||
|
||||
eval_test "directed TraceBuffer test" ./test/trace_buffer trace_buffer
|
||||
eval_test "directed MemoryPool test" ./test/memory_pool memory_pool
|
||||
eval_test "enable/disable callbacks and activities test" ./test/activity_and_callback activity_and_callback_trace
|
||||
eval_test "use multiple memory pools in HIP activities test" ./test/multi_pool_activities multi_pool_activities_trace
|
||||
eval_test "Dynamically load the tracer library test" ./test/dlopen dlopen
|
||||
|
||||
eval_test "backward compatibility tests" ./test/backward_compat_test backward_compat_test_trace
|
||||
|
||||
echo "$test_number tests total / $test_runnum tests run / $test_status tests failed"
|
||||
if [ $test_status != 0 ] ; then
|
||||
echo $failed_tests
|
||||
fi
|
||||
exit $test_status
|
||||
@@ -0,0 +1,81 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <roctracer_hip.h>

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iterator>
#include <string>
#include <thread>
#include <utility>
#include <vector>
|
||||
|
||||
// Create as many threads as there are cores, half changing the hipSetDevice roctracer API callback
|
||||
// and argument, and the other half calling hipSetDevice, all running concurrently. If there is a
|
||||
// race when setting the API callback and argument, the test aborts.
|
||||
|
||||
// Number of loop iterations performed by each worker thread started in main().
constexpr int N_ITER = 1000000;
|
||||
|
||||
namespace {

// Number of whitespace-delimited "processor" tokens found in /proc/cpuinfo.
// The stream is scoped to the initializer so the file is closed as soon as the
// count is known. If the file cannot be opened the count is 0.
const int num_cpu_cores = [] {
  std::ifstream cpuinfo("/proc/cpuinfo");
  return static_cast<int>(std::count(std::istream_iterator<std::string>(cpuinfo),
                                     std::istream_iterator<std::string>(),
                                     std::string("processor")));
}();

// Callback whose signature matches what main() passes to
// roctracer_enable_op_callback. Each instantiation has a distinct address, and
// main() registers a callback together with that same address as its user
// argument, so a mismatch between the two aborts the test.
template <std::size_t N> void callback(uint32_t, uint32_t, const void*, void* arg) {
  // The callback argument must match the callback function. The explicit cast
  // mirrors the one used at registration; comparing a void* with a function
  // pointer without it is not valid ISO C++.
  if (arg != reinterpret_cast<void*>(&callback<N>)) std::abort();
}

// Builds an array holding &callback<I> for every index I in the sequence.
template <std::size_t... Is> constexpr auto create_callbacks(std::index_sequence<Is...>) {
  return std::array{&callback<Is>...};
}

// Builds an array of the N distinct callbacks callback<0> .. callback<N-1>.
template <std::size_t N> constexpr auto create_callbacks() {
  return create_callbacks(std::make_index_sequence<N>{});
}

// Pool of distinct callbacks that main() hands out to its registering threads.
constexpr auto callbacks = create_callbacks<128>();

}  // namespace
|
||||
|
||||
int main() {
|
||||
if (hipSetDevice(0) != hipSuccess) abort();
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
for (int i = 0; i < std::max(2, num_cpu_cores / 2); ++i) {
|
||||
threads.emplace_back(
|
||||
[](auto callback) {
|
||||
for (int n = 0; n < N_ITER; ++n)
|
||||
roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, HIP_API_ID_hipSetDevice, callback,
|
||||
reinterpret_cast<void*>(callback));
|
||||
},
|
||||
callbacks[i % callbacks.size()]);
|
||||
threads.emplace_back([]() {
|
||||
for (int n = 0; n < N_ITER; ++n)
|
||||
if (hipSetDevice(0) != hipSuccess) abort();
|
||||
});
|
||||
}
|
||||
for (auto&& thread : threads) thread.join();
|
||||
|
||||
roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API);
|
||||
return 0;
|
||||
}
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir