Merge branch 'amd-master' into rkebichi-trace-compare
Этот коммит содержится в:
@@ -8,6 +8,8 @@ b
|
||||
build
|
||||
inc/hsa_prof_str.h
|
||||
inc/kfd_prof_str.h
|
||||
inc/basic_ostream_ops.h
|
||||
inc/kfd_ostream_ops.h
|
||||
test/hsa
|
||||
test/MatrixTranspose/MatrixTranspose
|
||||
test/MatrixTranspose_test/MatrixTranspose
|
||||
|
||||
+72
-33
@@ -28,6 +28,9 @@ cmake_minimum_required ( VERSION 3.5.0 )
|
||||
## Verbose output.
|
||||
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )
|
||||
|
||||
# Install prefix
|
||||
set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories")
|
||||
|
||||
## Set module name and project name.
|
||||
set ( ROCTRACER_NAME "roctracer" )
|
||||
set ( ROCTRACER_TARGET "${ROCTRACER_NAME}64" )
|
||||
@@ -43,17 +46,25 @@ include ( env )
|
||||
|
||||
## Setup the package version.
|
||||
get_version ( "1.0.0" )
|
||||
message ( "-- LIB-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" )
|
||||
|
||||
set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
|
||||
set ( BUILD_VERSION_MINOR ${VERSION_MINOR} )
|
||||
set ( BUILD_VERSION_PATCH ${VERSION_PATCH} )
|
||||
set ( LIB_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
|
||||
|
||||
if ( DEFINED VERSION_BUILD AND NOT ${VERSION_BUILD} STREQUAL "" )
|
||||
message ( "VERSION BUILD DEFINED ${VERSION_BUILD}" )
|
||||
set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" )
|
||||
endif ()
|
||||
set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
|
||||
|
||||
set ( LIB_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
|
||||
set ( LIB_VERSION_MINOR ${BUILD_VERSION_MINOR} )
|
||||
if (DEFINED ENV{ROCM_LIBPATCH_VERSION})
|
||||
set (LIB_VERSION_PATCH $ENV{ROCM_LIBPATCH_VERSION} )
|
||||
else ()
|
||||
set (LIB_VERSION_PATCH ${BUILD_VERSION_PATCH} )
|
||||
endif()
|
||||
set ( LIB_VERSION_STRING "${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" )
|
||||
message ( "-- LIB-VERSION: ${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" )
|
||||
|
||||
## Set target and root/lib/test directory
|
||||
set ( TARGET_NAME "${ROCTRACER_TARGET}" )
|
||||
@@ -66,53 +77,81 @@ include ( ${LIB_DIR}/CMakeLists.txt )
|
||||
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${BUILD_VERSION_MAJOR}" )
|
||||
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" )
|
||||
|
||||
# If the library is a release, strip the target library
|
||||
if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
|
||||
add_custom_command ( TARGET ${ROCTRACER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so )
|
||||
endif ()
|
||||
#if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
|
||||
# add_custom_command ( TARGET ${ROCTRACER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so )
|
||||
#endif ()
|
||||
|
||||
## Build tests
|
||||
add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test )
|
||||
|
||||
## Install and packaging
|
||||
set ( CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}/${ROCTRACER_NAME} )
|
||||
message ( "---------Install-Dir: ${CMAKE_INSTALL_PREFIX}" )
|
||||
## pbulic headers
|
||||
set ( PUBLIC_HEADERS
|
||||
roctracer.h
|
||||
roctracer_hip.h
|
||||
roctracer_hcc.h
|
||||
roctracer_ext.h
|
||||
ext/prof_protocol.h
|
||||
ext/hsa_rt_utils.hpp
|
||||
roctx.h
|
||||
)
|
||||
|
||||
add_custom_target ( inc-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/include inc-link )
|
||||
## Installation and packaging
|
||||
set ( DEST_NAME ${ROCPROFILER_NAME} )
|
||||
if ( DEFINED CPACK_PACKAGING_INSTALL_PREFIX )
|
||||
get_filename_component ( DEST_NAME ${CPACK_PACKAGING_INSTALL_PREFIX} NAME )
|
||||
get_filename_component ( DEST_DIR ${CPACK_PACKAGING_INSTALL_PREFIX} DIRECTORY )
|
||||
set ( CPACK_PACKAGING_INSTALL_PREFIX ${DEST_DIR} )
|
||||
endif ()
|
||||
message ( "-----------Dest-name: ${DEST_NAME}" )
|
||||
message ( "------Install-prefix: ${CMAKE_INSTALL_PREFIX}" )
|
||||
message ( "-----------CPACK-dir: ${CPACK_PACKAGING_INSTALL_PREFIX}" )
|
||||
|
||||
#add_custom_target ( inc-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
# COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/include inc-link )
|
||||
add_custom_target ( so-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTRACER_LIBRARY}.so so-link )
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so so-link )
|
||||
add_custom_target ( so-major-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} so-major-link )
|
||||
add_custom_target ( so-patch-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} so-patch-link )
|
||||
|
||||
## Install information
|
||||
install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION lib )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer.h DESTINATION include )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_hip.h DESTINATION include )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_hcc.h DESTINATION include )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_ext.h DESTINATION include )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/prof_protocol.h DESTINATION include/ext )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/hsa_rt_utils.hpp DESTINATION include/ext )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION ../include RENAME ${ROCTRACER_NAME} )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCTRACER_LIBRARY}.so )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/test/libtracer_tool.so DESTINATION tool )
|
||||
install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION ${DEST_NAME}/lib )
|
||||
foreach ( header ${PUBLIC_HEADERS} )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/${header} DESTINATION ${DEST_NAME}/include )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/${header} DESTINATION include/${DEST_NAME} )
|
||||
endforeach ()
|
||||
#install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION include RENAME ${DEST_NAME} )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/so-major-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/so-patch-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/test/libtracer_tool.so DESTINATION ${DEST_NAME}/tool )
|
||||
|
||||
## rocTX
|
||||
set ( ROCTX_TARGET "roctx64" )
|
||||
set ( ROCTX_LIBRARY "lib${ROCTX_TARGET}" )
|
||||
|
||||
add_custom_target ( so-roctx-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTX_LIBRARY}.so so-roctx-link )
|
||||
## Set the VERSION and SOVERSION values
|
||||
set_property ( TARGET ${ROCTX_TARGET} PROPERTY VERSION "${LIB_VERSION_STRING}" )
|
||||
set_property ( TARGET ${ROCTX_TARGET} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" )
|
||||
|
||||
install ( TARGETS "roctx64" LIBRARY DESTINATION lib )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include )
|
||||
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION include )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so )
|
||||
add_custom_target ( so-roctx-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so so-roctx-link )
|
||||
add_custom_target ( so-roctx-major-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} so-roctx-major-link )
|
||||
add_custom_target ( so-roctx-patch-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} so-roctx-patch-link )
|
||||
|
||||
install ( TARGETS "roctx64" LIBRARY DESTINATION ${DEST_NAME}/lib )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-major-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-patch-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} )
|
||||
|
||||
## KFD wrapper
|
||||
if ( DEFINED KFD_WRAPPER )
|
||||
install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib )
|
||||
endif ()
|
||||
install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION ${DEST_NAME}/lib )
|
||||
|
||||
## Packaging directives
|
||||
set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" )
|
||||
@@ -129,7 +168,7 @@ set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" )
|
||||
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" )
|
||||
|
||||
## RPM package specific variables
|
||||
set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
|
||||
set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
|
||||
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" )
|
||||
|
||||
include ( CPack )
|
||||
|
||||
@@ -3,7 +3,10 @@
|
||||
set -e
|
||||
|
||||
do_ldconfig() {
|
||||
echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig
|
||||
INSTALL_PATH=/opt/rocm/roctracer
|
||||
if [ -e "${INSTALL_PATH}" ] ; then
|
||||
echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig
|
||||
fi
|
||||
}
|
||||
|
||||
case "$1" in
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
[MITx11 License]
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
+38
-15
@@ -1,52 +1,75 @@
|
||||
# ROC-tracer
|
||||
```
|
||||
ROC-tracer library, Runtimes Generic Callback/Activity APIs.
|
||||
ROC-tracer library: Runtimes Generic Callback/Activity APIs.
|
||||
The goal of the implementation is to provide a generic independent from
|
||||
specific runtime profiler to trace API and asyncronous activity.
|
||||
|
||||
The API provides functionality for registering the runtimes API callbacks and
|
||||
asyncronous activity records pool support.
|
||||
|
||||
ROC-TX librray: code annotation evemts API
|
||||
Includes basic API: roctxMark, roctxRangePush, roctxRangePop.
|
||||
```
|
||||
|
||||
## Usage
|
||||
```
|
||||
rocTracer API:
|
||||
To use the rocTracer API you need the API header and to link your application with roctracer .so librray:
|
||||
- the API header: /opt/rocm/roctracer/include/roctracer.h
|
||||
- the .so library: /opt/rocm/lib/libroctracer64.so
|
||||
|
||||
rocTX API:
|
||||
To use the rocTX API you need the API header and to link your application with roctx .so librray:
|
||||
- the API header: /opt/rocm/roctracer/include/roctx.h
|
||||
- the .so library: /opt/rocm/lib/libroctx64.so
|
||||
```
|
||||
|
||||
## The library source tree
|
||||
```
|
||||
- inc/roctracer.h - Library public API
|
||||
- doc - documentation
|
||||
- inc/roctracer.h - rocTacer library public API header
|
||||
- inc/roctx.h - rocTX library puiblic API header
|
||||
- src - Library sources
|
||||
- core - Library API sources
|
||||
- util - Library utils sources
|
||||
- core - rocTracer library API sources
|
||||
- roctx - rocTX library API sources
|
||||
- util - library utils sources
|
||||
- test - test suit
|
||||
- MatrixTranspose - test based on HIP MatrixTranspose sample
|
||||
```
|
||||
|
||||
## Documentation
|
||||
```
|
||||
- API description: inc/roctracer.h
|
||||
- Code example: test/MatrixTranspose_test/MatrixTranspose.cpp
|
||||
```
|
||||
- API description:
|
||||
- ['roctracer' / 'rocTX' profiling C API specification](doc/roctracer_spec.md)
|
||||
- Code examples:
|
||||
- [test/MatrixTranspose_test/MatrixTranspose.cpp](test/MatrixTranspose_test/MatrixTranspose.cpp)
|
||||
- [test/MatrixTranspose/MatrixTranspose.cpp](test/MatrixTranspose/MatrixTranspose.cpp)
|
||||
|
||||
## To build and run test
|
||||
```
|
||||
- ROCm-2.3 or higher is required
|
||||
|
||||
cd <your path>
|
||||
- ROCm is required
|
||||
|
||||
- Python modules requirements: CppHeaderParser, argparse.
|
||||
To install:
|
||||
sudo pip install CppHeaderParser argparse
|
||||
|
||||
- CLone development branch of roctracer:
|
||||
git clone -b amd-master https://github.com/ROCm-Developer-Tools/roctracer
|
||||
|
||||
- Set environment:
|
||||
export CMAKE_PREFIX_PATH=/opt/rocm
|
||||
- To use custom HIP/HCC versions:
|
||||
export HIP_PATH=/opt/rocm/hip
|
||||
export HCC_HOME=/opt/rocm/hcc
|
||||
export CMAKE_PREFIX_PATH=/opt/rocm
|
||||
|
||||
- Build ROCtracer
|
||||
- To build roctracer library:
|
||||
export CMAKE_BUILD_TYPE=<debug|release> # release by default
|
||||
cd <your path>/roctracer && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX=/opt/rocm .. && make -j <nproc>
|
||||
|
||||
- To build and run test
|
||||
- To build and run test:
|
||||
make mytest
|
||||
run.sh
|
||||
|
||||
- To install
|
||||
- To install:
|
||||
make install
|
||||
or
|
||||
make package && dpkg -i *.deb
|
||||
|
||||
+4
-1
@@ -1 +1,4 @@
|
||||
echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig
|
||||
INSTALL_PATH=/opt/rocm/roctracer
|
||||
if [ -e "${INSTALL_PATH}" ] ; then
|
||||
echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig
|
||||
fi
|
||||
|
||||
+4
-2
@@ -1,7 +1,8 @@
|
||||
#!/bin/bash -x
|
||||
SRC_DIR=`dirname $0`
|
||||
COMPONENT="roctracer"
|
||||
ROCM_PATH="/opt/rocm"
|
||||
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
|
||||
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
|
||||
|
||||
fatal() {
|
||||
echo "$1"
|
||||
@@ -19,9 +20,9 @@ if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi
|
||||
if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi
|
||||
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
|
||||
if [ -n "$HIP_VDI" ] ; then HIP_VDI_OPT="-DHIP_VDI=1"; fi
|
||||
if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
|
||||
|
||||
ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD)
|
||||
MAKE_OPTS="-j 8 -C $BUILD_DIR"
|
||||
|
||||
mkdir -p $BUILD_DIR
|
||||
pushd $BUILD_DIR
|
||||
@@ -33,6 +34,7 @@ cmake \
|
||||
-DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \
|
||||
-DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \
|
||||
-DCPACK_GENERATOR="DEB;RPM" \
|
||||
-DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
|
||||
$HIP_VDI_OPT \
|
||||
$ROCTRACER_ROOT
|
||||
make
|
||||
|
||||
@@ -50,11 +50,6 @@ else()
|
||||
set ( HIP_VDI 0 )
|
||||
endif()
|
||||
|
||||
## Enable KFD wrapper
|
||||
if ( DEFINED KFD_WRAPPER )
|
||||
add_definitions ( -DKFD_WRAPPER=${KFD_WRAPPER} )
|
||||
endif()
|
||||
|
||||
## Enable HIP/HCC local build
|
||||
if ( DEFINED LOCAL_BUILD )
|
||||
add_definitions ( -DLOCAL_BUILD=${LOCAL_BUILD} )
|
||||
@@ -123,6 +118,7 @@ endif ()
|
||||
find_library ( HSA_KMT_LIB "libhsakmt.so" )
|
||||
get_filename_component ( HSA_KMT_LIB_PATH ${HSA_KMT_LIB} DIRECTORY )
|
||||
set ( HSA_KMT_INC_PATH "${HSA_KMT_LIB_PATH}/../include" )
|
||||
set ( ROCM_INC_PATH ${HSA_KMT_INC_PATH} )
|
||||
|
||||
## Basic Tool Chain Information
|
||||
message ( "----------------NBIT: ${NBIT}" )
|
||||
@@ -136,6 +132,5 @@ message ( "-------------HCC-Inc: ${HCC_INC_DIR}" )
|
||||
message ( "-------------HIP-Inc: ${HIP_INC_DIR}" )
|
||||
message ( "-------------KFD-Inc: ${HSA_KMT_INC_PATH}" )
|
||||
message ( "-------------HIP-VDI: ${HIP_VDI}" )
|
||||
message ( "---------KFD_WRAPPER: ${KFD_WRAPPER}" )
|
||||
message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" )
|
||||
message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" )
|
||||
|
||||
@@ -0,0 +1,748 @@
|
||||
# ROC Tracer / ROC-TX Libraries Specification
|
||||
```
|
||||
ROC Tracer API version 2
|
||||
ROC-TX API version 1
|
||||
|
||||
- The rocTracer API is agnostic to specific runtime and may trace
|
||||
the runtime API calls and asynchronous GPU activity.
|
||||
- The rocTX API is provided for application code annotation.
|
||||
```
|
||||
## 1. High level overview
|
||||
```
|
||||
The goal of the implementation is to provide a runtime independent API
|
||||
for tracing of runtime calls and asynchronous activity, like GPU kernel
|
||||
dispatches and memory moves. The tracing includes callback API for
|
||||
runtime API tracing and activity API for asynchronous activity records
|
||||
logging.
|
||||
|
||||
Depending on particular runtime intercepting mechanism, the rocTracer
|
||||
library can be dynamically linked, dynamically loaded by the runtime as
|
||||
a plugin or some API wrapper can be loaded using LD_PRELOAD.
|
||||
The library has a C API.
|
||||
|
||||
The rocTracer library is an API that intercepts runtime API calls and
|
||||
traces asynchronous activity. The activity tracing results are recorded
|
||||
in a ring buffer.
|
||||
|
||||
The rocTX contains application code instrumentation API to support high
|
||||
level correlation of runtime API/activity events. The API includes mark
|
||||
and nested ranges.
|
||||
```
|
||||
## 2. General API
|
||||
### 2.1. Description
|
||||
```
|
||||
The library supports method for getting the error number and error string
|
||||
of the last failed library API call. It allows to check the conformance
|
||||
of used library API header and the library binary, the version macros and
|
||||
API methods can be used.
|
||||
|
||||
Returning the error and error string methods:
|
||||
• roctracer_status_t – error code enumeration
|
||||
• roctracer_error_string – method for returning the error string
|
||||
|
||||
Library version:
|
||||
• ROCTRACER_VERSION_MAJOR – API major version macro
|
||||
• ROCTRACER_VERSION_MINOR – API minor version macro
|
||||
• roctracer_version_major – library major version
|
||||
• roctracer_version_minor – library minor version
|
||||
```
|
||||
### 2.2. Error codes and error string methods
|
||||
```
|
||||
Error code enumeration:
|
||||
typedef enum {
|
||||
ROCTRACER_STATUS_SUCCESS = 0,
|
||||
ROCTRACER_STATUS_ERROR = 1,
|
||||
ROCTRACER_STATUS_UNINIT = 2,
|
||||
ROCTRACER_STATUS_BREAK = 3,
|
||||
ROCTRACER_STATUS_BAD_DOMAIN = 4,
|
||||
ROCTRACER_STATUS_BAD_PARAMETER = 5,
|
||||
ROCTRACER_STATUS_HIP_API_ERR = 6,
|
||||
ROCTRACER_STATUS_HCC_OPS_ERR = 7,
|
||||
ROCTRACER_STATUS_ROCTX_ERR = 8,
|
||||
} roctracer_status_t;
|
||||
|
||||
Return error string:
|
||||
const char* roctracer_error_string();
|
||||
```
|
||||
### 2.3. Library version
|
||||
```
|
||||
The library provides major and minor versions. Major version is for
|
||||
incompatible API changes and minor version for bug fixes.
|
||||
|
||||
API version macros defined in the library API header ‘roctracer.h’:
|
||||
ROCTRACER_VERSION_MAJOR
|
||||
ROCTRACER_VERSION_MINOR
|
||||
|
||||
Methods to check library major and minor venison:
|
||||
uint32_t roctracer_major_version();
|
||||
uint32_t roctracer_minor_version();
|
||||
```
|
||||
## 3. Frontend API
|
||||
### 3.1. Description
|
||||
```
|
||||
The rocTracer provides support for runtime API callbacks and activity
|
||||
records logging. The APIs of different runtimes at different levels
|
||||
are considered as different API domains with assigned domain IDs. For
|
||||
example, language level and driver level. The API callbacks provide
|
||||
the API calls arguments and are called on two phases on “enter” and
|
||||
on “exit”. The activity records are logged to the ring buffer and can
|
||||
be associated with the respective API calls using the correlation ID.
|
||||
Activity API can be used to enable collecting of the records with
|
||||
timestamping data for API calls and asynchronous activity like the
|
||||
kernel submits, memory copies and barriers
|
||||
|
||||
Tracing domains:
|
||||
• roctracer_domain_t – runtime API domains, HIP, HSA, etc…
|
||||
• roctracer_op_string – Return Op string by given domain and
|
||||
activity Op code
|
||||
• roctracer_op_code – Return Op code and kind by given string
|
||||
|
||||
Callback API:
|
||||
• roctracer_rtapi_callback_t – runtime API callback type
|
||||
• roctracer_enable_op_callback – enable runtime API callback
|
||||
by domain and Op code
|
||||
• roctracer_enable_domain_callback – enable runtime API callback
|
||||
by domain for all Ops
|
||||
• roctracer_enable_callback – enable runtime API callback for
|
||||
all domains, all Ops
|
||||
• roctracer_disable_op_callback – disable runtime API callback
|
||||
by domain and Op code
|
||||
• roctracer_enable_op_callback – enable runtime API callback
|
||||
by domain for all Ops
|
||||
• roctracer_enable_op_callback – enable runtime API callback for
|
||||
all domains, all Ops
|
||||
|
||||
Activity API:
|
||||
• roctracer_record_t – activity record
|
||||
• roctracer_pool_t – records pool type
|
||||
• roctracer_allocator_t – tracer allocator type
|
||||
• roctracer_buffer_callback_t – pool callback type
|
||||
• roctracer_open_pool[_expl] – create records pool
|
||||
• roctracer_close_pool[_expl] – close records pool
|
||||
• roctracer_default_pool[_expl] – get/set default pool
|
||||
• roctracer_properties_t – tracer properties
|
||||
• roctracer_enable_op_activity[_expl] – enable activity records logging
|
||||
• roctracer_enable_domain_activity[_expl] – enable activity records logging
|
||||
• roctracer_enable_activity[_expl] – enable activity records logging
|
||||
• roctracer_disable_op_activity – disable activity records logging
|
||||
• roctracer_disable_domain_activity – disable activity records logging
|
||||
• roctracer_disable_activity – disable activity records logging
|
||||
• roctracer_flush_activity[_expl] – disable activity records logging
|
||||
• roctracer_next_record – return next record
|
||||
• roctracer_get_timestamp – return correlated GPU/CPU system timestamp
|
||||
|
||||
External correlation ID API:
|
||||
• roctracer_activity_push_external_correlation_id - push an external
|
||||
correlation id for the calling thread
|
||||
• roctracer_activity_pop_external_correlation_id - pop an external
|
||||
correlation id for the calling thread
|
||||
|
||||
Tracing control API:
|
||||
• roctracer_start – tracing start
|
||||
• roctracer_stop – tracer stop
|
||||
|
||||
```
|
||||
### 3.2. Tracing Domains
|
||||
```
|
||||
Various tracing domains are supported. Each domain is assigned with
|
||||
a domain ID. The domains include HSA, HIP, and HCC runtime levels.
|
||||
|
||||
Traced API domains:
|
||||
typedef enum {
|
||||
ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain
|
||||
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_API = 2, // HIP API domain
|
||||
ACTIVITY_DOMAIN_HIP_OPS = 3, // HIP async activity domain
|
||||
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
|
||||
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
|
||||
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
|
||||
ACTIVITY_DOMAIN_NUMBER = 7
|
||||
} activity_domain_t;
|
||||
|
||||
Return name by given domain and Op code:
|
||||
const char* roctracer_op_string( // NULL returned on error and error number is set
|
||||
uint32_t domain, // tracing domain
|
||||
uint32_t op, // activity op code
|
||||
uint32_t kind); // activity kind
|
||||
Return Op code and kind by given string:
|
||||
roctracer_status_t roctracer_op_code(
|
||||
uint32_t domain, // tracing domain
|
||||
const char* str, // [in] op string
|
||||
uint32_t* op, // [out] op code
|
||||
uint32_t* kind); // [out] op kind code if not NULL
|
||||
```
|
||||
### 3.3. Callback API
|
||||
```
|
||||
The tracer provides support for runtime API callbacks and activity records
|
||||
logging. The API callbacks provide the API calls arguments and are called
|
||||
on two phases on “enter”, on “exit”.
|
||||
|
||||
API phase passed to the callbacks:
|
||||
typedef enum {
|
||||
ROCTRACER_API_PHASE_ENTER,
|
||||
ROCTRACER_API_PHASE_EXIT,
|
||||
} roctracer_api_phase_t;
|
||||
|
||||
Runtime API callback type:
|
||||
typedef void (*roctracer_rtapi_callback_t)(
|
||||
uint32_t domain, // runtime API domain
|
||||
uint32_t cid, // API call ID
|
||||
const void* data, // [in] callback data with correlation id and the call
|
||||
// arguments
|
||||
void* arg); // [in/out] user passed data
|
||||
|
||||
Enable runtime API callbacks:
|
||||
roctracer_status_t roctracer_enable_op_callback(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op, // API call ID
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
|
||||
roctracer_status_t roctracer_enable_domain_callback(
|
||||
activity_domain_t domain, // tracing domain
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
|
||||
|
||||
roctracer_status_t roctracer_enable_callback(
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
|
||||
Disable runtime API callbacks:
|
||||
roctracer_status_t roctracer_disable_op_callback(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op); // API call ID
|
||||
|
||||
roctracer_status_t roctracer_disable_domain_callback(
|
||||
activity_domain_t domain); // tracing domain
|
||||
|
||||
roctracer_status_t roctracer_disable_callback();
|
||||
```
|
||||
### 3.4 Activity API
|
||||
```
|
||||
The activity records are asynchronously logged to the pool and can be
|
||||
associated with the respective API callbacks using the correlation ID.
|
||||
Activity API can be used to enable collecting the records with
|
||||
timestamp data for API calls and GPU activity like kernel submits,
|
||||
memory copies, and barriers.
|
||||
|
||||
// Correlation id
|
||||
typedef uint64_t activity_correlation_id_t;
|
||||
|
||||
Activity record type:
|
||||
|
||||
// Activity record type
|
||||
struct activity_record_t {
|
||||
uint32_t domain; // activity domain id
|
||||
activity_kind_t kind; // activity kind
|
||||
activity_op_t op; // activity op
|
||||
activity_correlation_id_t correlation_id; // activity ID
|
||||
uint64_t begin_ns; // host begin timestamp
|
||||
uint64_t end_ns; // host end timestamp
|
||||
union {
|
||||
struct {
|
||||
int device_id; // device id
|
||||
uint64_t queue_id; // queue id
|
||||
};
|
||||
struct {
|
||||
uint32_t process_id; // device id
|
||||
uint32_t thread_id; // thread id
|
||||
};
|
||||
struct {
|
||||
activity_correlation_id_t external_id; // external correlation id
|
||||
};
|
||||
};
|
||||
size_t bytes; // data size bytes
|
||||
};
|
||||
|
||||
Return next record:
|
||||
static inline int roctracer_next_record(
|
||||
const activity_record_t* record, // [in] record ptr
|
||||
const activity_record_t** next); // [out] next record ptr
|
||||
|
||||
Tracer allocator type:
|
||||
typedef void (*roctracer_allocator_t)(
|
||||
char** ptr, // memory pointer
|
||||
size_t size, // memory size
|
||||
void* arg); // allocator arg
|
||||
|
||||
Pool callback type:
|
||||
typedef void (*roctracer_buffer_callback_t)(
|
||||
const char* begin, // [in] available buffered trace records
|
||||
const char* end, // [in] end of buffered trace records
|
||||
void* arg); // [in/out] callback arg
|
||||
|
||||
Tracer properties:
|
||||
typedef struct {
|
||||
uint32_t mode; // roctracer mode
|
||||
size_t buffer_size; // buffer size
|
||||
// power of 2
|
||||
roctracer_allocator_t alloc_fun; // memory allocator
|
||||
// function pointer
|
||||
void* alloc_arg; // memory allocator
|
||||
// function pointer
|
||||
roctracer_buffer_callback_t buffer_callback_fun; // tracer record
|
||||
// callback function
|
||||
void* buffer_callback_arg; // tracer record
|
||||
// callback arg
|
||||
} roctracer_properties_t;
|
||||
|
||||
Tracer memory pool handle type:
|
||||
typedef void roctracer_pool_t;
|
||||
|
||||
Create tracer memory pool:
|
||||
roctracer_status_t roctracer_open_pool(
|
||||
const roctracer_properties_t* properties); // tracer pool properties
|
||||
|
||||
roctracer_status_t roctracer_open_pool_expl(
|
||||
const roctracer_properties_t* properties, // tracer pool properties
|
||||
roctracer_pool_t** pool); // [out] returns tracer pool if
|
||||
// not NULL, otherwise sets the
|
||||
// default one if it is not set
|
||||
// yet; otherwise the error is
|
||||
// generated
|
||||
|
||||
Close tracer memory pool:
|
||||
roctracer_status_t roctracer_close_pool();
|
||||
|
||||
roctracer_status_t roctracer_close_pool_expl(
|
||||
roctracer_pool_t* pool); // memory pool, NULL means default pool
|
||||
|
||||
Return current default pool. Set new default pool if the argument is not NULL:
|
||||
roctracer_pool_t* roctracer_default_pool();
|
||||
|
||||
roctracer_pool_t* roctracer_default_pool_expl(
|
||||
roctracer_pool_t* pool); // new default pool if not NULL
|
||||
```
|
||||
Enable activity records logging:
|
||||
```
|
||||
roctracer_status_t roctracer_enable_op_activity(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op); // activity op ID
|
||||
|
||||
roctracer_status_t roctracer_enable_op_activity_expl(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op, // activity op ID
|
||||
roctracer_pool_t* pool); // memory pool, NULL means default pool
|
||||
|
||||
roctracer_status_t roctracer_enable_domain_activity(
|
||||
activity_domain_t domain); // tracing domain
|
||||
|
||||
roctracer_status_t roctracer_enable_domain_activity_expl(
|
||||
activity_domain_t domain, // tracing domain
|
||||
roctracer_pool_t* pool); // memory pool, NULL means default pool
|
||||
|
||||
roctracer_status_t roctracer_enable_activity();
|
||||
|
||||
roctracer_status_t roctracer_enable_activity_expl(
|
||||
roctracer_pool_t* pool); // memory pool, NULL means default pool
|
||||
|
||||
Disable activity records logging:
|
||||
roctracer_status_t roctracer_disable_op_activity(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op); // activity op ID
|
||||
|
||||
roctracer_status_t roctracer_disable_domain_activity(
|
||||
activity_domain_t domain); // tracing domain
|
||||
|
||||
roctracer_status_t roctracer_disable_activity();
|
||||
|
||||
Flush available activity records:
|
||||
roctracer_status_t roctracer_flush_activity();
|
||||
|
||||
roctracer_status_t roctracer_flush_activity_expl(
|
||||
roctracer_pool_t* pool); // memory pool, NULL means default pool
|
||||
|
||||
Return correlated GPU/CPU system timestamp:
|
||||
roctracer_status_t roctracer_get_timestamp(
|
||||
uint64_t* timestamp); // [out] return timestamp
|
||||
```
|
||||
External correlation ID API
|
||||
```
|
||||
The API provides activity records to associate rocTracer correlation IDs with
|
||||
IDs provided by external APIs. The external ID records are identified by
|
||||
ACTIVITY_DOMAIN_EXT_API domain value.
|
||||
Using the ‘push’ method an external ID is pushed to a per CPU thread stack and
|
||||
the ‘pop’ method can be used to remove the last pushed ID.
|
||||
An external ID record is inserted before any generated rocTracer activity record
|
||||
if the same CPU external ID stack is non-empty.
|
||||
|
||||
Notifies that the calling thread is entering an external API region.
|
||||
Push an external correlation id for the calling thread.
|
||||
roctracer_status_t roctracer_activity_push_external_correlation_id(
|
||||
activity_correlation_id_t id); // external correlation id
|
||||
|
||||
Notifies that the calling thread is leaving an external API region.
|
||||
Pop an external correlation id for the calling thread.
|
||||
roctracer_status_t roctracer_activity_pop_external_correlation_id(
|
||||
activity_correlation_id_t* last_id); // returns the last external correlation id
|
||||
// if not NULL
|
||||
```
|
||||
Tracing control API
|
||||
```
|
||||
Tracing start:
|
||||
void roctracer_start();
|
||||
|
||||
Tracing stop:
|
||||
void roctracer_stop();
|
||||
```
|
||||
## 4. rocTracer Usage Code Examples
|
||||
### 4.1. HIP API and HCC ops, GPU Activity Tracing
|
||||
```
|
||||
#include <inc/roctracer_hip.h>
|
||||
#include <inc/roctracer_hcc.h>
|
||||
|
||||
// HIP API callback function
|
||||
void hip_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
(void)arg;
|
||||
const hip_api_data_t* data = reinterpret_cast <const hip_api_data_t*>
|
||||
(callback_data);
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
|
||||
roctracer_id_string(ACTIVITY_DOMAIN_HIP_API, cid),
|
||||
cid,
|
||||
data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
|
||||
<some code . . .>
|
||||
}
|
||||
|
||||
// Activity tracing callback
|
||||
void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
const roctracer_record_t* record = reinterpret_cast<const
|
||||
roctracer_record_t*>(begin);
|
||||
const roctracer_record_t* end_record = reinterpret_cast<const
|
||||
roctracer_record_t*>(end);
|
||||
fprintf(stdout, "\tActivity records:\n");
|
||||
while (record < end_record) {
|
||||
const char * name = roctracer_op_string(record->domain,
|
||||
record->activity_id, 0);
|
||||
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)
|
||||
device_id(%d) stream_id(%lu)\n",
|
||||
name,
|
||||
record->correlation_id,
|
||||
record->begin_ns,
|
||||
record->end_ns,
|
||||
record->device_id,
|
||||
record->stream_id
|
||||
);
|
||||
<some code . . .>
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &record));
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 12;
|
||||
properties.buffer_callback_fun = activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
|
||||
// Enable HIP API callbacks. HIP_API_ID_ANY can be used to trace all HIP
|
||||
// API calls.
|
||||
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API,
|
||||
HIP_API_ID_hipModuleLaunchKernel,
|
||||
hip_api_callback, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_op_acticity(ACTIVITY_DOMAIN_HIP_API,
|
||||
HIP_API_ID_hipModuleLaunchKernel));
|
||||
// Enable HIP kernel dispatch activity tracing
|
||||
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HCC_OPS,
|
||||
hc::HSA_OP_ID_DISPATCH));
|
||||
|
||||
<test code>
|
||||
|
||||
// Disable tracing and closing the pool
|
||||
ROCTRACER_CALL(roctracer_disable_callback());
|
||||
ROCTRACER_CALL(roctracer_disable_activity());
|
||||
ROCTRACER_CALL(roctracer_close_pool());
|
||||
}
|
||||
```
|
||||
### 4.2. MatrixTranspose HIP sample with all APIs/activity tracing enabled
|
||||
```
|
||||
This shows a MatrixTranspose HIP sample with enabled tracing of
|
||||
all HIP API and all GPU asynchronous activity.
|
||||
|
||||
/*
|
||||
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
// hip header file
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#ifndef ITERATIONS
|
||||
# define ITERATIONS 100
|
||||
#endif
|
||||
#define WIDTH 1024
|
||||
|
||||
|
||||
#define NUM (WIDTH * WIDTH)
|
||||
|
||||
#define THREADS_PER_BLOCK_X 4
|
||||
#define THREADS_PER_BLOCK_Y 4
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
// hipLaunchParm provides the execution configuration
|
||||
__global__ void matrixTranspose(hipLaunchParm lp, float* out, float* in,
|
||||
const int width) {
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
out[y * width + x] = in[x * width + y];
|
||||
}
|
||||
|
||||
// CPU implementation of matrix transpose
|
||||
void matrixTransposeCPUReference(float* output, float* input, const unsigned
|
||||
int width) {
|
||||
for (unsigned int j = 0; j < width; j++) {
|
||||
for (unsigned int i = 0; i < width; i++) {
|
||||
output[i * width + j] = input[j * width + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int iterations = ITERATIONS;
|
||||
void start_tracing();
|
||||
void stop_tracing();
|
||||
|
||||
int main() {
|
||||
float* Matrix;
|
||||
float* TransposeMatrix;
|
||||
float* cpuTransposeMatrix;
|
||||
|
||||
float* gpuMatrix;
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
int i;
|
||||
int errors;
|
||||
|
||||
while (iterations-- > 0) {
|
||||
start_tracing();
|
||||
|
||||
Matrix = (float*)malloc(NUM * sizeof(float));
|
||||
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
|
||||
// Memory transfer from host to device
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float),
|
||||
hipMemcpyHostToDevice);
|
||||
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernel(matrixTranspose,
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH /
|
||||
THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
|
||||
gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
|
||||
// Memory transfer from device to host
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float),
|
||||
hipMemcpyDeviceToHost);
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
|
||||
// verify the results
|
||||
errors = 0;
|
||||
double eps = 1.0E-6;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
printf("FAILED: %d errors\n", errors);
|
||||
} else {
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
|
||||
stop_tracing();
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// HIP/HCC Callbacks/Activity tracing
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#include <inc/roctracer_hip.h>
|
||||
#include <inc/roctracer_hcc.h>
|
||||
|
||||
// Macro to check ROC-tracer calls status
|
||||
#define ROCTRACER_CALL(call) \
|
||||
do { \
|
||||
int err = call; \
|
||||
if (err != 0) { \
|
||||
std::cerr << roctracer_error_string() << std::endl << std::flush; \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// HIP API callback function
|
||||
void hip_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
(void)arg;
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>
|
||||
(callback_data);
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0),
|
||||
cid,
|
||||
data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
switch (cid) {
|
||||
case HIP_API_ID_hipMemcpy:
|
||||
fprintf(stdout, "dst(%p) src(%p) size(0x%x) kind(%u)",
|
||||
data->args.hipMemcpy.dst,
|
||||
data->args.hipMemcpy.src,
|
||||
(uint32_t)(data->args.hipMemcpy.sizeBytes),
|
||||
(uint32_t)(data->args.hipMemcpy.kind));
|
||||
break;
|
||||
case HIP_API_ID_hipMalloc:
|
||||
fprintf(stdout, "ptr(%p) size(0x%x)",
|
||||
data->args.hipMalloc.ptr,
|
||||
(uint32_t)(data->args.hipMalloc.size));
|
||||
break;
|
||||
case HIP_API_ID_hipFree:
|
||||
fprintf(stdout, "ptr(%p),
|
||||
data->args.hipFree.ptr);
|
||||
break;
|
||||
case HIP_API_ID_hipModuleLaunchKernel:
|
||||
fprintf(stdout, "kernel(\"%s\") stream(%p)",
|
||||
hipKernelNameRef(data->args.hipModuleLaunchKernel.f),
|
||||
data->args.hipModuleLaunchKernel.stream);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (cid) {
|
||||
case HIP_API_ID_hipMalloc:
|
||||
fprintf(stdout, "*ptr(0x%p)",
|
||||
*(data->args.hipMalloc.ptr));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
fprintf(stdout, "\n"); fflush(stdout);
|
||||
}
|
||||
|
||||
// Activity tracing callback
|
||||
// hipMalloc id(3) correlation_id(1):
|
||||
// begin_ns(1525888652762640464) end_ns(1525888652762877067)
|
||||
void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
const roctracer_record_t* record = reinterpret_cast
|
||||
<const roctracer_record_t*>(begin);
|
||||
const roctracer_record_t* end_record = reinterpret_cast
|
||||
<const roctracer_record_t*>(end);
|
||||
fprintf(stdout, "\tActivity records:\n"); fflush(stdout);
|
||||
while (record < end_record) {
|
||||
const char * name = roctracer_op_string(record->domain,
|
||||
record->activity_id, 0);
|
||||
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) \
|
||||
device_id(%d) stream_id(%lu)",
|
||||
name,
|
||||
record->correlation_id,
|
||||
record->begin_ns,
|
||||
record->end_ns,
|
||||
record->device_id,
|
||||
record->stream_id
|
||||
);
|
||||
if (record->kind == hc::HSA_OP_ID_COPY)
|
||||
fprintf(stdout, " bytes(0x%zx)", record->bytes);
|
||||
fprintf(stdout, "\n");
|
||||
fflush(stdout);
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &record));
|
||||
}
|
||||
}
|
||||
|
||||
// Start tracing routine
|
||||
void start_tracing() {
|
||||
std::cout << "# START #############################" << std::endl
|
||||
<< std::flush;
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 0x1000;
|
||||
properties.buffer_callback_fun = activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
// Enable API callbacks, all domains
|
||||
ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, NULL));
|
||||
// Enable activity tracing, all domains
|
||||
ROCTRACER_CALL(roctracer_enable_activity());
|
||||
}
|
||||
|
||||
// Stop tracing routine
|
||||
void stop_tracing() {
|
||||
ROCTRACER_CALL(roctracer_disable_api_callback());
|
||||
ROCTRACER_CALL(roctracer_disable_api_activity());
|
||||
ROCTRACER_CALL(roctracer_close_pool());
|
||||
std::cout << "# STOP #############################" << std::endl
|
||||
<< std::flush;
|
||||
}
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
```
|
||||
## 5. rocTX application code annotation API
|
||||
```
|
||||
Basic annotation API: markers and nested ranges.
|
||||
// A marker created by given ASCII massage
|
||||
void roctxMark(const char* message);
|
||||
|
||||
// Returns the 0 based level of a nested range being started by given message associated to this range.
|
||||
// A negative value is returned on the error.
|
||||
int roctxRangePush(const char* message);
|
||||
|
||||
// Marks the end of a nested range.
|
||||
// Returns the 0 based level the range.
|
||||
// A negative value is returned on the error.
|
||||
int roctxRangePop();
|
||||
```
|
||||
@@ -29,9 +29,10 @@ THE SOFTWARE.
|
||||
typedef enum {
|
||||
ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain
|
||||
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
|
||||
ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_OPS = 2, // HIP async activity domain
|
||||
ACTIVITY_DOMAIN_HCC_OPS = ACTIVITY_DOMAIN_HIP_OPS, // HCC async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HIP_OPS, // HIP VDI async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain
|
||||
ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HCC_OPS, // HIP VDI domain
|
||||
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
|
||||
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
|
||||
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
|
||||
@@ -60,13 +61,22 @@ typedef enum {
|
||||
typedef uint64_t activity_correlation_id_t;
|
||||
|
||||
// Activity record type
|
||||
struct activity_record_t {
|
||||
typedef struct activity_record_s {
|
||||
uint32_t domain; // activity domain id
|
||||
activity_kind_t kind; // activity kind
|
||||
activity_op_t op; // activity op
|
||||
activity_correlation_id_t correlation_id; // activity ID
|
||||
uint64_t begin_ns; // host begin timestamp
|
||||
uint64_t end_ns; // host end timestamp
|
||||
union {
|
||||
struct {
|
||||
activity_correlation_id_t correlation_id; // activity ID
|
||||
uint64_t begin_ns; // host begin timestamp
|
||||
uint64_t end_ns; // host end timestamp
|
||||
};
|
||||
struct {
|
||||
uint32_t se; // sampled SE
|
||||
uint64_t cycle; // sample cycle
|
||||
uint64_t pc; // sample PC
|
||||
} pc_sample;
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
int device_id; // device id
|
||||
@@ -81,7 +91,7 @@ struct activity_record_t {
|
||||
};
|
||||
};
|
||||
size_t bytes; // data size bytes
|
||||
};
|
||||
} activity_record_t;
|
||||
|
||||
// Activity sync calback type
|
||||
typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg);
|
||||
|
||||
+60
-26
@@ -38,10 +38,13 @@ THE SOFTWARE.
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#ifndef __cplusplus
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
#include "ext/prof_protocol.h"
|
||||
|
||||
#define ROCTRACER_VERSION_MAJOR 1
|
||||
#define ROCTRACER_VERSION_MAJOR 2
|
||||
#define ROCTRACER_VERSION_MINOR 0
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -64,6 +67,7 @@ typedef enum {
|
||||
ROCTRACER_STATUS_BAD_PARAMETER = 5,
|
||||
ROCTRACER_STATUS_HIP_API_ERR = 6,
|
||||
ROCTRACER_STATUS_HCC_OPS_ERR = 7,
|
||||
ROCTRACER_STATUS_HSA_ERR = 7,
|
||||
ROCTRACER_STATUS_ROCTX_ERR = 8,
|
||||
} roctracer_status_t;
|
||||
|
||||
@@ -89,7 +93,7 @@ roctracer_status_t roctracer_op_code(
|
||||
uint32_t domain, // tracing domain
|
||||
const char* str, // [in] op string
|
||||
uint32_t* op, // [out] op code
|
||||
uint32_t* kind = NULL); // [out] op kind code
|
||||
uint32_t* kind); // [out] op kind code if not NULL
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Callback API
|
||||
@@ -137,7 +141,7 @@ roctracer_status_t roctracer_disable_callback();
|
||||
typedef activity_record_t roctracer_record_t;
|
||||
|
||||
// Return next record
|
||||
static inline int roctracer_next_record(
|
||||
static inline roctracer_status_t roctracer_next_record(
|
||||
const activity_record_t* record, // [in] record ptr
|
||||
const activity_record_t** next) // [out] next record ptr
|
||||
{
|
||||
@@ -172,31 +176,59 @@ typedef void roctracer_pool_t;
|
||||
|
||||
// Create tracer memory pool
|
||||
// The first invocation sets the default pool
|
||||
roctracer_status_t roctracer_open_pool(
|
||||
roctracer_status_t roctracer_open_pool_expl(
|
||||
const roctracer_properties_t* properties, // tracer pool properties
|
||||
roctracer_pool_t** pool = NULL); // [out] returns tracer pool if not NULL,
|
||||
roctracer_pool_t** pool); // [out] returns tracer pool if not NULL,
|
||||
// otherwise sets the default one if it is not set yet
|
||||
static inline roctracer_status_t roctracer_open_pool(
|
||||
const roctracer_properties_t* properties) // tracer pool properties
|
||||
{
|
||||
return roctracer_open_pool_expl(properties, NULL);
|
||||
}
|
||||
// otherwise the error is generated
|
||||
|
||||
// Close tracer memory pool
|
||||
roctracer_status_t roctracer_close_pool(
|
||||
roctracer_pool_t* pool = NULL); // [in] memory pool, NULL is a default one
|
||||
roctracer_status_t roctracer_close_pool_expl(
|
||||
roctracer_pool_t* pool); // [in] memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_close_pool()
|
||||
{
|
||||
return roctracer_close_pool_expl(NULL);
|
||||
}
|
||||
|
||||
// Return current default pool
|
||||
// Set new default pool if the argument is not NULL
|
||||
roctracer_pool_t* roctracer_default_pool(
|
||||
roctracer_pool_t* pool = NULL); // [in] new default pool if not NULL
|
||||
roctracer_pool_t* roctracer_default_pool_expl(
|
||||
roctracer_pool_t* pool); // [in] new default pool if not NULL
|
||||
static inline roctracer_pool_t* roctracer_default_pool()
|
||||
{
|
||||
return roctracer_default_pool_expl(NULL);
|
||||
}
|
||||
|
||||
// Enable activity records logging
|
||||
roctracer_status_t roctracer_enable_op_activity(
|
||||
roctracer_status_t roctracer_enable_op_activity_expl(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op, // activity op ID
|
||||
roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one
|
||||
roctracer_status_t roctracer_enable_domain_activity(
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_enable_op_activity(
|
||||
activity_domain_t domain, // tracing domain
|
||||
roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one
|
||||
roctracer_status_t roctracer_enable_activity(
|
||||
roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one
|
||||
uint32_t op) // activity op ID
|
||||
{
|
||||
return roctracer_enable_op_activity_expl(domain, op, NULL);
|
||||
}
|
||||
roctracer_status_t roctracer_enable_domain_activity_expl(
|
||||
activity_domain_t domain, // tracing domain
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_enable_domain_activity(
|
||||
activity_domain_t domain) // tracing domain
|
||||
{
|
||||
return roctracer_enable_domain_activity_expl(domain, NULL);
|
||||
}
|
||||
roctracer_status_t roctracer_enable_activity_expl(
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_enable_activity()
|
||||
{
|
||||
return roctracer_enable_activity_expl(NULL);
|
||||
}
|
||||
|
||||
// Disable activity records logging
|
||||
roctracer_status_t roctracer_disable_op_activity(
|
||||
@@ -207,24 +239,26 @@ roctracer_status_t roctracer_disable_domain_activity(
|
||||
roctracer_status_t roctracer_disable_activity();
|
||||
|
||||
// Flush available activity records
|
||||
roctracer_status_t roctracer_flush_activity(
|
||||
roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one
|
||||
roctracer_status_t roctracer_flush_activity_expl(
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_flush_activity()
|
||||
{
|
||||
return roctracer_flush_activity_expl(NULL);
|
||||
}
|
||||
|
||||
// Get system timestamp
|
||||
roctracer_status_t roctracer_get_timestamp(
|
||||
uint64_t* timestamp); // [out] return timestamp
|
||||
|
||||
// Load/Unload methods
|
||||
bool roctracer_load();
|
||||
void roctracer_unload();
|
||||
|
||||
// Set properties
|
||||
roctracer_status_t roctracer_set_properties(
|
||||
roctracer_domain_t domain, // tracing domain
|
||||
void* propertes); // tracing properties
|
||||
|
||||
struct HsaApiTable;
|
||||
bool roctracer_load(
|
||||
HsaApiTable* table,
|
||||
uint64_t runtime_version,
|
||||
uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names);
|
||||
|
||||
void roctracer_unload(bool destruct);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
@@ -46,7 +46,7 @@ extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Application annotatin API
|
||||
// Application annotation API
|
||||
|
||||
// Tracing start API
|
||||
void roctracer_start();
|
||||
@@ -63,8 +63,8 @@ roctracer_status_t roctracer_activity_push_external_correlation_id(activity_corr
|
||||
|
||||
// Notifies that the calling thread is leaving an external API region.
|
||||
// Pop an external correlation id for the calling thread.
|
||||
// 'lastId' returns the last external correlation
|
||||
roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id = NULL);
|
||||
// 'lastId' returns the last external correlation if not NULL
|
||||
roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
|
||||
+10
-14
@@ -23,26 +23,22 @@ THE SOFTWARE.
|
||||
#ifndef INC_ROCTRACER_HCC_H_
|
||||
#define INC_ROCTRACER_HCC_H_
|
||||
|
||||
#if HIP_VDI
|
||||
#define HIP_OP_ID_NUMBER 3
|
||||
#define HIP_OP_ID_COPY 1
|
||||
enum {
|
||||
HIP_OP_ID_DISPATCH = 0,
|
||||
HIP_OP_ID_COPY = 1,
|
||||
HIP_OP_ID_BARRIER = 2,
|
||||
HIP_OP_ID_NUMBER = 3
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
typedef void (hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg);
|
||||
typedef bool (hipEnableAsyncActivityCallback_t)(unsigned op, bool enable);
|
||||
typedef const char* (hipGetOpName_t)(unsigned op);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#else // !HIP_VDI
|
||||
#if LOCAL_BUILD
|
||||
#include <hc_prof_runtime.h>
|
||||
#else
|
||||
#include <hcc/hc_prof_runtime.h>
|
||||
#endif
|
||||
#define HIP_OP_ID_NUMBER hc::HSA_OP_ID_NUMBER
|
||||
#define HIP_OP_ID_COPY hc::HSA_OP_ID_COPY
|
||||
typedef decltype(Kalmar::CLAMP::InitActivityCallback) hipInitAsyncActivityCallback_t;
|
||||
typedef decltype(Kalmar::CLAMP::EnableActivityCallback) hipEnableAsyncActivityCallback_t;
|
||||
typedef decltype(Kalmar::CLAMP::GetCmdName) hipGetOpName_t;
|
||||
#endif // !HIP_VDI
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
// Traced calls ID enumeration
|
||||
typedef hip_api_id_t roctracer_hip_api_cid_t;
|
||||
typedef enum hip_api_id_t roctracer_hip_api_cid_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
|
||||
@@ -22,16 +22,25 @@ THE SOFTWARE.
|
||||
|
||||
#ifndef INC_ROCTRACER_HSA_H_
|
||||
#define INC_ROCTRACER_HSA_H_
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_api_trace.h>
|
||||
#include <hsa_ext_amd.h>
|
||||
|
||||
#include "cb_table.h"
|
||||
#include "roctracer.h"
|
||||
|
||||
// HSA OP ID enumeration
|
||||
enum hsa_op_id_t {
|
||||
HSA_OP_ID_DISPATCH = 0,
|
||||
HSA_OP_ID_COPY = 1,
|
||||
HSA_OP_ID_BARRIER = 2,
|
||||
HSA_OP_ID_PCSAMPLE = 3,
|
||||
HSA_OP_ID_NUMBER = 4
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include <iostream>
|
||||
#include <hsa_api_trace.h>
|
||||
|
||||
namespace roctracer {
|
||||
namespace hsa_support {
|
||||
enum {
|
||||
@@ -129,6 +138,10 @@ struct output_streamer<hsa_queue_t**> {
|
||||
inline static std::ostream& put(std::ostream& out, hsa_queue_t** v) { out << "<queue " << *v << ">"; return out; }
|
||||
};
|
||||
};};
|
||||
#else // !__cplusplus
|
||||
typedef void* hsa_amd_queue_intercept_handler;
|
||||
typedef void* hsa_amd_runtime_queue_notifier;
|
||||
#endif //! __cplusplus
|
||||
|
||||
#include "inc/hsa_prof_str.h"
|
||||
#endif // INC_ROCTRACER_HSA_H_
|
||||
|
||||
@@ -23,11 +23,11 @@ THE SOFTWARE.
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
#ifndef INC_ROCTRACER_KFD_H_
|
||||
#define INC_ROCTRACER_KFD_H_
|
||||
#include <iostream>
|
||||
|
||||
#include "roctracer.h"
|
||||
#include "hsakmt.h"
|
||||
#ifdef __cplusplus
|
||||
#include "inc/kfd_ostream_ops.h"
|
||||
#endif
|
||||
#include "inc/kfd_prof_str.h"
|
||||
|
||||
#endif // INC_ROCTRACER_KFD_H_
|
||||
|
||||
@@ -33,8 +33,6 @@ THE SOFTWARE.
|
||||
#ifndef INC_ROCTRACER_ROCTX_H_
|
||||
#define INC_ROCTRACER_ROCTX_H_
|
||||
|
||||
#include "cb_table.h"
|
||||
|
||||
// ROC-TX API ID enumeration
|
||||
enum roctx_api_id_t {
|
||||
ROCTX_API_ID_roctxMarkA = 0,
|
||||
@@ -45,7 +43,7 @@ enum roctx_api_id_t {
|
||||
};
|
||||
|
||||
// ROCTX callbacks data type
|
||||
struct roctx_api_data_t {
|
||||
typedef struct roctx_api_data_s {
|
||||
union {
|
||||
const char* message;
|
||||
struct {
|
||||
@@ -58,14 +56,15 @@ struct roctx_api_data_t {
|
||||
const char* message;
|
||||
} roctxRangePop;
|
||||
} args;
|
||||
};
|
||||
} roctx_api_data_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include "cb_table.h"
|
||||
namespace roctx {
|
||||
|
||||
// ROCTX callbacks table type
|
||||
typedef roctracer::CbTable<ROCTX_API_ID_NUMBER> cb_table_t;
|
||||
|
||||
} // namespace roctx
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
||||
Обычный файл → Исполняемый файл
+41
-32
@@ -76,7 +76,7 @@ HEADER = \
|
||||
'\n'
|
||||
|
||||
structs_done = {}
|
||||
def process_struct(f,c,cppHeader,nname):
|
||||
def process_struct(f,c,cppHeader,nname,apiname):
|
||||
|
||||
if c not in cppHeader.classes:
|
||||
return
|
||||
@@ -104,9 +104,9 @@ def process_struct(f,c,cppHeader,nname):
|
||||
|
||||
if mtype != "" and "union" not in mtype:
|
||||
if array_size == "":
|
||||
str = " roctracer::kfd_support::output_streamer<"+mtype+">::put(out,v."+name+");\n"
|
||||
str = " roctracer::" + apiname.lower() + "_support::output_streamer<"+mtype+">::put(out,v."+name+");\n"
|
||||
else:
|
||||
str = " roctracer::kfd_support::output_streamer<"+mtype+"["+array_size+"]>::put(out,v."+name+");\n"
|
||||
str = " roctracer::" + apiname.lower() + "_support::output_streamer<"+mtype+"["+array_size+"]>::put(out,v."+name+");\n"
|
||||
|
||||
if nname != "" and nname not in str:
|
||||
#print("injecting ",nname, "in ", str)
|
||||
@@ -115,70 +115,79 @@ def process_struct(f,c,cppHeader,nname):
|
||||
f.write(str)
|
||||
else:
|
||||
nc = prop+"::"
|
||||
process_struct(f,nc,cppHeader,name)
|
||||
process_struct(f,nc,cppHeader,name,apiname)
|
||||
nc = prop+"::"+mtype+" "
|
||||
process_struct(f,nc,cppHeader,name)
|
||||
process_struct(f,nc,cppHeader,name,apiname)
|
||||
nc = c+"::"
|
||||
process_struct(f,nc,cppHeader,name)
|
||||
process_struct(f,nc,cppHeader,name,apiname)
|
||||
|
||||
|
||||
def gen_cppheader(infilepath,outfilepath):
|
||||
def gen_cppheader(infilepath, outfilepath):
|
||||
try:
|
||||
cppHeader = CppHeaderParser.CppHeader(infilepath)
|
||||
except CppHeaderParser.CppParseError as e:
|
||||
print(e)
|
||||
sys.exit(1)
|
||||
|
||||
mpath = os.path.dirname(outfilepath)
|
||||
if mpath == "":
|
||||
mpath = os.getcwd()
|
||||
apiname = outfilepath.replace(mpath+"/","")
|
||||
apiname = apiname.replace("_ostream_ops.h","")
|
||||
apiname = apiname.upper()
|
||||
f = open(outfilepath,"w+")
|
||||
f2 = open(mpath + "/basic_ostream_ops.h","w+")
|
||||
f.write("// automatically generated\n")
|
||||
f.write(LICENSE)
|
||||
f.write("\n")
|
||||
f2.write("// automatically generated\n")
|
||||
f.write(LICENSE + '\n')
|
||||
f2.write(LICENSE + '\n')
|
||||
HEADER_S = \
|
||||
'#ifndef INC_KFD_OSTREAM_OPS_H_\n' + \
|
||||
'#define INC_KFD_OSTREAM_OPS_H_\n' + \
|
||||
'#ifndef INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
|
||||
'#define INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
|
||||
'#include <iostream>\n' + \
|
||||
'\n' + \
|
||||
'#include "roctracer.h"\n' + \
|
||||
'#include "hsakmt.h"\n'
|
||||
'#include "roctracer.h"\n'
|
||||
f.write(HEADER_S)
|
||||
f.write('\n')
|
||||
f.write('namespace roctracer {\n')
|
||||
f.write('namespace kfd_support {\n')
|
||||
f.write('// begin ostream ops for KFD \n')
|
||||
f.write(HEADER)
|
||||
f.write('namespace ' + apiname.lower() + '_support {\n')
|
||||
f.write('// begin ostream ops for '+ apiname + ' \n')
|
||||
f.write('#include "basic_ostream_ops.h"' + '\n')
|
||||
f2.write(HEADER)
|
||||
for c in cppHeader.classes:
|
||||
if "union" in c:
|
||||
continue
|
||||
f.write("\ntemplate<>\n")
|
||||
f.write("struct output_streamer<"+c+"&> {\n")
|
||||
f.write(" inline static std::ostream& put(std::ostream& out, "+c+"& v)\n")
|
||||
f.write("{\n")
|
||||
process_struct(f,c,cppHeader,"")
|
||||
f.write(" return out;\n")
|
||||
f.write("}\n")
|
||||
f.write("};\n")
|
||||
if len(cppHeader.classes[c]["properties"]["public"])!=0:
|
||||
f.write("\ntemplate<>\n")
|
||||
f.write("struct output_streamer<"+c+"&> {\n")
|
||||
f.write(" inline static std::ostream& put(std::ostream& out, "+c+"& v)\n")
|
||||
f.write("{\n")
|
||||
process_struct(f,c,cppHeader,"",apiname)
|
||||
f.write(" return out;\n")
|
||||
f.write("}\n")
|
||||
f.write("};\n")
|
||||
|
||||
FOOTER = \
|
||||
'// end ostream ops for KFD \n'
|
||||
'// end ostream ops for '+ apiname + ' \n'
|
||||
FOOTER += '};};\n' + \
|
||||
'\n' + \
|
||||
'#endif // INC_KFD_OSTREAM_OPS_H_\n' + \
|
||||
'#endif // INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
|
||||
' \n'
|
||||
FOOTER2 = '\n\n' + \
|
||||
'#endif // INC_BASIC_OSTREAM_OPS_H_\n' + \
|
||||
' \n'
|
||||
f.write(FOOTER)
|
||||
|
||||
f.close()
|
||||
f2.close()
|
||||
print('File ' + outfilepath + ' generated')
|
||||
print('File ' + mpath + '/basic_ostream_ops.h generated')
|
||||
|
||||
return
|
||||
|
||||
parser = argparse.ArgumentParser(description='genOstreamOps.py: generates ostream operators for all typedefs in provided input file.')
|
||||
requiredNamed = parser.add_argument_group('Required arguments')
|
||||
requiredNamed.add_argument('-in','--in', help='Header file to be parsed', required=True)
|
||||
requiredNamed.add_argument('-out','--out', help='Output file with ostream operators', required=True)
|
||||
requiredNamed.add_argument('-in', metavar='file', help='Header file to be parsed', required=True)
|
||||
requiredNamed.add_argument('-out', metavar='file', help='Output file with ostream operators', required=True)
|
||||
|
||||
args = vars(parser.parse_args())
|
||||
|
||||
if __name__ == '__main__':
|
||||
gen_cppheader(args['in'],args['out'])
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/python
|
||||
from __future__ import print_function
|
||||
import os, sys, re
|
||||
|
||||
OUT='inc/hsa_prof_str.h'
|
||||
@@ -36,7 +37,7 @@ LICENSE = \
|
||||
#############################################################
|
||||
# Error handler
|
||||
def fatal(module, msg):
|
||||
print >>sys.stderr, module + ' Error: "' + msg + '"'
|
||||
print (module + ' Error: "' + msg + '"', file = sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Get next text block
|
||||
@@ -342,8 +343,8 @@ class API_DescrParser:
|
||||
self.content += ' ' + self.api_id[call] + ' = ' + str(n) + ',\n'
|
||||
else:
|
||||
self.content += '\n'
|
||||
self.content += ' HSA_API_ID_NUMBER = ' + str(n) + ',\n'
|
||||
self.content += ' HSA_API_ID_ANY = ' + str(n + 1) + ',\n'
|
||||
self.content += ' HSA_API_ID_DISPATCH = ' + str(n) + ',\n'
|
||||
self.content += ' HSA_API_ID_NUMBER = ' + str(n + 1) + ',\n'
|
||||
self.content += '};\n'
|
||||
|
||||
# generate API args structure
|
||||
@@ -440,6 +441,7 @@ class API_DescrParser:
|
||||
# generate stream operator
|
||||
def gen_out_stream(self, n, name, call, struct):
|
||||
if n == -1:
|
||||
self.content += '#ifdef __cplusplus\n'
|
||||
self.content += 'typedef std::pair<uint32_t, hsa_api_data_t> hsa_api_data_pair_t;\n'
|
||||
self.content += 'inline std::ostream& operator<< (std::ostream& out, const hsa_api_data_pair_t& data_pair) {\n'
|
||||
self.content += ' const uint32_t cid = data_pair.first;\n'
|
||||
@@ -483,12 +485,13 @@ class API_DescrParser:
|
||||
self.content += ' }\n'
|
||||
self.content += ' return out;\n'
|
||||
self.content += '}\n'
|
||||
self.content += '#endif\n'
|
||||
|
||||
#############################################################
|
||||
# main
|
||||
# Usage
|
||||
if len(sys.argv) != 3:
|
||||
print >>sys.stderr, "Usage:", sys.argv[0], " <rocTracer root> <HSA runtime include path>"
|
||||
print ("Usage:", sys.argv[0], " <rocTracer root> <HSA runtime include path>", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
else:
|
||||
ROOT = sys.argv[1] + '/'
|
||||
@@ -497,7 +500,7 @@ else:
|
||||
descr = API_DescrParser(OUT, HSA_DIR, API_TABLES_H, API_HEADERS_H, LICENSE)
|
||||
|
||||
out_file = ROOT + OUT
|
||||
print 'Generating "' + out_file + '"'
|
||||
print ('Generating "' + out_file + '"')
|
||||
f = open(out_file, 'w')
|
||||
f.write(descr.content[:-1])
|
||||
f.close()
|
||||
|
||||
+12
-10
@@ -1,4 +1,5 @@
|
||||
#!/usr/bin/python
|
||||
from __future__ import print_function
|
||||
import os, sys, re
|
||||
|
||||
OUT_H = 'inc/kfd_prof_str.h'
|
||||
@@ -33,7 +34,7 @@ LICENSE = \
|
||||
#############################################################
|
||||
# Error handler
|
||||
def fatal(module, msg):
|
||||
print >>sys.stderr, module + ' Error: "' + msg + '"'
|
||||
print (module + ' Error: "' + msg + '"', file = sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Get next text block
|
||||
@@ -284,7 +285,6 @@ class API_DescrParser:
|
||||
self.content_h += '#include <string.h>\n'
|
||||
self.content_h += '#include \"roctracer_kfd.h\"\n'
|
||||
self.content_h += '#include \"hsakmt.h\"\n'
|
||||
self.content_h += '#include \"cb_table.h\"\n'
|
||||
|
||||
self.content_h += '#define PUBLIC_API __attribute__((visibility(\"default\")))\n'
|
||||
|
||||
@@ -293,6 +293,7 @@ class API_DescrParser:
|
||||
|
||||
self.content_h += '\n'
|
||||
self.content_h += '#if PROF_API_IMPL\n'
|
||||
self.content_h += '#include \"cb_table.h\"\n'
|
||||
self.content_h += 'namespace roctracer {\n'
|
||||
self.content_h += 'namespace kfd_support {\n'
|
||||
|
||||
@@ -372,7 +373,7 @@ class API_DescrParser:
|
||||
# generate API args structure
|
||||
def gen_arg_struct(self, n, name, call, struct):
|
||||
if n == -1:
|
||||
self.content_h += 'struct kfd_api_data_t {\n'
|
||||
self.content_h += 'typedef struct kfd_api_data_s {\n'
|
||||
self.content_h += ' uint64_t correlation_id;\n'
|
||||
self.content_h += ' uint32_t phase;\n'
|
||||
if len(self.api_rettypes) != 0:
|
||||
@@ -394,7 +395,7 @@ class API_DescrParser:
|
||||
self.content_h += ' } ' + call + ';\n'
|
||||
else:
|
||||
self.content_h += ' } args;\n'
|
||||
self.content_h += '};\n'
|
||||
self.content_h += '} kfd_api_data_t;\n'
|
||||
|
||||
# generate API callbacks
|
||||
def gen_callbacks(self, n, name, call, struct):
|
||||
@@ -406,8 +407,7 @@ class API_DescrParser:
|
||||
call_id = self.api_id[call];
|
||||
ret_type = struct['ret']
|
||||
self.content_h += ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n' # 'static ' +
|
||||
if call == 'hsaKmtOpenKFD':
|
||||
self.content_h += ' if (' + name + '_table == NULL) intercept_KFDApiTable();\n'
|
||||
self.content_h += ' if (' + name + '_table == NULL) intercept_KFDApiTable();\n'
|
||||
self.content_h += ' kfd_api_data_t api_data{};\n'
|
||||
for var in struct['alst']:
|
||||
self.content_h += ' api_data.args.' + call + '.' + var.replace("[]","") + ' = ' + var.replace("[]","") + ';\n'
|
||||
@@ -477,6 +477,7 @@ class API_DescrParser:
|
||||
# generate stream operator
|
||||
def gen_out_stream(self, n, name, call, struct):
|
||||
if n == -1:
|
||||
self.content_h += '#ifdef __cplusplus\n'
|
||||
self.content_h += 'typedef std::pair<uint32_t, kfd_api_data_t> kfd_api_data_pair_t;\n'
|
||||
self.content_h += 'inline std::ostream& operator<< (std::ostream& out, const kfd_api_data_pair_t& data_pair) {\n'
|
||||
self.content_h += ' const uint32_t cid = data_pair.first;\n'
|
||||
@@ -510,6 +511,7 @@ class API_DescrParser:
|
||||
self.content_h += ' }\n'
|
||||
self.content_h += ' return out;\n'
|
||||
self.content_h += '}\n'
|
||||
self.content_h += '#endif\n'
|
||||
self.content_cpp += 'inline std::ostream& operator<< (std::ostream& out, const HsaMemFlags& v) { out << "HsaMemFlags"; return out; }\n'
|
||||
|
||||
# generate PUBLIC_API for all API fcts
|
||||
@@ -525,7 +527,7 @@ class API_DescrParser:
|
||||
self.content_cpp += ' return true;\n';
|
||||
self.content_cpp += '}\n\n';
|
||||
|
||||
if call != '-':
|
||||
if call != '-' and call != 'hsaKmtCloseKFD' and call != 'hsaKmtOpenKFD':
|
||||
self.content_cpp += 'PUBLIC_API ' + struct['ret'] + " " + call + '(' + struct['args'] + ') { return roctracer::kfd_support::' + call + '_callback('
|
||||
for i in range(0,len(struct['alst'])):
|
||||
if i == (len(struct['alst'])-1):
|
||||
@@ -538,7 +540,7 @@ class API_DescrParser:
|
||||
# main
|
||||
# Usage
|
||||
if len(sys.argv) != 3:
|
||||
print >>sys.stderr, "Usage:", sys.argv[0], " <rocTracer root> <KFD include path>"
|
||||
print ("Usage:", sys.argv[0], " <rocTracer root> <KFD include path>", file = sys.stderr)
|
||||
sys.exit(1)
|
||||
else:
|
||||
ROOT = sys.argv[1] + '/'
|
||||
@@ -547,13 +549,13 @@ else:
|
||||
descr = API_DescrParser(OUT_H, KFD_DIR, API_HEADERS_H, LICENSE)
|
||||
|
||||
out_file = ROOT + OUT_H
|
||||
print 'Generating "' + out_file + '"'
|
||||
print ('Generating "' + out_file + '"')
|
||||
f = open(out_file, 'w')
|
||||
f.write(descr.content_h[:-1])
|
||||
f.close()
|
||||
|
||||
out_file = ROOT + OUT_CPP
|
||||
print 'Generating "' + out_file + '"'
|
||||
print ('Generating "' + out_file + '"')
|
||||
f = open(out_file, 'w')
|
||||
f.write(descr.content_cpp[:-1])
|
||||
f.close()
|
||||
|
||||
+10
-11
@@ -17,17 +17,16 @@ target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ )
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/hsaap.py ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH}" )
|
||||
|
||||
# Generating KFD/Thunk tracing primitives
|
||||
if ( DEFINED KFD_WRAPPER )
|
||||
set ( KFD_LIB "kfdwrapper64" )
|
||||
set ( KFD_LIB_SRC
|
||||
${LIB_DIR}/kfd/kfd_wrapper.cpp
|
||||
)
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" )
|
||||
add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} )
|
||||
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} )
|
||||
target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ )
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" )
|
||||
endif()
|
||||
set ( KFD_LIB "kfdwrapper64" )
|
||||
set ( KFD_LIB_SRC
|
||||
${LIB_DIR}/kfd/kfd_wrapper.cpp
|
||||
)
|
||||
execute_process ( COMMAND sh -xc "${CMAKE_CXX_COMPILER} -E ${HSA_KMT_INC_PATH}/hsakmttypes.h > ${PROJECT_BINARY_DIR}/hsakmttypes_pp.h" )
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${PROJECT_BINARY_DIR}/hsakmttypes_pp.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" )
|
||||
add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} )
|
||||
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} )
|
||||
target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ )
|
||||
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" )
|
||||
|
||||
set ( ROCTX_LIB "roctx64" )
|
||||
set ( ROCTX_LIB_SRC
|
||||
|
||||
@@ -47,7 +47,7 @@ class Journal {
|
||||
}
|
||||
|
||||
~Journal() {
|
||||
for (auto& val : map_) delete val.second;
|
||||
for (auto& val : *map_) delete val.second;
|
||||
delete map_;
|
||||
}
|
||||
|
||||
|
||||
+47
-10
@@ -21,11 +21,10 @@ class BaseLoader : public T {
|
||||
if (handle_ == NULL) return NULL;
|
||||
|
||||
fun_t *f = (fun_t*) dlsym(handle_, fun_name);
|
||||
if (f == NULL) {
|
||||
if ((to_check_symb_ == true) && (f == NULL)) {
|
||||
fprintf(stderr, "roctracer: symbol lookup '%s' failed: \"%s\"\n", fun_name, dlerror());
|
||||
abort();
|
||||
}
|
||||
dlerror();
|
||||
return f;
|
||||
}
|
||||
|
||||
@@ -46,13 +45,12 @@ class BaseLoader : public T {
|
||||
|
||||
private:
|
||||
BaseLoader() {
|
||||
const int flags = RTLD_LAZY;
|
||||
const int flags = (to_load_ == true) ? RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD;
|
||||
handle_ = dlopen(lib_name_, flags);
|
||||
if (handle_ == NULL) {
|
||||
if ((to_check_open_ == true) && (handle_ == NULL)) {
|
||||
fprintf(stderr, "roctracer: Loading '%s' failed, %s\n", lib_name_, dlerror());
|
||||
abort();
|
||||
}
|
||||
dlerror();
|
||||
|
||||
T::init(this);
|
||||
}
|
||||
@@ -61,12 +59,43 @@ class BaseLoader : public T {
|
||||
if (handle_ != NULL) dlclose(handle_);
|
||||
}
|
||||
|
||||
static bool to_load_;
|
||||
static bool to_check_open_;
|
||||
static bool to_check_symb_;
|
||||
|
||||
static mutex_t mutex_;
|
||||
static const char* lib_name_;
|
||||
static std::atomic<loader_t*> instance_;
|
||||
void* handle_;
|
||||
};
|
||||
|
||||
// 'rocprofiler' library loader class
|
||||
class RocpApi {
|
||||
public:
|
||||
typedef BaseLoader<RocpApi> Loader;
|
||||
|
||||
typedef bool (RegisterCallback_t)(uint32_t op, void* callback, void* arg);
|
||||
typedef bool (OperateCallback_t)(uint32_t op);
|
||||
typedef bool (InitCallback_t)(void* callback, void* arg);
|
||||
typedef bool (EnableCallback_t)(uint32_t op, bool enable);
|
||||
typedef const char* (NameCallback_t)(uint32_t op);
|
||||
|
||||
RegisterCallback_t* RegisterApiCallback;
|
||||
OperateCallback_t* RemoveApiCallback;
|
||||
InitCallback_t* InitActivityCallback;
|
||||
EnableCallback_t* EnableActivityCallback;
|
||||
NameCallback_t* GetOpName;
|
||||
|
||||
protected:
|
||||
void init(Loader* loader) {
|
||||
RegisterApiCallback = loader->GetFun<RegisterCallback_t>("RegisterApiCallback");
|
||||
RemoveApiCallback = loader->GetFun<OperateCallback_t>("RemoveApiCallback");
|
||||
InitActivityCallback = loader->GetFun<InitCallback_t>("InitActivityCallback");
|
||||
EnableActivityCallback = loader->GetFun<EnableCallback_t>("EnableActivityCallback");
|
||||
GetOpName = loader->GetFun<NameCallback_t>("GetOpName");
|
||||
}
|
||||
};
|
||||
|
||||
// HIP runtime library loader class
|
||||
class HipApi {
|
||||
public:
|
||||
@@ -110,9 +139,9 @@ class HccApi {
|
||||
protected:
|
||||
void init(Loader* loader) {
|
||||
#if HIP_VDI
|
||||
InitActivityCallback = loader->GetFun<hipInitAsyncActivityCallback_t>("InitActivityCallback");
|
||||
EnableActivityCallback = loader->GetFun<hipEnableAsyncActivityCallback_t>("EnableActivityCallback");
|
||||
GetOpName = loader->GetFun<hipGetOpName_t>("GetCmdName");
|
||||
InitActivityCallback = loader->GetFun<hipInitAsyncActivityCallback_t>("hipInitActivityCallback");
|
||||
EnableActivityCallback = loader->GetFun<hipEnableAsyncActivityCallback_t>("hipEnableActivityCallback");
|
||||
GetOpName = loader->GetFun<hipGetOpName_t>("hipGetCmdName");
|
||||
#else
|
||||
InitActivityCallback = loader->GetFun<hipInitAsyncActivityCallback_t>("InitActivityCallbackImpl");
|
||||
EnableActivityCallback = loader->GetFun<hipEnableAsyncActivityCallback_t>("EnableActivityCallbackImpl");
|
||||
@@ -161,6 +190,7 @@ class RocTxApi {
|
||||
}
|
||||
};
|
||||
|
||||
typedef BaseLoader<RocpApi> RocpLoader;
|
||||
typedef BaseLoader<HipApi> HipLoader;
|
||||
typedef BaseLoader<HccApi> HccLoader;
|
||||
typedef BaseLoader<KfdApi> KfdLoader;
|
||||
@@ -171,9 +201,16 @@ typedef BaseLoader<RocTxApi> RocTxLoader;
|
||||
#define LOADER_INSTANTIATE() \
|
||||
template<class T> typename roctracer::BaseLoader<T>::mutex_t roctracer::BaseLoader<T>::mutex_; \
|
||||
template<class T> std::atomic<roctracer::BaseLoader<T>*> roctracer::BaseLoader<T>::instance_{}; \
|
||||
template<class T> bool roctracer::BaseLoader<T>::to_load_ = false; \
|
||||
template<class T> bool roctracer::BaseLoader<T>::to_check_open_ = true; \
|
||||
template<class T> bool roctracer::BaseLoader<T>::to_check_symb_ = true; \
|
||||
template<> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \
|
||||
template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \
|
||||
template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp_hsa.so"; \
|
||||
template<> bool roctracer::HipLoader::to_check_open_ = false; \
|
||||
template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp.so"; \
|
||||
template<> bool roctracer::HccLoader::to_check_open_ = false; \
|
||||
template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \
|
||||
template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so";
|
||||
template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \
|
||||
template<> bool roctracer::RocTxLoader::to_load_ = true;
|
||||
|
||||
#endif // SRC_CORE_LOADER_H_
|
||||
|
||||
+170
-104
@@ -27,9 +27,7 @@ THE SOFTWARE.
|
||||
#include "inc/roctracer_roctx.h"
|
||||
#define PROF_API_IMPL 1
|
||||
#include "inc/roctracer_hsa.h"
|
||||
#ifdef KFD_WRAPPER
|
||||
#include "inc/roctracer_kfd.h"
|
||||
#endif
|
||||
|
||||
#include <dirent.h>
|
||||
#include <pthread.h>
|
||||
@@ -87,9 +85,13 @@ THE SOFTWARE.
|
||||
(void)err; \
|
||||
return X;
|
||||
|
||||
#ifndef onload_debug
|
||||
#define onload_debug false
|
||||
#endif
|
||||
#define ONLOAD_TRACE(str) \
|
||||
if (getenv("ROCP_ONLOAD_TRACE")) do { \
|
||||
std::cout << "PID(" << GetPid() << "): TRACER_LIB::" << __FUNCTION__ << " " << str << std::endl << std::flush; \
|
||||
} while(0);
|
||||
#define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin")
|
||||
#define ONLOAD_TRACE_END() ONLOAD_TRACE("end")
|
||||
|
||||
|
||||
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
|
||||
@@ -174,7 +176,7 @@ decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
|
||||
|
||||
typedef decltype(roctracer_enable_op_callback)* roctracer_enable_op_callback_t;
|
||||
typedef decltype(roctracer_disable_op_callback)* roctracer_disable_op_callback_t;
|
||||
typedef decltype(roctracer_enable_op_activity)* roctracer_enable_op_activity_t;
|
||||
typedef decltype(roctracer_enable_op_activity_expl)* roctracer_enable_op_activity_t;
|
||||
typedef decltype(roctracer_disable_op_activity)* roctracer_disable_op_activity_t;
|
||||
|
||||
struct cb_journal_data_t {
|
||||
@@ -251,18 +253,16 @@ class GlobalCounter {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
typedef uint64_t counter_t;
|
||||
typedef std::atomic<counter_t> atomic_counter_t;
|
||||
|
||||
static counter_t Increment() {
|
||||
std::lock_guard<mutex_t> lock(mutex_);
|
||||
return ++counter_;
|
||||
}
|
||||
static counter_t Increment() { return counter_.fetch_add(1, std::memory_order_relaxed); }
|
||||
|
||||
private:
|
||||
static mutex_t mutex_;
|
||||
static counter_t counter_;
|
||||
static atomic_counter_t counter_;
|
||||
};
|
||||
GlobalCounter::mutex_t GlobalCounter::mutex_;
|
||||
GlobalCounter::counter_t GlobalCounter::counter_ = 0;
|
||||
GlobalCounter::atomic_counter_t GlobalCounter::counter_{1};
|
||||
|
||||
// Records storage
|
||||
struct roctracer_api_data_t {
|
||||
@@ -284,6 +284,7 @@ typedef std::map<activity_correlation_id_t, activity_correlation_id_t> correlati
|
||||
typedef std::mutex correlation_id_mutex_t;
|
||||
correlation_id_map_t* correlation_id_map = NULL;
|
||||
correlation_id_mutex_t correlation_id_mutex;
|
||||
bool correlation_id_wait = true;
|
||||
|
||||
static thread_local std::stack<activity_correlation_id_t> external_id_stack;
|
||||
|
||||
@@ -296,6 +297,7 @@ static inline void CorrelationIdRegistr(const activity_correlation_id_t& correla
|
||||
|
||||
static inline activity_correlation_id_t CorrelationIdLookup(const activity_correlation_id_t& correlation_id) {
|
||||
auto it = correlation_id_map->find(correlation_id);
|
||||
if (correlation_id_wait) while (it == correlation_id_map->end()) it = correlation_id_map->find(correlation_id);
|
||||
if (it == correlation_id_map->end()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id lookup failed(" << correlation_id << ")");
|
||||
return it->second;
|
||||
}
|
||||
@@ -330,6 +332,7 @@ void* HIP_SyncActivityCallback(
|
||||
data = &(top.data.hip);
|
||||
data_ptr = const_cast<hip_api_data_t*>(data);
|
||||
data_ptr->phase = phase;
|
||||
data_ptr->correlation_id = 0;
|
||||
}
|
||||
|
||||
// Filing record info
|
||||
@@ -391,8 +394,6 @@ void HCC_ActivityIdCallback(activity_correlation_id_t correlation_id) {
|
||||
}
|
||||
|
||||
void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
|
||||
static hsa_rt_utils::Timer timer;
|
||||
|
||||
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
|
||||
roctracer_record_t* record_ptr = reinterpret_cast<roctracer_record_t*>(record);
|
||||
record_ptr->domain = ACTIVITY_DOMAIN_HCC_OPS;
|
||||
@@ -523,14 +524,36 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
|
||||
return status;
|
||||
}
|
||||
|
||||
void HSA_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
|
||||
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
|
||||
roctracer_record_t* record_ptr = reinterpret_cast<roctracer_record_t*>(record);
|
||||
record_ptr->domain = ACTIVITY_DOMAIN_HSA_OPS;
|
||||
pool->Write(*record_ptr);
|
||||
}
|
||||
|
||||
// Logger routines and primitives
|
||||
util::Logger::mutex_t util::Logger::mutex_;
|
||||
std::atomic<util::Logger*> util::Logger::instance_{};
|
||||
|
||||
// Memory pool routines and primitives
|
||||
MemoryPool* memory_pool = NULL;
|
||||
typedef std::recursive_mutex memory_pool_mutex_t;
|
||||
memory_pool_mutex_t memory_pool_mutex;
|
||||
|
||||
// Stop sttaus routines and primitives
|
||||
unsigned stop_status_value = 0;
|
||||
typedef std::mutex stop_status_mutex_t;
|
||||
stop_status_mutex_t stop_status_mutex;
|
||||
unsigned set_stopped(unsigned val) {
|
||||
std::lock_guard<stop_status_mutex_t> lock(stop_status_mutex);
|
||||
const unsigned ret = (stop_status_value ^ val);
|
||||
stop_status_value = val;
|
||||
return ret;
|
||||
}
|
||||
} // namespace roctracer
|
||||
|
||||
LOADER_INSTANTIATE();
|
||||
TRACE_BUFFER_INSTANTIATE();
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Public library methods
|
||||
@@ -555,24 +578,16 @@ PUBLIC_API const char* roctracer_op_string(
|
||||
{
|
||||
API_METHOD_PREFIX
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
return roctracer::hsa_support::GetApiName(op);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
return roctracer::RocpLoader::Instance().GetOpName(op);
|
||||
case ACTIVITY_DOMAIN_HCC_OPS:
|
||||
return roctracer::HccLoader::Instance().GetOpName(kind);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
case ACTIVITY_DOMAIN_HIP_API:
|
||||
return roctracer::HipLoader::Instance().ApiName(op);
|
||||
break;
|
||||
}
|
||||
#if KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
case ACTIVITY_DOMAIN_KFD_API:
|
||||
return roctracer::kfd_support::GetApiName(op);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
@@ -593,13 +608,11 @@ PUBLIC_API roctracer_status_t roctracer_op_code(
|
||||
if (kind != NULL) *kind = 0;
|
||||
break;
|
||||
}
|
||||
#ifdef KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
*op = roctracer::kfd_support::GetApiCode(str);
|
||||
if (kind != NULL) *kind = 0;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "limited domain ID(" << domain << ")");
|
||||
}
|
||||
@@ -608,13 +621,11 @@ PUBLIC_API roctracer_status_t roctracer_op_code(
|
||||
|
||||
static inline uint32_t get_op_num(const uint32_t& domain) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: return 1;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: return HSA_OP_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER;
|
||||
#ifdef KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER;
|
||||
#endif
|
||||
case ACTIVITY_DOMAIN_EXT_API: return 0;
|
||||
case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER;
|
||||
default:
|
||||
@@ -631,28 +642,35 @@ static roctracer_status_t roctracer_enable_callback_fun(
|
||||
void* user_data)
|
||||
{
|
||||
switch (domain) {
|
||||
#ifdef KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
|
||||
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error");
|
||||
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error(" << op << ") failed");
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
#if 0
|
||||
if (op == HSA_API_ID_DISPATCH) {
|
||||
const bool succ = roctracer::RocpLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error(" << op << ") failed");
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
roctracer::hsa_support::cb_table.set(op, callback, user_data);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
if (roctracer::HipLoader::Instance().Enabled() == false) break;
|
||||
|
||||
hipError_t hip_err = roctracer::HipLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterApiCallback(" << op << ") error(" << hip_err << ")");
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RegisterApiCallback(" << op << ") error(" << hip_err << ")");
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_ROCTX: {
|
||||
if (roctracer::RocTxLoader::Instance().Enabled()) {
|
||||
const bool suc = roctracer::RocTxLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
|
||||
if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "roctxRegisterApiCallback(" << op << ") failed");
|
||||
if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RegisterApiCallback(" << op << ") failed");
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -668,8 +686,8 @@ static void roctracer_enable_callback_impl(
|
||||
roctracer_rtapi_callback_t callback,
|
||||
void* user_data)
|
||||
{
|
||||
roctracer::cb_journal->registr({domain, op, {callback, user_data}});
|
||||
roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data);
|
||||
roctracer::cb_journal->registr({domain, op, {callback, user_data}});
|
||||
roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data);
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_op_callback(
|
||||
@@ -712,25 +730,35 @@ static roctracer_status_t roctracer_disable_callback_fun(
|
||||
uint32_t op)
|
||||
{
|
||||
switch (domain) {
|
||||
#ifdef KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
const bool succ = roctracer::KfdLoader::Instance().RemoveApiCallback(op);
|
||||
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RemoveApiCallback error");
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
#if 0
|
||||
if (op == HSA_API_ID_DISPATCH) {
|
||||
const bool succ = roctracer::RocpLoader::Instance().RemoveApiCallback(op);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RemoveActivityCallback error(" << op << ") failed");
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
roctracer::hsa_support::cb_table.set(op, NULL, NULL);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
if (roctracer::HipLoader::Instance().Enabled() == false) break;
|
||||
|
||||
hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveApiCallback error(" << hip_err << ")");
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveApiCallback(" << op << "), error(" << hip_err << ")");
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_ROCTX: {
|
||||
if (roctracer::RocTxLoader::Instance().Enabled()) {
|
||||
const bool suc = roctracer::RocTxLoader::Instance().RemoveApiCallback(op);
|
||||
if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "roctxRemoveApiCallback(" << op << ") failed");
|
||||
if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RemoveApiCallback(" << op << ") failed");
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -777,7 +805,7 @@ PUBLIC_API roctracer_status_t roctracer_disable_callback()
|
||||
}
|
||||
|
||||
// Return default pool and set new one if parameter pool is not NULL.
|
||||
PUBLIC_API roctracer_pool_t* roctracer_default_pool(roctracer_pool_t* pool) {
|
||||
PUBLIC_API roctracer_pool_t* roctracer_default_pool_expl(roctracer_pool_t* pool) {
|
||||
std::lock_guard<roctracer::memory_pool_mutex_t> lock(roctracer::memory_pool_mutex);
|
||||
roctracer_pool_t* p = reinterpret_cast<roctracer_pool_t*>(roctracer::memory_pool);
|
||||
if (pool != NULL) roctracer::memory_pool = reinterpret_cast<roctracer::MemoryPool*>(pool);
|
||||
@@ -785,7 +813,7 @@ PUBLIC_API roctracer_pool_t* roctracer_default_pool(roctracer_pool_t* pool) {
|
||||
}
|
||||
|
||||
// Open memory pool
|
||||
PUBLIC_API roctracer_status_t roctracer_open_pool(
|
||||
PUBLIC_API roctracer_status_t roctracer_open_pool_expl(
|
||||
const roctracer_properties_t* properties,
|
||||
roctracer_pool_t** pool)
|
||||
{
|
||||
@@ -802,7 +830,7 @@ PUBLIC_API roctracer_status_t roctracer_open_pool(
|
||||
}
|
||||
|
||||
// Close memory pool
|
||||
PUBLIC_API roctracer_status_t roctracer_close_pool(roctracer_pool_t* pool) {
|
||||
PUBLIC_API roctracer_status_t roctracer_close_pool_expl(roctracer_pool_t* pool) {
|
||||
API_METHOD_PREFIX
|
||||
std::lock_guard<roctracer::memory_pool_mutex_t> lock(roctracer::memory_pool_mutex);
|
||||
roctracer_pool_t* ptr = (pool == NULL) ? roctracer_default_pool() : pool;
|
||||
@@ -821,14 +849,34 @@ static roctracer_status_t roctracer_enable_activity_fun(
|
||||
if (pool == NULL) pool = roctracer_default_pool();
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
roctracer::hsa_support::async_copy_callback_enabled = true;
|
||||
rocprofiler::InterceptQueue::Enable(true);
|
||||
if (op == HSA_OP_ID_COPY) {
|
||||
roctracer::hsa_support::async_copy_callback_enabled = true;
|
||||
} else {
|
||||
const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL);
|
||||
if (init_phase == true) {
|
||||
roctracer::RocpLoader::Instance().InitActivityCallback((void*)roctracer::HSA_AsyncActivityCallback,
|
||||
(void*)pool);
|
||||
}
|
||||
const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, true);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_KFD_API: break;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: {
|
||||
if (roctracer::HccLoader::GetRef() == NULL) {
|
||||
const bool init_phase = (roctracer::HccLoader::GetRef() == NULL);
|
||||
if (roctracer::HccLoader::Instance().Enabled() == false) break;
|
||||
|
||||
if (init_phase == true) {
|
||||
if (getenv("ROCP_HCC_CORRID_WAIT") != NULL) {
|
||||
roctracer::correlation_id_wait = true;
|
||||
fprintf(stdout, "roctracer: HCC correlation ID wait enabled\n"); fflush(stdout);
|
||||
}
|
||||
if (getenv("ROCP_HCC_CORRID_NOWAIT") != NULL) {
|
||||
roctracer::correlation_id_wait = false;
|
||||
fprintf(stdout, "roctracer: HCC correlation ID wait disabled\n"); fflush(stdout);
|
||||
}
|
||||
roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback,
|
||||
(void*)roctracer::HCC_AsyncActivityCallback,
|
||||
(void*)pool);
|
||||
@@ -838,6 +886,8 @@ static roctracer_status_t roctracer_enable_activity_fun(
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
if (roctracer::HipLoader::Instance().Enabled() == false) break;
|
||||
|
||||
const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(op, (void*)roctracer::HIP_SyncActivityCallback, (void*)pool);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterActivityCallback error(" << hip_err << ")");
|
||||
break;
|
||||
@@ -858,7 +908,7 @@ static void roctracer_enable_activity_impl(
|
||||
roctracer_enable_activity_fun((roctracer_domain_t)domain, op, pool);
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_op_activity(
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_op_activity_expl(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op,
|
||||
roctracer_pool_t* pool)
|
||||
@@ -868,7 +918,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_op_activity(
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_domain_activity(
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_domain_activity_expl(
|
||||
roctracer_domain_t domain,
|
||||
roctracer_pool_t* pool)
|
||||
{
|
||||
@@ -878,7 +928,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_domain_activity(
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_activity(
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_activity_expl(
|
||||
roctracer_pool_t* pool)
|
||||
{
|
||||
API_METHOD_PREFIX
|
||||
@@ -896,20 +946,29 @@ static roctracer_status_t roctracer_disable_activity_fun(
|
||||
{
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
roctracer::hsa_support::async_copy_callback_enabled = false;
|
||||
rocprofiler::InterceptQueue::Enable(false);
|
||||
if (op == HSA_OP_ID_COPY) {
|
||||
roctracer::hsa_support::async_copy_callback_enabled = true;
|
||||
} else {
|
||||
if (roctracer::RocpLoader::GetRef() == NULL) break;
|
||||
const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback(false) error, op(" << op << ")");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_KFD_API: break;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: {
|
||||
if (roctracer::HccLoader::Instance().Enabled() == false) break;
|
||||
|
||||
const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error domain(" << domain << ") op(" << op << ")");
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error, op(" << op << ")");
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
if (roctracer::HipLoader::Instance().Enabled() == false) break;
|
||||
|
||||
const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveActivityCallback error(" << hip_err << ")");
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")");
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_ROCTX: break;
|
||||
@@ -923,8 +982,8 @@ static void roctracer_disable_activity_impl(
|
||||
uint32_t domain,
|
||||
uint32_t op)
|
||||
{
|
||||
roctracer::act_journal->remove({domain, op, {}});
|
||||
roctracer_disable_activity_fun((roctracer_domain_t)domain, op);
|
||||
roctracer::act_journal->remove({domain, op, {}});
|
||||
roctracer_disable_activity_fun((roctracer_domain_t)domain, op);
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_op_activity(
|
||||
@@ -956,11 +1015,12 @@ PUBLIC_API roctracer_status_t roctracer_disable_activity()
|
||||
}
|
||||
|
||||
// Flush available activity records
|
||||
PUBLIC_API roctracer_status_t roctracer_flush_activity(roctracer_pool_t* pool) {
|
||||
PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* pool) {
|
||||
API_METHOD_PREFIX
|
||||
if (pool == NULL) pool = roctracer_default_pool();
|
||||
roctracer::MemoryPool* memory_pool = reinterpret_cast<roctracer::MemoryPool*>(pool);
|
||||
memory_pool->Flush();
|
||||
roctracer::TraceBufferBase::FlushAll();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -1001,16 +1061,26 @@ PUBLIC_API void roctracer_mark(const char* str) {
|
||||
|
||||
// Start API
|
||||
PUBLIC_API void roctracer_start() {
|
||||
if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb();
|
||||
roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun));
|
||||
roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun));
|
||||
if (roctracer::set_stopped(0)) {
|
||||
if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb();
|
||||
roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun));
|
||||
roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun));
|
||||
}
|
||||
}
|
||||
|
||||
// Stop API
|
||||
PUBLIC_API void roctracer_stop() {
|
||||
roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun));
|
||||
roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun));
|
||||
if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb();
|
||||
if (roctracer::set_stopped(1)) {
|
||||
roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun));
|
||||
roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun));
|
||||
if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb();
|
||||
}
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp) {
|
||||
API_METHOD_PREFIX
|
||||
*timestamp = util::HsaRsrcFactory::Instance().TimestampNs();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Set properties
|
||||
@@ -1021,6 +1091,8 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
API_METHOD_PREFIX
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
roctracer::trace_buffer.StartWorkerThread();
|
||||
|
||||
// HSA OPS properties
|
||||
roctracer::hsa_ops_properties_t* ops_properties = reinterpret_cast<roctracer::hsa_ops_properties_t*>(properties);
|
||||
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(ops_properties->table);
|
||||
@@ -1046,12 +1118,10 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
|
||||
break;
|
||||
}
|
||||
#ifdef KFD_WRAPPER
|
||||
case ACTIVITY_DOMAIN_KFD_API: {
|
||||
roctracer::kfd_support::intercept_KFDApiTable();
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
// HSA API properties
|
||||
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(properties);
|
||||
@@ -1062,7 +1132,7 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HCC_OPS:
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
#ifdef HIP_VDI
|
||||
#if HIP_VDI
|
||||
const char* hip_lib_name = "libamdhip64.so";
|
||||
roctracer::HccLoader::SetLibName(hip_lib_name);
|
||||
roctracer::HipLoader::SetLibName(hip_lib_name);
|
||||
@@ -1082,58 +1152,54 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
PUBLIC_API bool roctracer_load(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
if (onload_debug) { printf("LIB roctracer_load\n"); fflush(stdout); }
|
||||
static bool is_loaded = false;
|
||||
if (is_loaded) return true;
|
||||
static bool is_loaded = false;
|
||||
|
||||
PUBLIC_API bool roctracer_load() {
|
||||
ONLOAD_TRACE("begin, loaded(" << is_loaded << ")");
|
||||
|
||||
if (is_loaded == true) return true;
|
||||
is_loaded = true;
|
||||
|
||||
if (onload_debug) { printf("LIB roctracer_load end\n"); fflush(stdout); }
|
||||
if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal;
|
||||
if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal;
|
||||
|
||||
ONLOAD_TRACE_END();
|
||||
return true;
|
||||
}
|
||||
|
||||
PUBLIC_API void roctracer_unload(bool destruct) {
|
||||
static bool is_unloaded = false;
|
||||
PUBLIC_API void roctracer_unload() {
|
||||
ONLOAD_TRACE("begin, loaded(" << is_loaded << ")");
|
||||
|
||||
if (onload_debug) { printf("LIB roctracer_unload (%d, %d)\n", (int)destruct, (int)is_unloaded); fflush(stdout); }
|
||||
if (destruct == false) return;
|
||||
if (is_unloaded == true) return;
|
||||
is_unloaded = true;
|
||||
if (is_loaded == false) return;
|
||||
is_loaded = false;
|
||||
|
||||
if (roctracer::cb_journal != NULL) {
|
||||
delete roctracer::cb_journal;
|
||||
roctracer::cb_journal = NULL;
|
||||
}
|
||||
if (roctracer::act_journal != NULL) {
|
||||
delete roctracer::act_journal;
|
||||
roctracer::act_journal = NULL;
|
||||
}
|
||||
|
||||
roctracer::trace_buffer.Flush();
|
||||
roctracer::close_output_file(roctracer::kernel_file_handle);
|
||||
if (onload_debug) { printf("LIB roctracer_unload end\n"); fflush(stdout); }
|
||||
}
|
||||
|
||||
PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
if (onload_debug) { printf("LIB OnLoad\n"); fflush(stdout); }
|
||||
const bool ret = roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names);
|
||||
if (onload_debug) { printf("LIB OnLoad end\n"); fflush(stdout); }
|
||||
return ret;
|
||||
}
|
||||
PUBLIC_API void OnUnload() {
|
||||
if (onload_debug) { printf("LIB OnUnload\n"); fflush(stdout); }
|
||||
roctracer_unload(false);
|
||||
if (onload_debug) { printf("LIB OnUnload end\n"); fflush(stdout); }
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
|
||||
CONSTRUCTOR_API void constructor() {
|
||||
if (onload_debug) { printf("LIB constructor\n"); fflush(stdout); }
|
||||
ONLOAD_TRACE_BEG();
|
||||
roctracer::util::Logger::Create();
|
||||
if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal;
|
||||
if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal;
|
||||
if (onload_debug) { printf("LIB constructor end\n"); fflush(stdout); }
|
||||
roctracer_load();
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
|
||||
DESTRUCTOR_API void destructor() {
|
||||
if (onload_debug) { printf("LIB destructor\n"); fflush(stdout); }
|
||||
roctracer_unload(true);
|
||||
ONLOAD_TRACE_BEG();
|
||||
roctracer_unload();
|
||||
util::HsaRsrcFactory::Destroy();
|
||||
roctracer::util::Logger::Destroy();
|
||||
if (onload_debug) { printf("LIB destructor end\n"); fflush(stdout); }
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -2,12 +2,23 @@
|
||||
#define SRC_CORE_TRACE_BUFFER_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
|
||||
#include <pthread.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define FATAL(stream) \
|
||||
do { \
|
||||
std::ostringstream oss; \
|
||||
oss << __FUNCTION__ << "(), " << stream; \
|
||||
std::cout << oss.str() << std::endl; \
|
||||
abort(); \
|
||||
} while (0)
|
||||
|
||||
#define PTHREAD_CALL(call) \
|
||||
do { \
|
||||
int err = call; \
|
||||
@@ -53,8 +64,55 @@ struct trace_entry_t {
|
||||
};
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct push_element_fun {
|
||||
T* const elem_;
|
||||
void fun(T* node) { if (node->next_elem_ == NULL) node->next_elem_ = elem_; }
|
||||
push_element_fun(T* elem) : elem_(elem) {}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct call_element_fun {
|
||||
void (T::*fptr_)();
|
||||
void fun(T* node) { (node->*fptr_)(); }
|
||||
call_element_fun(void (T::*f)()) : fptr_(f) {}
|
||||
};
|
||||
|
||||
struct TraceBufferBase {
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
virtual void StartWorkerThread() = 0;
|
||||
virtual void Flush() = 0;
|
||||
|
||||
static void StartWorkerThreadAll() { foreach(call_element_fun<TraceBufferBase>(&TraceBufferBase::StartWorkerThread)); }
|
||||
static void FlushAll() { foreach(call_element_fun<TraceBufferBase>(&TraceBufferBase::Flush)); }
|
||||
|
||||
static void Push(TraceBufferBase* elem) {
|
||||
if (head_elem_ == NULL) head_elem_ = elem;
|
||||
else foreach(push_element_fun<TraceBufferBase>(elem));
|
||||
}
|
||||
|
||||
TraceBufferBase() : next_elem_(NULL) {}
|
||||
|
||||
template<class F>
|
||||
static void foreach(const F& f_in) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
F f = f_in;
|
||||
TraceBufferBase* p = head_elem_;
|
||||
while (p != NULL) {
|
||||
TraceBufferBase* next = p->next_elem_;
|
||||
f.fun(p);
|
||||
p = next;
|
||||
}
|
||||
}
|
||||
|
||||
TraceBufferBase* next_elem_;
|
||||
static TraceBufferBase* head_elem_;
|
||||
static mutex_t mutex_;
|
||||
};
|
||||
|
||||
template <typename Entry>
|
||||
class TraceBuffer {
|
||||
class TraceBuffer : protected TraceBufferBase {
|
||||
public:
|
||||
typedef void (*callback_t)(Entry*);
|
||||
typedef TraceBuffer<Entry> Obj;
|
||||
@@ -67,7 +125,8 @@ class TraceBuffer {
|
||||
};
|
||||
|
||||
TraceBuffer(const char* name, uint32_t size, flush_prm_t* flush_prm_arr, uint32_t flush_prm_count) :
|
||||
is_flushed_(false)
|
||||
is_flushed_(false),
|
||||
work_thread_started_(false)
|
||||
{
|
||||
name_ = strdup(name);
|
||||
size_ = size;
|
||||
@@ -80,31 +139,43 @@ class TraceBuffer {
|
||||
flush_prm_arr_ = flush_prm_arr;
|
||||
flush_prm_count_ = flush_prm_count;
|
||||
|
||||
PTHREAD_CALL(pthread_mutex_init(&work_mutex_, NULL));
|
||||
PTHREAD_CALL(pthread_cond_init(&work_cond_, NULL));
|
||||
PTHREAD_CALL(pthread_create(&work_thread_, NULL, allocate_worker, this));
|
||||
TraceBufferBase::Push(this);
|
||||
}
|
||||
|
||||
~TraceBuffer() {
|
||||
PTHREAD_CALL(pthread_cancel(work_thread_));
|
||||
void *res;
|
||||
PTHREAD_CALL(pthread_join(work_thread_, &res));
|
||||
if (res != PTHREAD_CANCELED) abort_run("~TraceBuffer: consumer thread wasn't stopped correctly");
|
||||
|
||||
StopWorkerThread();
|
||||
Flush();
|
||||
}
|
||||
|
||||
void StartWorkerThread() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (work_thread_started_ == false) {
|
||||
PTHREAD_CALL(pthread_mutex_init(&work_mutex_, NULL));
|
||||
PTHREAD_CALL(pthread_cond_init(&work_cond_, NULL));
|
||||
PTHREAD_CALL(pthread_create(&work_thread_, NULL, allocate_worker, this));
|
||||
work_thread_started_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void StopWorkerThread() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (work_thread_started_ == true) {
|
||||
PTHREAD_CALL(pthread_cancel(work_thread_));
|
||||
void *res;
|
||||
PTHREAD_CALL(pthread_join(work_thread_, &res));
|
||||
if (res != PTHREAD_CANCELED) FATAL("consumer thread wasn't stopped correctly");
|
||||
work_thread_started_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
Entry* GetEntry() {
|
||||
const pointer_t pointer = read_pointer_.fetch_add(1);
|
||||
if (pointer >= end_pointer_) wrap_buffer(pointer);
|
||||
if (pointer >= end_pointer_) abort_run("pointer >= end_pointer_ after buffer wrap");
|
||||
if (pointer >= end_pointer_) FATAL("pointer >= end_pointer_ after buffer wrap");
|
||||
return data_ + (pointer + size_ - end_pointer_);
|
||||
}
|
||||
|
||||
void Flush() {
|
||||
flush_buf();
|
||||
}
|
||||
void Flush() { flush_buf(); }
|
||||
|
||||
private:
|
||||
void flush_buf() {
|
||||
@@ -113,8 +184,12 @@ class TraceBuffer {
|
||||
|
||||
if (is_flushed == false) {
|
||||
for (flush_prm_t* prm = flush_prm_arr_; prm < flush_prm_arr_ + flush_prm_count_; prm++) {
|
||||
// Flushed entries type
|
||||
uint32_t type = prm->type;
|
||||
// Flushing function
|
||||
callback_t fun = prm->fun;
|
||||
if (fun == NULL) FATAL("flush function is not set");
|
||||
|
||||
pointer_t pointer = 0;
|
||||
for (Entry* ptr : buf_list_) {
|
||||
Entry* end = ptr + size_;
|
||||
@@ -134,7 +209,7 @@ class TraceBuffer {
|
||||
|
||||
inline Entry* allocate_fun() {
|
||||
Entry* ptr = (Entry*) malloc(size_ * sizeof(Entry));
|
||||
if (ptr == NULL) abort_run("TraceBuffer::allocate_fun: calloc failed");
|
||||
if (ptr == NULL) FATAL("malloc failed");
|
||||
//memset(ptr, 0, size_ * sizeof(Entry));
|
||||
return ptr;
|
||||
}
|
||||
@@ -156,24 +231,20 @@ class TraceBuffer {
|
||||
|
||||
void wrap_buffer(const pointer_t pointer) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (work_thread_started_ == false) FATAL("worker thread is not started");
|
||||
|
||||
PTHREAD_CALL(pthread_mutex_lock(&work_mutex_));
|
||||
if (pointer >= end_pointer_) {
|
||||
data_ = next_;
|
||||
next_ = NULL;
|
||||
PTHREAD_CALL(pthread_cond_signal(&work_cond_));
|
||||
end_pointer_ += size_;
|
||||
if (end_pointer_ == 0) abort_run("TraceBuffer::wrap_buffer: pointer overflow");
|
||||
if (end_pointer_ == 0) FATAL("pointer overflow");
|
||||
buf_list_.push_back(data_);
|
||||
}
|
||||
PTHREAD_CALL(pthread_mutex_unlock(&work_mutex_));
|
||||
}
|
||||
|
||||
void abort_run(const char* str) {
|
||||
fprintf(stderr, "%s\n", str);
|
||||
fflush(stderr);
|
||||
abort();
|
||||
}
|
||||
|
||||
const char* name_;
|
||||
uint32_t size_;
|
||||
Entry* data_;
|
||||
@@ -189,9 +260,14 @@ class TraceBuffer {
|
||||
pthread_t work_thread_;
|
||||
pthread_mutex_t work_mutex_;
|
||||
pthread_cond_t work_cond_;
|
||||
bool work_thread_started_;
|
||||
|
||||
mutex_t mutex_;
|
||||
};
|
||||
} // namespace roctracer
|
||||
|
||||
#define TRACE_BUFFER_INSTANTIATE() \
|
||||
roctracer::TraceBufferBase* roctracer::TraceBufferBase::head_elem_ = NULL; \
|
||||
roctracer::TraceBufferBase::mutex_t roctracer::TraceBufferBase::mutex_;
|
||||
|
||||
#endif // SRC_CORE_TRACE_BUFFER_H_
|
||||
|
||||
@@ -44,9 +44,6 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "util/exception.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
namespace util {
|
||||
|
||||
// Callback function to get available in the system agents
|
||||
@@ -149,6 +146,11 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
|
||||
CHECK_STATUS("HSA timer allocation failed",
|
||||
(timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
|
||||
|
||||
// Time correlation
|
||||
const uint32_t corr_iters = 1000;
|
||||
CorrelateTime(HsaTimer::TIME_ID_CLOCK_REALTIME, corr_iters);
|
||||
CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters);
|
||||
|
||||
// System timeout
|
||||
timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_);
|
||||
}
|
||||
@@ -192,6 +194,8 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn;
|
||||
hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
|
||||
hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn;
|
||||
hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn;
|
||||
hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn;
|
||||
|
||||
hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn;
|
||||
hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn;
|
||||
@@ -230,6 +234,8 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object;
|
||||
hsa_api_.hsa_executable_freeze = hsa_executable_freeze;
|
||||
hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol;
|
||||
hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info;
|
||||
hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols;
|
||||
|
||||
hsa_api_.hsa_system_get_info = hsa_system_get_info;
|
||||
hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table;
|
||||
@@ -336,6 +342,11 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
|
||||
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status);
|
||||
|
||||
// GFX8 and GFX9 SGPR/VGPR block sizes
|
||||
agent_info->sgpr_block_dflt = (strcmp(agent_info->gfxip, "gfx8") == 0) ? 1 : 2;
|
||||
agent_info->sgpr_block_size = 8;
|
||||
agent_info->vgpr_block_size = 4;
|
||||
|
||||
// Set GPU index
|
||||
agent_info->dev_index = gpu_list_.size();
|
||||
gpu_list_.push_back(agent_info);
|
||||
@@ -508,22 +519,25 @@ uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t s
|
||||
}
|
||||
|
||||
// Wait signal
|
||||
void HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const {
|
||||
hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
|
||||
const hsa_signal_value_t exp_value = signal_value - 1;
|
||||
hsa_signal_value_t ret_value = signal_value;
|
||||
while (1) {
|
||||
const hsa_signal_value_t signal_value =
|
||||
hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, timeout_, HSA_WAIT_STATE_BLOCKED);
|
||||
if (signal_value == 0) {
|
||||
break;
|
||||
} else {
|
||||
if (signal_value == 1) WARN_LOGGING("signal waiting...");
|
||||
else EXC_RAISING(HSA_STATUS_ERROR, "hsa_signal_wait_scacquire (" << signal_value << ")");
|
||||
ret_value =
|
||||
hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, timeout_, HSA_WAIT_STATE_BLOCKED);
|
||||
if (ret_value == exp_value) break;
|
||||
if (ret_value != signal_value) {
|
||||
std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value
|
||||
<< "), ret_value(" << ret_value << ")" << std::endl << std::flush;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
|
||||
SignalWait(signal);
|
||||
SignalWait(signal, signal_value);
|
||||
hsa_api_.hsa_signal_store_relaxed(const_cast<hsa_signal_t&>(signal), signal_value);
|
||||
}
|
||||
|
||||
@@ -536,7 +550,7 @@ bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src
|
||||
CHECK_STATUS("hsa_signal_create()", status);
|
||||
status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s);
|
||||
CHECK_STATUS("hsa_amd_memory_async_copy()", status);
|
||||
SignalWait(s);
|
||||
SignalWait(s, 1);
|
||||
status = hsa_api_.hsa_signal_destroy(s);
|
||||
CHECK_STATUS("hsa_signal_destroy()", status);
|
||||
}
|
||||
@@ -680,9 +694,59 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t s
|
||||
return write_idx;
|
||||
}
|
||||
|
||||
const char* HsaRsrcFactory::GetKernelName(uint64_t addr) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
const auto it = symbols_map_->find(addr);
|
||||
if (it == symbols_map_->end()) {
|
||||
fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr);
|
||||
abort();
|
||||
}
|
||||
return strdup(it->second);
|
||||
}
|
||||
|
||||
void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
executable_tracking_on_ = true;
|
||||
table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) {
|
||||
hsa_symbol_kind_t value = (hsa_symbol_kind_t)0;
|
||||
hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value);
|
||||
CHECK_STATUS("Error in getting symbol info", status);
|
||||
if (value == HSA_SYMBOL_KIND_KERNEL) {
|
||||
uint64_t addr = 0;
|
||||
uint32_t len = 0;
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr);
|
||||
CHECK_STATUS("Error in getting kernel object", status);
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
|
||||
CHECK_STATUS("Error in getting name len", status);
|
||||
char *name = new char[len + 1];
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
|
||||
CHECK_STATUS("Error in getting kernel name", status);
|
||||
name[len] = 0;
|
||||
auto ret = symbols_map_->insert({addr, name});
|
||||
if (ret.second == false) {
|
||||
delete[] ret.first->second;
|
||||
ret.first->second = name;
|
||||
}
|
||||
}
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t;
|
||||
hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL);
|
||||
CHECK_STATUS("Error in iterating executable symbols", status);
|
||||
return hsa_api_.hsa_executable_freeze(executable, options);;
|
||||
}
|
||||
|
||||
std::atomic<HsaRsrcFactory*> HsaRsrcFactory::instance_{};
|
||||
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
|
||||
HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX;
|
||||
hsa_pfn_t HsaRsrcFactory::hsa_api_{};
|
||||
bool HsaRsrcFactory::executable_tracking_on_ = false;
|
||||
HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = NULL;
|
||||
|
||||
} // namespace util
|
||||
|
||||
@@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
@@ -94,6 +95,8 @@ struct hsa_pfn_t {
|
||||
decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object;
|
||||
decltype(hsa_executable_freeze)* hsa_executable_freeze;
|
||||
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol;
|
||||
decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info;
|
||||
decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols;
|
||||
|
||||
decltype(hsa_system_get_info)* hsa_system_get_info;
|
||||
decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table;
|
||||
@@ -159,6 +162,11 @@ struct AgentInfo {
|
||||
|
||||
// Number of Shader Arrays Per Shader Engines in Gpu
|
||||
uint32_t shader_arrays_per_se;
|
||||
|
||||
// SGPR/VGPR block sizes
|
||||
uint32_t sgpr_block_dflt;
|
||||
uint32_t sgpr_block_size;
|
||||
uint32_t vgpr_block_size;
|
||||
};
|
||||
|
||||
// HSA timer class
|
||||
@@ -169,6 +177,12 @@ class HsaTimer {
|
||||
static const timestamp_t TIMESTAMP_MAX = UINT64_MAX;
|
||||
typedef long double freq_t;
|
||||
|
||||
enum time_id_t {
|
||||
TIME_ID_CLOCK_REALTIME = 0,
|
||||
TIME_ID_CLOCK_MONOTONIC = 1,
|
||||
TIME_ID_NUMBER
|
||||
};
|
||||
|
||||
HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) {
|
||||
timestamp_t sysclock_hz = 0;
|
||||
hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
@@ -184,6 +198,11 @@ class HsaTimer {
|
||||
return timestamp_t((freq_t)time / sysclock_factor_);
|
||||
}
|
||||
|
||||
// Method for timespec/ns conversion
|
||||
static timestamp_t timespec_to_ns(const timespec& time) {
|
||||
return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec;
|
||||
}
|
||||
|
||||
// Return timestamp in 'ns'
|
||||
timestamp_t timestamp_ns() const {
|
||||
timestamp_t sysclock;
|
||||
@@ -192,6 +211,54 @@ class HsaTimer {
|
||||
return sysclock_to_ns(sysclock);
|
||||
}
|
||||
|
||||
// Return time in 'ns'
|
||||
static timestamp_t clocktime_ns(clockid_t clock_id) {
|
||||
timespec time;
|
||||
clock_gettime(clock_id, &time);
|
||||
return timespec_to_ns(time);
|
||||
}
|
||||
|
||||
// Return pair of correlated values of profiling timestamp and time with
|
||||
// correlation error for a given time ID and number of iterations
|
||||
void correlated_pair_ns(time_id_t time_id, uint32_t iters,
|
||||
timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) const {
|
||||
clockid_t clock_id = 0;
|
||||
switch (clock_id) {
|
||||
case TIME_ID_CLOCK_REALTIME:
|
||||
clock_id = CLOCK_REALTIME;
|
||||
break;
|
||||
case TIME_ID_CLOCK_MONOTONIC:
|
||||
clock_id = CLOCK_MONOTONIC;
|
||||
break;
|
||||
default:
|
||||
CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR);
|
||||
}
|
||||
|
||||
std::vector<timestamp_t> ts_vec(iters);
|
||||
std::vector<timespec> tm_vec(iters);
|
||||
const uint32_t steps = iters - 1;
|
||||
|
||||
for (uint32_t i = 0; i < iters; ++i) {
|
||||
hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]);
|
||||
clock_gettime(clock_id, &tm_vec[i]);
|
||||
}
|
||||
|
||||
const timestamp_t ts_base = sysclock_to_ns(ts_vec.front());
|
||||
const timestamp_t tm_base = timespec_to_ns(tm_vec.front());
|
||||
const timestamp_t error = (ts_vec.back() - ts_vec.front()) / (2 * steps);
|
||||
|
||||
timestamp_t ts_accum = 0;
|
||||
timestamp_t tm_accum = 0;
|
||||
for (uint32_t i = 0; i < iters; ++i) {
|
||||
ts_accum += (ts_vec[i] - ts_base);
|
||||
tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base);
|
||||
}
|
||||
|
||||
*timestamp_v = (ts_accum / iters) + ts_base + error;
|
||||
*time_v = (tm_accum / iters) + tm_base;
|
||||
*error_v = error;
|
||||
}
|
||||
|
||||
private:
|
||||
// Timestamp frequency factor
|
||||
freq_t sysclock_factor_;
|
||||
@@ -293,7 +360,7 @@ class HsaRsrcFactory {
|
||||
uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Wait signal
|
||||
void SignalWait(const hsa_signal_t& signal) const;
|
||||
hsa_signal_value_t SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
|
||||
@@ -322,6 +389,11 @@ class HsaRsrcFactory {
|
||||
static uint64_t Submit(hsa_queue_t* queue, const void* packet);
|
||||
static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes);
|
||||
|
||||
// Enable executables loading tracking
|
||||
static bool IsExecutableTracking() { return executable_tracking_on_; }
|
||||
static void EnableExecutableTracking(HsaApiTable* table);
|
||||
static const char* GetKernelName(uint64_t addr);
|
||||
|
||||
// Initialize HSA API table
|
||||
void static InitHsaApiTable(HsaApiTable* table);
|
||||
static const hsa_pfn_t* HsaApi() { return &hsa_api_; }
|
||||
@@ -346,6 +418,27 @@ class HsaRsrcFactory {
|
||||
if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time);
|
||||
}
|
||||
|
||||
void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters) {
|
||||
timestamp_t timestamp_v = 0;
|
||||
timestamp_t time_v = 0;
|
||||
timestamp_t error_v = 0;
|
||||
timer_->correlated_pair_ns(time_id, iters, ×tamp_v, &time_v, &error_v);
|
||||
time_shift_[time_id] = time_v - timestamp_v;
|
||||
time_error_[time_id] = error_v;
|
||||
}
|
||||
|
||||
hsa_status_t GetTime(uint32_t time_id, timestamp_t value, uint64_t* time) {
|
||||
if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR;
|
||||
*time = value + time_shift_[time_id];
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t GetTimestamp(uint32_t time_id, uint64_t value, timestamp_t* timestamp) {
|
||||
if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR;
|
||||
*timestamp = value - time_shift_[time_id];
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
private:
|
||||
// System agents iterating callback
|
||||
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data);
|
||||
@@ -386,6 +479,13 @@ class HsaRsrcFactory {
|
||||
// System agents map
|
||||
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
|
||||
|
||||
// Executables loading tracking
|
||||
typedef std::map<uint64_t, const char*> symbols_map_t;
|
||||
static symbols_map_t* symbols_map_;
|
||||
static bool executable_tracking_on_;
|
||||
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options);
|
||||
static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data);
|
||||
|
||||
// HSA runtime API table
|
||||
static hsa_pfn_t hsa_api_;
|
||||
|
||||
@@ -403,6 +503,10 @@ class HsaRsrcFactory {
|
||||
// HSA timer
|
||||
HsaTimer* timer_;
|
||||
|
||||
// Time shift array to support time conversion
|
||||
timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER];
|
||||
timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER];
|
||||
|
||||
// CPU/kern-arg memory pools
|
||||
hsa_amd_memory_pool_t *cpu_pool_;
|
||||
hsa_amd_memory_pool_t *kern_arg_pool_;
|
||||
|
||||
@@ -58,9 +58,9 @@ target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c std
|
||||
|
||||
## Build HSA test
|
||||
execute_process ( COMMAND sh -xc "if [ ! -e ${TEST_DIR}/hsa ] ; then git clone https://github.com/ROCmSoftwarePlatform/hsa-class.git ${TEST_DIR}/hsa; fi" )
|
||||
execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout a3aabb5; fi" )
|
||||
execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout 777c308; fi" )
|
||||
set ( TEST_DIR ${HSA_TEST_DIR} )
|
||||
add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test/hsa )
|
||||
|
||||
## copying run script
|
||||
execute_process ( COMMAND sh -xc "cp ${RUN_SCRIPT} ${PROJECT_BINARY_DIR}" )
|
||||
execute_process ( COMMAND sh -xc "cp ${RUN_SCRIPT} ${PROJECT_BINARY_DIR}" )
|
||||
@@ -1,7 +1,6 @@
|
||||
ROOT_PATH = ../..
|
||||
LIB_PATH = $(ROOT_PATH)/build
|
||||
ROC_LIBS = -L$(LIB_PATH) -lroctracer64
|
||||
export LD_LIBRARY_PATH=$(LIB_PATH)
|
||||
ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so $(LIB_PATH)/libroctx64.so
|
||||
|
||||
HIP_PATH?= $(wildcard /opt/rocm/hip)
|
||||
ifeq (,$(HIP_PATH))
|
||||
|
||||
@@ -23,8 +23,10 @@ THE SOFTWARE.
|
||||
#include <iostream>
|
||||
|
||||
// hip header file
|
||||
#include "hip/hip_runtime.h"
|
||||
#include <hip/hip_runtime.h>
|
||||
#include "roctracer_ext.h"
|
||||
// roctx header file
|
||||
#include <inc/roctx.h>
|
||||
|
||||
|
||||
#define WIDTH 1024
|
||||
@@ -94,15 +96,23 @@ int main() {
|
||||
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
|
||||
|
||||
roctracer_mark("before HIP LaunchKernel");
|
||||
roctxMark("before hipLaunchKernel");
|
||||
roctxRangePush("hipLaunchKernel");
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
|
||||
gpuMatrix, WIDTH);
|
||||
roctracer_mark("after HIP LaunchKernel");
|
||||
roctxMark("after hipLaunchKernel");
|
||||
|
||||
// Memory transfer from device to host
|
||||
roctxRangePush("hipMemcpy");
|
||||
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
|
||||
roctxRangePop(); // for "hipMemcpy"
|
||||
roctxRangePop(); // for "hipLaunchKernel"
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
ROOT_PATH = ../..
|
||||
LIB_PATH = $(ROOT_PATH)/build
|
||||
ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -lroctx64
|
||||
HSA_KMT_INC_PATH ?= /opt/rocm/include
|
||||
export LD_LIBRARY_PATH=$(LIB_PATH)
|
||||
ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so $(LIB_PATH)/libroctx64.so
|
||||
ROCM_INC_PATH ?= /opt/rocm/include
|
||||
HIP_VDI ?= 0
|
||||
ITERATIONS ?= 100
|
||||
|
||||
@@ -12,27 +11,37 @@ ifeq (,$(HIP_PATH))
|
||||
endif
|
||||
|
||||
HIPCC=$(HIP_PATH)/bin/hipcc
|
||||
|
||||
TARGET=hcc
|
||||
|
||||
SOURCES = MatrixTranspose.cpp
|
||||
OBJECTS = $(SOURCES:.cpp=.o)
|
||||
|
||||
EXECUTABLE=./MatrixTranspose
|
||||
OBJECTS = MatrixTranspose.o
|
||||
FLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${ROCM_INC_PATH}/hsa -I${ROCM_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) -DAMD_INTERNAL_BUILD=1
|
||||
|
||||
ifeq ($(C_TEST), 1)
|
||||
COMP=gcc
|
||||
SOURCES = MatrixTranspose.c
|
||||
FLAGS += -DHIP_TEST=0 -D__HIP_PLATFORM_HCC__=1 -I${ROCM_INC_PATH}/hcc
|
||||
else
|
||||
COMP=$(HIPCC)
|
||||
SOURCES = MatrixTranspose.cpp
|
||||
FLAGS += -DHIP_TEST=1
|
||||
endif
|
||||
ifeq ($(MGPU_TEST), 1)
|
||||
FLAGS += -DMGPU_TEST=1
|
||||
endif
|
||||
|
||||
.PHONY: test
|
||||
|
||||
|
||||
all: clean $(EXECUTABLE)
|
||||
|
||||
CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS)
|
||||
CXX=$(HIPCC)
|
||||
$(OBJECTS): $(SOURCES)
|
||||
$(COMP) $(FLAGS) -c -o $@ $<
|
||||
|
||||
$(EXECUTABLE): $(OBJECTS)
|
||||
$(HIPCC) $(OBJECTS) -o $@ $(ROC_LIBS)
|
||||
|
||||
test: $(EXECUTABLE)
|
||||
$(EXECUTABLE)
|
||||
LD_PRELOAD="$(LIB_PATH)/libkfdwrapper64.so librocprofiler64.so" $(EXECUTABLE)
|
||||
|
||||
clean:
|
||||
rm -f $(EXECUTABLE)
|
||||
|
||||
Символическая ссылка
@@ -0,0 +1 @@
|
||||
MatrixTranspose.cpp
|
||||
@@ -238,20 +238,22 @@ void api_callback(
|
||||
fprintf(stdout, "<rocTX \"%s\">\n", data->args.message);
|
||||
return;
|
||||
}
|
||||
|
||||
if (domain == ACTIVITY_DOMAIN_KFD_API) {
|
||||
const kfd_api_data_t* data = (const kfd_api_data_t*)(callback_data);
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s pid(%u) tid(%u)> \n",
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> \n",
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_KFD_API, cid, 0),
|
||||
cid,
|
||||
data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit",
|
||||
GetPid(),
|
||||
GetTid()
|
||||
);
|
||||
return;
|
||||
}
|
||||
const hip_api_data_t* data = (const hip_api_data_t*)(callback_data);
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0),
|
||||
cid,
|
||||
data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
switch (cid) {
|
||||
case HIP_API_ID_hipMemcpy:
|
||||
@@ -326,7 +328,7 @@ void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
record->end_ns
|
||||
);
|
||||
if ((record->domain == ACTIVITY_DOMAIN_HIP_API) || (record->domain == ACTIVITY_DOMAIN_KFD_API)) {
|
||||
fprintf(stdout, " process_id(%u) thread_id(%u)\n",
|
||||
fprintf(stdout, " process_id(%u) thread_id(%u)",
|
||||
record->process_id,
|
||||
record->thread_id
|
||||
);
|
||||
@@ -335,7 +337,13 @@ void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
record->device_id,
|
||||
record->queue_id
|
||||
);
|
||||
if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)\n", record->bytes);
|
||||
if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes);
|
||||
} else if (record->domain == ACTIVITY_DOMAIN_HSA_OPS) {
|
||||
fprintf(stdout, " se(%u) cycle(%lu) pc(%lx)",
|
||||
record->pc_sample.se,
|
||||
record->pc_sample.cycle,
|
||||
record->pc_sample.pc
|
||||
);
|
||||
} else if (record->domain == ACTIVITY_DOMAIN_EXT_API) {
|
||||
fprintf(stdout, " external_id(%lu)\n",
|
||||
record->external_id
|
||||
@@ -365,9 +373,10 @@ void init_tracing() {
|
||||
// Enable HIP activity tracing
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
// Enable PC sampling
|
||||
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_PCSAMPLE));
|
||||
// Enable KFD API tracing
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, api_callback, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_KFD_API));
|
||||
// Enable rocTX
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, NULL));
|
||||
}
|
||||
@@ -385,7 +394,9 @@ void stop_tracing() {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_KFD_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));
|
||||
ROCTRACER_CALL(roctracer_flush_activity());
|
||||
printf("# STOP #############################\n");
|
||||
}
|
||||
|
||||
@@ -90,6 +90,7 @@ eval_test "tool SYS/HSA test" ./test/MatrixTranspose "test/MatrixTranspose_sys_h
|
||||
# Tracing control <delay:length:rate>
|
||||
export ROCTRACER_DOMAIN="hip"
|
||||
eval_test "tool period test" "ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose" "test/MatrixTranspose_hip_trace"
|
||||
eval_test "tool flushing test" "ROCP_FLUSH_RATE=100000 ./test/MatrixTranspose" "test/MatrixTranspose_hip_trace_flush"
|
||||
|
||||
# HSA test
|
||||
export ROCTRACER_DOMAIN="hsa"
|
||||
|
||||
+258
-171
@@ -37,14 +37,14 @@ THE SOFTWARE.
|
||||
#include <inc/roctracer_hsa.h>
|
||||
#include <inc/roctracer_hip.h>
|
||||
#include <inc/roctracer_hcc.h>
|
||||
#ifdef KFD_WRAPPER
|
||||
#include <inc/roctracer_kfd.h>
|
||||
#endif
|
||||
#include <inc/ext/hsa_rt_utils.hpp>
|
||||
#include <src/core/loader.h>
|
||||
#include <src/core/trace_buffer.h>
|
||||
#include <util/xml.h>
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
@@ -59,9 +59,12 @@ THE SOFTWARE.
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#ifndef onload_debug
|
||||
#define onload_debug false
|
||||
#endif
|
||||
#define ONLOAD_TRACE(str) \
|
||||
if (getenv("ROCP_ONLOAD_TRACE")) do { \
|
||||
std::cout << "PID(" << GetPid() << "): TRACER_TOOL::" << __FUNCTION__ << " " << str << std::endl << std::flush; \
|
||||
} while(0);
|
||||
#define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin")
|
||||
#define ONLOAD_TRACE_END() ONLOAD_TRACE("end")
|
||||
|
||||
typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
|
||||
hsa_rt_utils::Timer* timer = NULL;
|
||||
@@ -74,8 +77,13 @@ bool trace_hsa_activity = false;
|
||||
bool trace_hip_api = false;
|
||||
bool trace_hip_activity = false;
|
||||
bool trace_kfd = false;
|
||||
bool trace_pcs = false;
|
||||
// API trace vector
|
||||
std::vector<std::string> hsa_api_vec;
|
||||
std::vector<std::string> kfd_api_vec;
|
||||
|
||||
LOADER_INSTANTIATE();
|
||||
TRACE_BUFFER_INSTANTIATE();
|
||||
|
||||
// Global output file handle
|
||||
FILE* roctx_file_handle = NULL;
|
||||
@@ -84,18 +92,25 @@ FILE* hsa_async_copy_file_handle = NULL;
|
||||
FILE* hip_api_file_handle = NULL;
|
||||
FILE* hcc_activity_file_handle = NULL;
|
||||
FILE* kfd_api_file_handle = NULL;
|
||||
FILE* pc_sample_file_handle = NULL;
|
||||
|
||||
void close_output_file(FILE* file_handle);
|
||||
void close_file_handles() {
|
||||
if (roctx_file_handle) close_output_file(roctx_file_handle);
|
||||
if (hsa_api_file_handle) close_output_file(hsa_api_file_handle);
|
||||
if (hsa_async_copy_file_handle) close_output_file(hsa_async_copy_file_handle);
|
||||
if (hip_api_file_handle) close_output_file(hip_api_file_handle);
|
||||
if (hcc_activity_file_handle) close_output_file(hcc_activity_file_handle);
|
||||
if (kfd_api_file_handle) close_output_file(kfd_api_file_handle);
|
||||
if (pc_sample_file_handle) close_output_file(pc_sample_file_handle);
|
||||
}
|
||||
|
||||
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
static inline uint32_t GetTid() { return syscall(__NR_gettid); }
|
||||
|
||||
// Error handler
|
||||
void fatal(const std::string msg) {
|
||||
fflush(roctx_file_handle);
|
||||
fflush(hsa_api_file_handle);
|
||||
fflush(hsa_async_copy_file_handle);
|
||||
fflush(hip_api_file_handle);
|
||||
fflush(hcc_activity_file_handle);
|
||||
fflush(kfd_api_file_handle);
|
||||
close_file_handles();
|
||||
fflush(stdout);
|
||||
fprintf(stderr, "%s\n\n", msg.c_str());
|
||||
fflush(stderr);
|
||||
@@ -121,19 +136,19 @@ void* control_thr_fun(void*) {
|
||||
const uint32_t len_us = control_len_us % 1000000;
|
||||
const uint32_t dist_sec = control_dist_us / 1000000;
|
||||
const uint32_t dist_us = control_dist_us % 1000000;
|
||||
bool start = true;
|
||||
bool to_start = true;
|
||||
|
||||
sleep(delay_sec);
|
||||
usleep(delay_us);
|
||||
|
||||
while (1) {
|
||||
if (start) {
|
||||
start = false;
|
||||
if (to_start) {
|
||||
to_start = false;
|
||||
roctracer_start();
|
||||
sleep(len_sec);
|
||||
usleep(len_us);
|
||||
} else {
|
||||
start = true;
|
||||
to_start = true;
|
||||
roctracer_stop();
|
||||
sleep(dist_sec);
|
||||
usleep(dist_us);
|
||||
@@ -141,6 +156,20 @@ void* control_thr_fun(void*) {
|
||||
}
|
||||
}
|
||||
|
||||
// Flushing control thread
|
||||
uint32_t control_flush_us = 0;
|
||||
void* flush_thr_fun(void*) {
|
||||
const uint32_t dist_sec = control_flush_us / 1000000;
|
||||
const uint32_t dist_us = control_flush_us % 1000000;
|
||||
|
||||
while (1) {
|
||||
sleep(dist_sec);
|
||||
usleep(dist_us);
|
||||
roctracer_flush_activity();
|
||||
roctracer::TraceBufferBase::FlushAll();
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// rocTX annotation tracing
|
||||
|
||||
@@ -148,7 +177,7 @@ struct roctx_trace_entry_t {
|
||||
uint32_t valid;
|
||||
uint32_t type;
|
||||
uint32_t cid;
|
||||
timestamp_t timestamp;
|
||||
timestamp_t time;
|
||||
uint32_t pid;
|
||||
uint32_t tid;
|
||||
const char* message;
|
||||
@@ -165,12 +194,16 @@ static inline void roctx_callback_fun(
|
||||
uint32_t tid,
|
||||
const char* message)
|
||||
{
|
||||
const timestamp_t timestamp = timer->timestamp_fn_ns();
|
||||
#if ROCTX_CLOCK_TIME
|
||||
const timestamp_t time = HsaTimer::clocktime_ns(HsaTimer::TIME_ID_CLOCK_MONOTONIC);
|
||||
#else
|
||||
const timestamp_t time = timer->timestamp_fn_ns();
|
||||
#endif
|
||||
roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry();
|
||||
entry->valid = roctracer::TRACE_ENTRY_COMPL;
|
||||
entry->type = 0;
|
||||
entry->cid = cid;
|
||||
entry->timestamp = timestamp;
|
||||
entry->time = time;
|
||||
entry->pid = GetPid();
|
||||
entry->tid = tid;
|
||||
entry->message = (message != NULL) ? strdup(message) : NULL;
|
||||
@@ -187,25 +220,26 @@ void roctx_api_callback(
|
||||
roctx_callback_fun(domain, cid, GetTid(), data->args.message);
|
||||
}
|
||||
|
||||
// Start/Stop callbacks
|
||||
void roctx_range_stack_callback(const roctx_range_data_t* data, void* arg) {
|
||||
const bool* is_stop_ptr = (bool*)arg;
|
||||
const uint32_t cid = (*is_stop_ptr == true) ? ROCTX_API_ID_roctxRangePop : ROCTX_API_ID_roctxRangePushA;
|
||||
const char* message = (*is_stop_ptr == true) ? NULL : data->message;
|
||||
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, cid, data->tid, message);
|
||||
// rocTX Start/Stop callbacks
|
||||
void roctx_range_start_callback(const roctx_range_data_t* data, void* arg) {
|
||||
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, data->tid, data->message);
|
||||
}
|
||||
void stop_callback() {
|
||||
bool is_stop = true;
|
||||
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop);
|
||||
}
|
||||
void start_callback() {
|
||||
bool is_stop = false;
|
||||
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop);
|
||||
void roctx_range_stop_callback(const roctx_range_data_t* data, void* arg) {
|
||||
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, data->tid, NULL);
|
||||
}
|
||||
void start_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_start_callback, NULL); }
|
||||
void stop_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stop_callback, NULL); }
|
||||
|
||||
// rocTX buffer flush function
|
||||
void roctx_flush_cb(roctx_trace_entry_t* entry) {
|
||||
#if ROCTX_CLOCK_TIME
|
||||
timestamp_t timestamp = 0;
|
||||
HsaRsrcFactory::Instance().GetTimestamp(HsaTimer::TIME_ID_CLOCK_MONOTONIC, entry->time, ×tamp);
|
||||
#else
|
||||
const timestamp_t timestamp = entry->time;
|
||||
#endif
|
||||
std::ostringstream os;
|
||||
os << entry->timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid;
|
||||
os << timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid;
|
||||
if (entry->message != NULL) os << ":\"" << entry->message << "\"";
|
||||
else os << ":\"\"";
|
||||
fprintf(roctx_file_handle, "%s\n", os.str().c_str()); fflush(roctx_file_handle);
|
||||
@@ -270,6 +304,9 @@ void hsa_activity_callback(
|
||||
index++;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// HIP API tracing
|
||||
|
||||
struct hip_api_trace_entry_t {
|
||||
uint32_t valid;
|
||||
uint32_t type;
|
||||
@@ -284,9 +321,6 @@ struct hip_api_trace_entry_t {
|
||||
void* ptr;
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// HIP API tracing
|
||||
|
||||
void hip_api_flush_cb(hip_api_trace_entry_t* entry);
|
||||
roctracer::TraceBuffer<hip_api_trace_entry_t>::flush_prm_t hip_flush_prm[1] = {{0, hip_api_flush_cb}};
|
||||
roctracer::TraceBuffer<hip_api_trace_entry_t> hip_api_trace_buffer("HIP", 0x200000, hip_flush_prm, 1);
|
||||
@@ -413,21 +447,27 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
|
||||
|
||||
// Activity tracing callback
|
||||
// hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067)
|
||||
void hcc_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
void pool_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
const roctracer_record_t* record = reinterpret_cast<const roctracer_record_t*>(begin);
|
||||
const roctracer_record_t* end_record = reinterpret_cast<const roctracer_record_t*>(end);
|
||||
|
||||
while (record < end_record) {
|
||||
const char * name = roctracer_op_string(record->domain, record->op, record->kind);
|
||||
if (record->domain == ACTIVITY_DOMAIN_HCC_OPS) {
|
||||
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu\n",
|
||||
record->begin_ns, record->end_ns, record->device_id, record->queue_id, name, record->correlation_id);
|
||||
fflush(hcc_activity_file_handle);
|
||||
} else {
|
||||
#if 0
|
||||
fprintf(hip_api_file_handle, "%lu:%lu %u:%u %s()\n",
|
||||
record->begin_ns, record->end_ns, record->process_id, record->thread_id, name);
|
||||
#endif
|
||||
switch(record->domain) {
|
||||
case ACTIVITY_DOMAIN_HCC_OPS:
|
||||
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu\n",
|
||||
record->begin_ns, record->end_ns,
|
||||
record->device_id, record->queue_id,
|
||||
name, record->correlation_id);
|
||||
fflush(hcc_activity_file_handle);
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
if (record->op == HSA_OP_ID_PCSAMPLE) {
|
||||
fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n",
|
||||
record->pc_sample.se, record->pc_sample.cycle, record->pc_sample.pc, name);
|
||||
fflush(pc_sample_file_handle);
|
||||
}
|
||||
break;
|
||||
}
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &record));
|
||||
}
|
||||
@@ -437,7 +477,7 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
// KFD API tracing
|
||||
|
||||
// KFD API callback function
|
||||
#ifdef KFD_WRAPPER
|
||||
static thread_local bool in_kfd_api_callback = false;
|
||||
void kfd_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
@@ -445,6 +485,8 @@ void kfd_api_callback(
|
||||
void* arg)
|
||||
{
|
||||
(void)arg;
|
||||
if (in_kfd_api_callback) return;
|
||||
in_kfd_api_callback = true;
|
||||
const kfd_api_data_t* data = reinterpret_cast<const kfd_api_data_t*>(callback_data);
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
kfd_begin_timestamp = timer->timestamp_fn_ns();
|
||||
@@ -454,8 +496,8 @@ void kfd_api_callback(
|
||||
os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data);
|
||||
fprintf(kfd_api_file_handle, "%s\n", os.str().c_str());
|
||||
}
|
||||
in_kfd_api_callback = false;
|
||||
}
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@@ -523,14 +565,73 @@ FILE* open_output_file(const char* prefix, const char* name) {
|
||||
}
|
||||
|
||||
void close_output_file(FILE* file_handle) {
|
||||
if ((file_handle != NULL) && (file_handle != stdout)) fclose(file_handle);
|
||||
if (file_handle != NULL) {
|
||||
fflush(file_handle);
|
||||
if (file_handle != stdout) fclose(file_handle);
|
||||
}
|
||||
}
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
if (onload_debug) { printf("TOOL OnLoad\n"); fflush(stdout); }
|
||||
timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn);
|
||||
// Allocating tracing pool
|
||||
void open_tracing_pool() {
|
||||
if (roctracer_default_pool() == NULL) {
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 0x80000;
|
||||
properties.buffer_callback_fun = pool_activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
}
|
||||
}
|
||||
|
||||
// Flush tracing pool
|
||||
void close_tracing_pool() {
|
||||
if (roctracer_default_pool() != NULL) {
|
||||
ROCTRACER_CALL(roctracer_flush_activity());
|
||||
}
|
||||
}
|
||||
|
||||
// tool library is loaded
|
||||
static bool is_loaded = false;
|
||||
|
||||
// tool unload method
|
||||
void tool_unload() {
|
||||
ONLOAD_TRACE("begin, loaded(" << is_loaded << ")");
|
||||
|
||||
if (is_loaded == false) return;
|
||||
is_loaded = false;
|
||||
|
||||
if (trace_roctx) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));
|
||||
}
|
||||
if (trace_hsa_api) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
|
||||
}
|
||||
if (trace_hsa_activity || trace_pcs) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
|
||||
}
|
||||
if (trace_hip_api || trace_hip_activity) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
}
|
||||
if (trace_kfd) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API));
|
||||
}
|
||||
|
||||
// Flush tracing pool
|
||||
close_tracing_pool();
|
||||
roctracer::TraceBufferBase::FlushAll();
|
||||
close_file_handles();
|
||||
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
|
||||
// tool load method
|
||||
void tool_load() {
|
||||
ONLOAD_TRACE("begin, loaded(" << is_loaded << ")");
|
||||
|
||||
if (is_loaded == true) return;
|
||||
is_loaded = true;
|
||||
|
||||
roctracer::TraceBufferBase::StartWorkerThreadAll();
|
||||
|
||||
// Output file
|
||||
const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
|
||||
@@ -571,11 +672,12 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
if (std::string(trace_domain).find("kfd") != std::string::npos) {
|
||||
trace_kfd = true;
|
||||
}
|
||||
}
|
||||
|
||||
// API trace vector
|
||||
std::vector<std::string> hsa_api_vec;
|
||||
std::vector<std::string> kfd_api_vec;
|
||||
// PC sampling enabling
|
||||
if (std::string(trace_domain).find("pcs") != std::string::npos) {
|
||||
trace_pcs = true;
|
||||
}
|
||||
}
|
||||
|
||||
printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout);
|
||||
|
||||
@@ -648,6 +750,84 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL));
|
||||
}
|
||||
|
||||
const char* ctrl_str = getenv("ROCP_CTRL_RATE");
|
||||
if (ctrl_str != NULL) {
|
||||
uint32_t ctrl_delay = 0;
|
||||
uint32_t ctrl_len = 0;
|
||||
uint32_t ctrl_rate = 0;
|
||||
|
||||
sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate);
|
||||
|
||||
if (ctrl_len > ctrl_rate) {
|
||||
fprintf(stderr, "ROCTracer: control length value (%u) > rate value (%u)\n", ctrl_len, ctrl_rate);
|
||||
abort();
|
||||
}
|
||||
control_dist_us = ctrl_rate - ctrl_len;
|
||||
control_len_us = ctrl_len;
|
||||
control_delay_us = ctrl_delay;
|
||||
|
||||
roctracer_stop();
|
||||
|
||||
if (ctrl_delay != UINT32_MAX) {
|
||||
fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout);
|
||||
pthread_t thread;
|
||||
pthread_attr_t attr;
|
||||
int err = pthread_attr_init(&attr);
|
||||
if (err) { errno = err; perror("pthread_attr_init"); abort(); }
|
||||
err = pthread_create(&thread, &attr, control_thr_fun, NULL);
|
||||
} else {
|
||||
fprintf(stdout, "ROCTracer: trace start disabled\n"); fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
const char* flush_str = getenv("ROCP_FLUSH_RATE");
|
||||
if (flush_str != NULL) {
|
||||
sscanf(flush_str, "%d", &control_flush_us);
|
||||
if (control_flush_us == 0) {
|
||||
fprintf(stderr, "ROCTracer: control flush rate bad value\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
fprintf(stdout, "ROCTracer: trace control flush rate(%uus)\n", control_flush_us); fflush(stdout);
|
||||
pthread_t thread;
|
||||
pthread_attr_t attr;
|
||||
int err = pthread_attr_init(&attr);
|
||||
if (err) { errno = err; perror("pthread_attr_init"); abort(); }
|
||||
err = pthread_create(&thread, &attr, flush_thr_fun, NULL);
|
||||
}
|
||||
|
||||
// Enable KFD API callbacks/activity
|
||||
if (trace_kfd) {
|
||||
kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt");
|
||||
// initialize KFD tracing
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_KFD_API, NULL);
|
||||
|
||||
printf(" KFD-trace(");
|
||||
if (kfd_api_vec.size() != 0) {
|
||||
for (unsigned i = 0; i < kfd_api_vec.size(); ++i) {
|
||||
uint32_t cid = KFD_API_ID_NUMBER;
|
||||
const char* api = kfd_api_vec[i].c_str();
|
||||
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL));
|
||||
printf(" %s", api);
|
||||
}
|
||||
} else {
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, kfd_api_callback, NULL));
|
||||
}
|
||||
printf(")\n");
|
||||
}
|
||||
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
ONLOAD_TRACE_BEG();
|
||||
timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn);
|
||||
|
||||
const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
|
||||
|
||||
// Enable HSA API callbacks/activity
|
||||
if (trace_hsa_api) {
|
||||
hsa_api_file_handle = open_output_file(output_prefix, "hsa_api_trace.txt");
|
||||
@@ -660,7 +840,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
for (unsigned i = 0; i < hsa_api_vec.size(); ++i) {
|
||||
uint32_t cid = HSA_API_ID_NUMBER;
|
||||
const char* api = hsa_api_vec[i].c_str();
|
||||
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid));
|
||||
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, NULL));
|
||||
printf(" %s", api);
|
||||
}
|
||||
@@ -684,145 +864,52 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
|
||||
|
||||
fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout);
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
|
||||
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));
|
||||
}
|
||||
|
||||
// Enable HIP API callbacks/activity
|
||||
if (trace_hip_api || trace_hip_activity) {
|
||||
hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt");
|
||||
hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt");
|
||||
|
||||
fprintf(stdout, " HIP-trace()\n"); fflush(stdout);
|
||||
// roctracer properties
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback);
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 0x80000;
|
||||
properties.buffer_callback_fun = hcc_activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
open_tracing_pool();
|
||||
// Enable tracing
|
||||
if (trace_hip_api) {
|
||||
hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt");
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
}
|
||||
if (trace_hip_activity) {
|
||||
hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt");
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
}
|
||||
}
|
||||
|
||||
const char* ctrl_str = getenv("ROCP_CTRL_RATE");
|
||||
if (ctrl_str != NULL) {
|
||||
uint32_t ctrl_delay = 0;
|
||||
uint32_t ctrl_len = 0;
|
||||
uint32_t ctrl_rate = 0;
|
||||
int ret = sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate);
|
||||
if (ret != 3) {
|
||||
fprintf(stderr, "ROCTracer: control rate value invalid 'delay:length:rate': '%s'\n", ctrl_str);
|
||||
abort();
|
||||
}
|
||||
if (ctrl_len > ctrl_rate) {
|
||||
fprintf(stderr, "ROCTracer: control length value (%u) > rate value (%u)\n", ctrl_len, ctrl_rate);
|
||||
abort();
|
||||
}
|
||||
control_dist_us = ctrl_rate - ctrl_len;
|
||||
control_len_us = ctrl_len;
|
||||
control_delay_us = ctrl_delay;
|
||||
|
||||
fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout);
|
||||
|
||||
roctracer_stop();
|
||||
|
||||
pthread_t thread;
|
||||
pthread_attr_t attr;
|
||||
int err = pthread_attr_init(&attr);
|
||||
if (err) { errno = err; perror("pthread_attr_init"); abort(); }
|
||||
err = pthread_create(&thread, &attr, control_thr_fun, NULL);
|
||||
// Enable PC sampling
|
||||
if (trace_pcs) {
|
||||
fprintf(stdout, " PCS-trace()\n"); fflush(stdout);
|
||||
open_tracing_pool();
|
||||
pc_sample_file_handle = open_output_file(output_prefix, "pc_sample_trace.txt");
|
||||
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_PCSAMPLE));
|
||||
}
|
||||
|
||||
#ifdef KFD_WRAPPER
|
||||
// Enable KFD API callbacks/activity
|
||||
if (trace_kfd) {
|
||||
kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt");
|
||||
// initialize KFD tracing
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_KFD_API, NULL);
|
||||
|
||||
printf(" KFD-trace(");
|
||||
if (kfd_api_vec.size() != 0) {
|
||||
for (unsigned i = 0; i < kfd_api_vec.size(); ++i) {
|
||||
uint32_t cid = KFD_API_ID_NUMBER;
|
||||
const char* api = kfd_api_vec[i].c_str();
|
||||
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid));
|
||||
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL));
|
||||
printf(" %s", api);
|
||||
}
|
||||
} else {
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, kfd_api_callback, NULL));
|
||||
}
|
||||
printf(")\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (onload_debug) { printf("TOOL OnLoad end\n"); fflush(stdout); }
|
||||
return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names);
|
||||
}
|
||||
|
||||
// tool unload method
|
||||
void tool_unload(bool destruct) {
|
||||
static bool is_unloaded = false;
|
||||
|
||||
if (onload_debug) { printf("TOOL tool_unload (%d, %d)\n", (int)destruct, (int)is_unloaded); fflush(stdout); }
|
||||
if (destruct == false) return;
|
||||
if (is_unloaded == true) return;
|
||||
is_unloaded = true;
|
||||
roctracer_unload(destruct);
|
||||
|
||||
if (trace_roctx) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));
|
||||
|
||||
roctx_trace_buffer.Flush();
|
||||
close_output_file(roctx_file_handle);
|
||||
}
|
||||
if (trace_hsa_api) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
|
||||
|
||||
hsa_api_trace_buffer.Flush();
|
||||
close_output_file(hsa_api_file_handle);
|
||||
}
|
||||
if (trace_hsa_activity) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
|
||||
|
||||
close_output_file(hsa_async_copy_file_handle);
|
||||
}
|
||||
if (trace_hip_api || trace_hip_activity) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
ROCTRACER_CALL(roctracer_flush_activity());
|
||||
ROCTRACER_CALL(roctracer_close_pool());
|
||||
|
||||
hip_api_trace_buffer.Flush();
|
||||
close_output_file(hip_api_file_handle);
|
||||
close_output_file(hcc_activity_file_handle);
|
||||
}
|
||||
|
||||
if (trace_kfd) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API));
|
||||
fclose(kfd_api_file_handle);
|
||||
}
|
||||
if (onload_debug) { printf("TOOL tool_unload end\n"); fflush(stdout); }
|
||||
ONLOAD_TRACE_END();
|
||||
return true;
|
||||
}
|
||||
|
||||
// HSA-runtime on-unload method
|
||||
extern "C" PUBLIC_API void OnUnload() {
|
||||
if (onload_debug) { printf("TOOL OnUnload\n"); fflush(stdout); }
|
||||
tool_unload(false);
|
||||
if (onload_debug) { printf("TOOL OnUnload end\n"); fflush(stdout); }
|
||||
ONLOAD_TRACE("");
|
||||
}
|
||||
|
||||
extern "C" CONSTRUCTOR_API void constructor() {
|
||||
if (onload_debug) { printf("TOOL constructor ...end\n"); fflush(stdout); }
|
||||
ONLOAD_TRACE_BEG();
|
||||
tool_load();
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
extern "C" DESTRUCTOR_API void destructor() {
|
||||
if (onload_debug) { printf("TOOL destructor\n"); fflush(stdout); }
|
||||
tool_unload(true);
|
||||
if (onload_debug) { printf("TOOL destructor end\n"); fflush(stdout); }
|
||||
ONLOAD_TRACE_BEG();
|
||||
tool_unload();
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
|
||||
Ссылка в новой задаче
Block a user