Merge branch 'amd-master' into rkebichi-trace-compare

Этот коммит содержится в:
eshcherb
2020-03-25 19:27:49 -05:00
коммит произвёл GitHub
родитель e0b23ead23 cb89ed3325
Коммит 7b8b39b495
35 изменённых файлов: 1871 добавлений и 525 удалений
+2
Просмотреть файл
@@ -8,6 +8,8 @@ b
build
inc/hsa_prof_str.h
inc/kfd_prof_str.h
inc/basic_ostream_ops.h
inc/kfd_ostream_ops.h
test/hsa
test/MatrixTranspose/MatrixTranspose
test/MatrixTranspose_test/MatrixTranspose
+72 -33
Просмотреть файл
@@ -28,6 +28,9 @@ cmake_minimum_required ( VERSION 3.5.0 )
## Verbose output.
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )
# Install prefix
set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories")
## Set module name and project name.
set ( ROCTRACER_NAME "roctracer" )
set ( ROCTRACER_TARGET "${ROCTRACER_NAME}64" )
@@ -43,17 +46,25 @@ include ( env )
## Setup the package version.
get_version ( "1.0.0" )
message ( "-- LIB-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" )
set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
set ( BUILD_VERSION_MINOR ${VERSION_MINOR} )
set ( BUILD_VERSION_PATCH ${VERSION_PATCH} )
set ( LIB_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
if ( DEFINED VERSION_BUILD AND NOT ${VERSION_BUILD} STREQUAL "" )
message ( "VERSION BUILD DEFINED ${VERSION_BUILD}" )
set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" )
endif ()
set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
set ( LIB_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
set ( LIB_VERSION_MINOR ${BUILD_VERSION_MINOR} )
if (DEFINED ENV{ROCM_LIBPATCH_VERSION})
set (LIB_VERSION_PATCH $ENV{ROCM_LIBPATCH_VERSION} )
else ()
set (LIB_VERSION_PATCH ${BUILD_VERSION_PATCH} )
endif()
set ( LIB_VERSION_STRING "${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" )
message ( "-- LIB-VERSION: ${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" )
## Set target and root/lib/test directory
set ( TARGET_NAME "${ROCTRACER_TARGET}" )
@@ -66,53 +77,81 @@ include ( ${LIB_DIR}/CMakeLists.txt )
## Set the VERSION and SOVERSION values
set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" )
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${BUILD_VERSION_MAJOR}" )
set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" )
# If the library is a release, strip the target library
if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
add_custom_command ( TARGET ${ROCTRACER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so )
endif ()
#if ( "${CMAKE_BUILD_TYPE}" STREQUAL release )
# add_custom_command ( TARGET ${ROCTRACER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so )
#endif ()
## Build tests
add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test )
## Install and packaging
set ( CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}/${ROCTRACER_NAME} )
message ( "---------Install-Dir: ${CMAKE_INSTALL_PREFIX}" )
## pbulic headers
set ( PUBLIC_HEADERS
roctracer.h
roctracer_hip.h
roctracer_hcc.h
roctracer_ext.h
ext/prof_protocol.h
ext/hsa_rt_utils.hpp
roctx.h
)
add_custom_target ( inc-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/include inc-link )
## Installation and packaging
set ( DEST_NAME ${ROCPROFILER_NAME} )
if ( DEFINED CPACK_PACKAGING_INSTALL_PREFIX )
get_filename_component ( DEST_NAME ${CPACK_PACKAGING_INSTALL_PREFIX} NAME )
get_filename_component ( DEST_DIR ${CPACK_PACKAGING_INSTALL_PREFIX} DIRECTORY )
set ( CPACK_PACKAGING_INSTALL_PREFIX ${DEST_DIR} )
endif ()
message ( "-----------Dest-name: ${DEST_NAME}" )
message ( "------Install-prefix: ${CMAKE_INSTALL_PREFIX}" )
message ( "-----------CPACK-dir: ${CPACK_PACKAGING_INSTALL_PREFIX}" )
#add_custom_target ( inc-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
# COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/include inc-link )
add_custom_target ( so-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTRACER_LIBRARY}.so so-link )
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so so-link )
add_custom_target ( so-major-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} so-major-link )
add_custom_target ( so-patch-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} so-patch-link )
## Install information
install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION lib )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer.h DESTINATION include )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_hip.h DESTINATION include )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_hcc.h DESTINATION include )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_ext.h DESTINATION include )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/prof_protocol.h DESTINATION include/ext )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/hsa_rt_utils.hpp DESTINATION include/ext )
install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION ../include RENAME ${ROCTRACER_NAME} )
install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCTRACER_LIBRARY}.so )
install ( FILES ${PROJECT_BINARY_DIR}/test/libtracer_tool.so DESTINATION tool )
install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION ${DEST_NAME}/lib )
foreach ( header ${PUBLIC_HEADERS} )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/${header} DESTINATION ${DEST_NAME}/include )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/${header} DESTINATION include/${DEST_NAME} )
endforeach ()
#install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION include RENAME ${DEST_NAME} )
install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so )
install ( FILES ${PROJECT_BINARY_DIR}/so-major-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} )
install ( FILES ${PROJECT_BINARY_DIR}/so-patch-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} )
install ( FILES ${PROJECT_BINARY_DIR}/test/libtracer_tool.so DESTINATION ${DEST_NAME}/tool )
## rocTX
set ( ROCTX_TARGET "roctx64" )
set ( ROCTX_LIBRARY "lib${ROCTX_TARGET}" )
add_custom_target ( so-roctx-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTX_LIBRARY}.so so-roctx-link )
## Set the VERSION and SOVERSION values
set_property ( TARGET ${ROCTX_TARGET} PROPERTY VERSION "${LIB_VERSION_STRING}" )
set_property ( TARGET ${ROCTX_TARGET} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" )
install ( TARGETS "roctx64" LIBRARY DESTINATION lib )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include )
install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION include )
install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so )
add_custom_target ( so-roctx-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so so-roctx-link )
add_custom_target ( so-roctx-major-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} so-roctx-major-link )
add_custom_target ( so-roctx-patch-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} so-roctx-patch-link )
install ( TARGETS "roctx64" LIBRARY DESTINATION ${DEST_NAME}/lib )
install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so )
install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-major-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} )
install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-patch-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} )
## KFD wrapper
if ( DEFINED KFD_WRAPPER )
install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib )
endif ()
install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION ${DEST_NAME}/lib )
## Packaging directives
set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" )
@@ -129,7 +168,7 @@ set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" )
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" )
## RPM package specific variables
set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" )
include ( CPack )
+4 -1
Просмотреть файл
@@ -3,7 +3,10 @@
set -e
do_ldconfig() {
echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig
INSTALL_PATH=/opt/rocm/roctracer
if [ -e "${INSTALL_PATH}" ] ; then
echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig
fi
}
case "$1" in
+1
Просмотреть файл
@@ -1,4 +1,5 @@
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
[MITx11 License]
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
+38 -15
Просмотреть файл
@@ -1,52 +1,75 @@
# ROC-tracer
```
ROC-tracer library, Runtimes Generic Callback/Activity APIs.
ROC-tracer library: Runtimes Generic Callback/Activity APIs.
The goal of the implementation is to provide a generic independent from
specific runtime profiler to trace API and asyncronous activity.
The API provides functionality for registering the runtimes API callbacks and
asyncronous activity records pool support.
ROC-TX librray: code annotation evemts API
Includes basic API: roctxMark, roctxRangePush, roctxRangePop.
```
## Usage
```
rocTracer API:
To use the rocTracer API you need the API header and to link your application with roctracer .so librray:
- the API header: /opt/rocm/roctracer/include/roctracer.h
- the .so library: /opt/rocm/lib/libroctracer64.so
rocTX API:
To use the rocTX API you need the API header and to link your application with roctx .so librray:
- the API header: /opt/rocm/roctracer/include/roctx.h
- the .so library: /opt/rocm/lib/libroctx64.so
```
## The library source tree
```
- inc/roctracer.h - Library public API
- doc - documentation
- inc/roctracer.h - rocTacer library public API header
- inc/roctx.h - rocTX library puiblic API header
- src - Library sources
- core - Library API sources
- util - Library utils sources
- core - rocTracer library API sources
- roctx - rocTX library API sources
- util - library utils sources
- test - test suit
- MatrixTranspose - test based on HIP MatrixTranspose sample
```
## Documentation
```
- API description: inc/roctracer.h
- Code example: test/MatrixTranspose_test/MatrixTranspose.cpp
```
- API description:
- ['roctracer' / 'rocTX' profiling C API specification](doc/roctracer_spec.md)
- Code examples:
- [test/MatrixTranspose_test/MatrixTranspose.cpp](test/MatrixTranspose_test/MatrixTranspose.cpp)
- [test/MatrixTranspose/MatrixTranspose.cpp](test/MatrixTranspose/MatrixTranspose.cpp)
## To build and run test
```
- ROCm-2.3 or higher is required
cd <your path>
- ROCm is required
- Python modules requirements: CppHeaderParser, argparse.
To install:
sudo pip install CppHeaderParser argparse
- CLone development branch of roctracer:
git clone -b amd-master https://github.com/ROCm-Developer-Tools/roctracer
- Set environment:
export CMAKE_PREFIX_PATH=/opt/rocm
- To use custom HIP/HCC versions:
export HIP_PATH=/opt/rocm/hip
export HCC_HOME=/opt/rocm/hcc
export CMAKE_PREFIX_PATH=/opt/rocm
- Build ROCtracer
- To build roctracer library:
export CMAKE_BUILD_TYPE=<debug|release> # release by default
cd <your path>/roctracer && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX=/opt/rocm .. && make -j <nproc>
- To build and run test
- To build and run test:
make mytest
run.sh
- To install
- To install:
make install
or
make package && dpkg -i *.deb
+4 -1
Просмотреть файл
@@ -1 +1,4 @@
echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig
INSTALL_PATH=/opt/rocm/roctracer
if [ -e "${INSTALL_PATH}" ] ; then
echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig
fi
+4 -2
Просмотреть файл
@@ -1,7 +1,8 @@
#!/bin/bash -x
SRC_DIR=`dirname $0`
COMPONENT="roctracer"
ROCM_PATH="/opt/rocm"
ROCM_PATH="${ROCM_PATH:=/opt/rocm}"
LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64"
fatal() {
echo "$1"
@@ -19,9 +20,9 @@ if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi
if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi
if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi
if [ -n "$HIP_VDI" ] ; then HIP_VDI_OPT="-DHIP_VDI=1"; fi
if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi
ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD)
MAKE_OPTS="-j 8 -C $BUILD_DIR"
mkdir -p $BUILD_DIR
pushd $BUILD_DIR
@@ -33,6 +34,7 @@ cmake \
-DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \
-DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \
-DCPACK_GENERATOR="DEB;RPM" \
-DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \
$HIP_VDI_OPT \
$ROCTRACER_ROOT
make
+1 -6
Просмотреть файл
@@ -50,11 +50,6 @@ else()
set ( HIP_VDI 0 )
endif()
## Enable KFD wrapper
if ( DEFINED KFD_WRAPPER )
add_definitions ( -DKFD_WRAPPER=${KFD_WRAPPER} )
endif()
## Enable HIP/HCC local build
if ( DEFINED LOCAL_BUILD )
add_definitions ( -DLOCAL_BUILD=${LOCAL_BUILD} )
@@ -123,6 +118,7 @@ endif ()
find_library ( HSA_KMT_LIB "libhsakmt.so" )
get_filename_component ( HSA_KMT_LIB_PATH ${HSA_KMT_LIB} DIRECTORY )
set ( HSA_KMT_INC_PATH "${HSA_KMT_LIB_PATH}/../include" )
set ( ROCM_INC_PATH ${HSA_KMT_INC_PATH} )
## Basic Tool Chain Information
message ( "----------------NBIT: ${NBIT}" )
@@ -136,6 +132,5 @@ message ( "-------------HCC-Inc: ${HCC_INC_DIR}" )
message ( "-------------HIP-Inc: ${HIP_INC_DIR}" )
message ( "-------------KFD-Inc: ${HSA_KMT_INC_PATH}" )
message ( "-------------HIP-VDI: ${HIP_VDI}" )
message ( "---------KFD_WRAPPER: ${KFD_WRAPPER}" )
message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" )
message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" )
+748
Просмотреть файл
@@ -0,0 +1,748 @@
# ROC Tracer / ROC-TX Libraries Specification
```
ROC Tracer API version 2
ROC-TX API version 1
- The rocTracer API is agnostic to specific runtime and may trace
the runtime API calls and asynchronous GPU activity.
- The rocTX API is provided for application code annotation.
```
## 1. High level overview
```
The goal of the implementation is to provide a runtime independent API
for tracing of runtime calls and asynchronous activity, like GPU kernel
dispatches and memory moves. The tracing includes callback API for
runtime API tracing and activity API for asynchronous activity records
logging.
Depending on particular runtime intercepting mechanism, the rocTracer
library can be dynamically linked, dynamically loaded by the runtime as
a plugin or some API wrapper can be loaded using LD_PRELOAD.
The library has a C API.
The rocTracer library is an API that intercepts runtime API calls and
traces asynchronous activity. The activity tracing results are recorded
in a ring buffer.
The rocTX contains application code instrumentation API to support high
level correlation of runtime API/activity events. The API includes mark
and nested ranges.
```
## 2. General API
### 2.1. Description
```
The library supports method for getting the error number and error string
of the last failed library API call. It allows to check the conformance
of used library API header and the library binary, the version macros and
API methods can be used.
Returning the error and error string methods:
• roctracer_status_t – error code enumeration
• roctracer_error_string – method for returning the error string
Library version:
• ROCTRACER_VERSION_MAJOR – API major version macro
• ROCTRACER_VERSION_MINOR – API minor version macro
• roctracer_version_major – library major version
• roctracer_version_minor – library minor version
```
### 2.2. Error codes and error string methods
```
Error code enumeration:
typedef enum {
ROCTRACER_STATUS_SUCCESS = 0,
ROCTRACER_STATUS_ERROR = 1,
ROCTRACER_STATUS_UNINIT = 2,
ROCTRACER_STATUS_BREAK = 3,
ROCTRACER_STATUS_BAD_DOMAIN = 4,
ROCTRACER_STATUS_BAD_PARAMETER = 5,
ROCTRACER_STATUS_HIP_API_ERR = 6,
ROCTRACER_STATUS_HCC_OPS_ERR = 7,
ROCTRACER_STATUS_ROCTX_ERR = 8,
} roctracer_status_t;
Return error string:
const char* roctracer_error_string();
```
### 2.3. Library version
```
The library provides major and minor versions. Major version is for
incompatible API changes and minor version for bug fixes.
API version macros defined in the library API header roctracer.h:
ROCTRACER_VERSION_MAJOR
ROCTRACER_VERSION_MINOR
Methods to check library major and minor venison:
uint32_t roctracer_major_version();
uint32_t roctracer_minor_version();
```
## 3. Frontend API
### 3.1. Description
```
The rocTracer provides support for runtime API callbacks and activity
records logging. The APIs of different runtimes at different levels
are considered as different API domains with assigned domain IDs. For
example, language level and driver level. The API callbacks provide
the API calls arguments and are called on two phases on “enter” and
on “exit”. The activity records are logged to the ring buffer and can
be associated with the respective API calls using the correlation ID.
Activity API can be used to enable collecting of the records with
timestamping data for API calls and asynchronous activity like the
kernel submits, memory copies and barriers
Tracing domains:
• roctracer_domain_t – runtime API domains, HIP, HSA, etc…
• roctracer_op_string – Return Op string by given domain and
activity Op code
• roctracer_op_code – Return Op code and kind by given string
Callback API:
• roctracer_rtapi_callback_t – runtime API callback type
• roctracer_enable_op_callback – enable runtime API callback
by domain and Op code
• roctracer_enable_domain_callback – enable runtime API callback
by domain for all Ops
• roctracer_enable_callback – enable runtime API callback for
all domains, all Ops
• roctracer_disable_op_callback – disable runtime API callback
by domain and Op code
• roctracer_enable_op_callback – enable runtime API callback
by domain for all Ops
• roctracer_enable_op_callback – enable runtime API callback for
all domains, all Ops
Activity API:
• roctracer_record_t – activity record
• roctracer_pool_t – records pool type
• roctracer_allocator_t – tracer allocator type
• roctracer_buffer_callback_t – pool callback type
• roctracer_open_pool[_expl] – create records pool
• roctracer_close_pool[_expl] – close records pool
• roctracer_default_pool[_expl] – get/set default pool
• roctracer_properties_t – tracer properties
• roctracer_enable_op_activity[_expl] – enable activity records logging
• roctracer_enable_domain_activity[_expl] – enable activity records logging
• roctracer_enable_activity[_expl] – enable activity records logging
• roctracer_disable_op_activity – disable activity records logging
• roctracer_disable_domain_activity – disable activity records logging
• roctracer_disable_activity – disable activity records logging
• roctracer_flush_activity[_expl] – disable activity records logging
• roctracer_next_record – return next record
• roctracer_get_timestamp – return correlated GPU/CPU system timestamp
External correlation ID API:
• roctracer_activity_push_external_correlation_id - push an external
correlation id for the calling thread
• roctracer_activity_pop_external_correlation_id - pop an external
correlation id for the calling thread
Tracing control API:
• roctracer_start – tracing start
• roctracer_stop – tracer stop
```
### 3.2. Tracing Domains
```
Various tracing domains are supported. Each domain is assigned with
a domain ID. The domains include HSA, HIP, and HCC runtime levels.
Traced API domains:
typedef enum {
ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
ACTIVITY_DOMAIN_HIP_API = 2, // HIP API domain
ACTIVITY_DOMAIN_HIP_OPS = 3, // HIP async activity domain
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
ACTIVITY_DOMAIN_NUMBER = 7
} activity_domain_t;
Return name by given domain and Op code:
const char* roctracer_op_string( // NULL returned on error and error number is set
uint32_t domain, // tracing domain
uint32_t op, // activity op code
uint32_t kind); // activity kind
Return Op code and kind by given string:
roctracer_status_t roctracer_op_code(
uint32_t domain, // tracing domain
const char* str, // [in] op string
uint32_t* op, // [out] op code
uint32_t* kind); // [out] op kind code if not NULL
```
### 3.3. Callback API
```
The tracer provides support for runtime API callbacks and activity records
logging. The API callbacks provide the API calls arguments and are called
on two phases on “enter”, on “exit”.
API phase passed to the callbacks:
typedef enum {
ROCTRACER_API_PHASE_ENTER,
ROCTRACER_API_PHASE_EXIT,
} roctracer_api_phase_t;
Runtime API callback type:
typedef void (*roctracer_rtapi_callback_t)(
uint32_t domain, // runtime API domain
uint32_t cid, // API call ID
const void* data, // [in] callback data with correlation id and the call
// arguments
void* arg); // [in/out] user passed data
Enable runtime API callbacks:
roctracer_status_t roctracer_enable_op_callback(
activity_domain_t domain, // tracing domain
uint32_t op, // API call ID
activity_rtapi_callback_t callback, // callback function pointer
void* arg); // [in/out] callback arg
roctracer_status_t roctracer_enable_domain_callback(
activity_domain_t domain, // tracing domain
activity_rtapi_callback_t callback, // callback function pointer
void* arg); // [in/out] callback arg
roctracer_status_t roctracer_enable_callback(
activity_rtapi_callback_t callback, // callback function pointer
void* arg); // [in/out] callback arg
Disable runtime API callbacks:
roctracer_status_t roctracer_disable_op_callback(
activity_domain_t domain, // tracing domain
uint32_t op); // API call ID
roctracer_status_t roctracer_disable_domain_callback(
activity_domain_t domain); // tracing domain
roctracer_status_t roctracer_disable_callback();
```
### 3.4 Activity API
```
The activity records are asynchronously logged to the pool and can be
associated with the respective API callbacks using the correlation ID.
Activity API can be used to enable collecting the records with
timestamp data for API calls and GPU activity like kernel submits,
memory copies, and barriers.
// Correlation id
typedef uint64_t activity_correlation_id_t;
Activity record type:
// Activity record type
struct activity_record_t {
uint32_t domain; // activity domain id
activity_kind_t kind; // activity kind
activity_op_t op; // activity op
activity_correlation_id_t correlation_id; // activity ID
uint64_t begin_ns; // host begin timestamp
uint64_t end_ns; // host end timestamp
union {
struct {
int device_id; // device id
uint64_t queue_id; // queue id
};
struct {
uint32_t process_id; // device id
uint32_t thread_id; // thread id
};
struct {
activity_correlation_id_t external_id; // external correlation id
};
};
size_t bytes; // data size bytes
};
Return next record:
static inline int roctracer_next_record(
const activity_record_t* record, // [in] record ptr
const activity_record_t** next); // [out] next record ptr
Tracer allocator type:
typedef void (*roctracer_allocator_t)(
char** ptr, // memory pointer
size_t size, // memory size
void* arg); // allocator arg
Pool callback type:
typedef void (*roctracer_buffer_callback_t)(
const char* begin, // [in] available buffered trace records
const char* end, // [in] end of buffered trace records
void* arg); // [in/out] callback arg
Tracer properties:
typedef struct {
uint32_t mode; // roctracer mode
size_t buffer_size; // buffer size
// power of 2
roctracer_allocator_t alloc_fun; // memory allocator
// function pointer
void* alloc_arg; // memory allocator
// function pointer
roctracer_buffer_callback_t buffer_callback_fun; // tracer record
// callback function
void* buffer_callback_arg; // tracer record
// callback arg
} roctracer_properties_t;
Tracer memory pool handle type:
typedef void roctracer_pool_t;
Create tracer memory pool:
roctracer_status_t roctracer_open_pool(
const roctracer_properties_t* properties); // tracer pool properties
roctracer_status_t roctracer_open_pool_expl(
const roctracer_properties_t* properties, // tracer pool properties
roctracer_pool_t** pool); // [out] returns tracer pool if
// not NULL, otherwise sets the
// default one if it is not set
// yet; otherwise the error is
// generated
Close tracer memory pool:
roctracer_status_t roctracer_close_pool();
roctracer_status_t roctracer_close_pool_expl(
roctracer_pool_t* pool); // memory pool, NULL means default pool
Return current default pool. Set new default pool if the argument is not NULL:
roctracer_pool_t* roctracer_default_pool();
roctracer_pool_t* roctracer_default_pool_expl(
roctracer_pool_t* pool); // new default pool if not NULL
```
Enable activity records logging:
```
roctracer_status_t roctracer_enable_op_activity(
activity_domain_t domain, // tracing domain
uint32_t op); // activity op ID
roctracer_status_t roctracer_enable_op_activity_expl(
activity_domain_t domain, // tracing domain
uint32_t op, // activity op ID
roctracer_pool_t* pool); // memory pool, NULL means default pool
roctracer_status_t roctracer_enable_domain_activity(
activity_domain_t domain); // tracing domain
roctracer_status_t roctracer_enable_domain_activity_expl(
activity_domain_t domain, // tracing domain
roctracer_pool_t* pool); // memory pool, NULL means default pool
roctracer_status_t roctracer_enable_activity();
roctracer_status_t roctracer_enable_activity_expl(
roctracer_pool_t* pool); // memory pool, NULL means default pool
Disable activity records logging:
roctracer_status_t roctracer_disable_op_activity(
activity_domain_t domain, // tracing domain
uint32_t op); // activity op ID
roctracer_status_t roctracer_disable_domain_activity(
activity_domain_t domain); // tracing domain
roctracer_status_t roctracer_disable_activity();
Flush available activity records:
roctracer_status_t roctracer_flush_activity();
roctracer_status_t roctracer_flush_activity_expl(
roctracer_pool_t* pool); // memory pool, NULL means default pool
Return correlated GPU/CPU system timestamp:
roctracer_status_t roctracer_get_timestamp(
uint64_t* timestamp); // [out] return timestamp
```
External correlation ID API
```
The API provides activity records to associate rocTracer correlation IDs with
IDs provided by external APIs. The external ID records are identified by
ACTIVITY_DOMAIN_EXT_API domain value.
Using the push method an external ID is pushed to a per CPU thread stack and
the pop method can be used to remove the last pushed ID.
An external ID record is inserted before any generated rocTracer activity record
if the same CPU external ID stack is non-empty.
Notifies that the calling thread is entering an external API region.
Push an external correlation id for the calling thread.
roctracer_status_t roctracer_activity_push_external_correlation_id(
activity_correlation_id_t id); // external correlation id
Notifies that the calling thread is leaving an external API region.
Pop an external correlation id for the calling thread.
roctracer_status_t roctracer_activity_pop_external_correlation_id(
activity_correlation_id_t* last_id); // returns the last external correlation id
// if not NULL
```
Tracing control API
```
Tracing start:
void roctracer_start();
Tracing stop:
void roctracer_stop();
```
## 4. rocTracer Usage Code Examples
### 4.1. HIP API and HCC ops, GPU Activity Tracing
```
#include <inc/roctracer_hip.h>
#include <inc/roctracer_hcc.h>
// HIP API callback function
void hip_api_callback(
uint32_t domain,
uint32_t cid,
const void* callback_data,
void* arg)
{
(void)arg;
const hip_api_data_t* data = reinterpret_cast <const hip_api_data_t*>
(callback_data);
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
roctracer_id_string(ACTIVITY_DOMAIN_HIP_API, cid),
cid,
data->correlation_id,
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
<some code . . .>
}
// Activity tracing callback
void activity_callback(const char* begin, const char* end, void* arg) {
const roctracer_record_t* record = reinterpret_cast<const
roctracer_record_t*>(begin);
const roctracer_record_t* end_record = reinterpret_cast<const
roctracer_record_t*>(end);
fprintf(stdout, "\tActivity records:\n");
while (record < end_record) {
const char * name = roctracer_op_string(record->domain,
record->activity_id, 0);
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)
device_id(%d) stream_id(%lu)\n",
name,
record->correlation_id,
record->begin_ns,
record->end_ns,
record->device_id,
record->stream_id
);
<some code . . .>
ROCTRACER_CALL(roctracer_next_record(record, &record));
}
}
int main() {
// Allocating tracing pool
roctracer_properties_t properties{};
properties.buffer_size = 12;
properties.buffer_callback_fun = activity_callback;
ROCTRACER_CALL(roctracer_open_pool(&properties));
// Enable HIP API callbacks. HIP_API_ID_ANY can be used to trace all HIP
// API calls.
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API,
HIP_API_ID_hipModuleLaunchKernel,
hip_api_callback, NULL));
ROCTRACER_CALL(roctracer_enable_op_acticity(ACTIVITY_DOMAIN_HIP_API,
HIP_API_ID_hipModuleLaunchKernel));
// Enable HIP kernel dispatch activity tracing
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HCC_OPS,
hc::HSA_OP_ID_DISPATCH));
<test code>
// Disable tracing and closing the pool
ROCTRACER_CALL(roctracer_disable_callback());
ROCTRACER_CALL(roctracer_disable_activity());
ROCTRACER_CALL(roctracer_close_pool());
}
```
### 4.2. MatrixTranspose HIP sample with all APIs/activity tracing enabled
```
This shows a MatrixTranspose HIP sample with enabled tracing of
all HIP API and all GPU asynchronous activity.
/*
Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <iostream>
// hip header file
#include <hip/hip_runtime.h>
#ifndef ITERATIONS
# define ITERATIONS 100
#endif
#define WIDTH 1024
#define NUM (WIDTH * WIDTH)
#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
#define THREADS_PER_BLOCK_Z 1
// Device (Kernel) function, it must be void
// hipLaunchParm provides the execution configuration
__global__ void matrixTranspose(hipLaunchParm lp, float* out, float* in,
const int width) {
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
out[y * width + x] = in[x * width + y];
}
// CPU implementation of matrix transpose
void matrixTransposeCPUReference(float* output, float* input, const unsigned
int width) {
for (unsigned int j = 0; j < width; j++) {
for (unsigned int i = 0; i < width; i++) {
output[i * width + j] = input[j * width + i];
}
}
}
int iterations = ITERATIONS;
void start_tracing();
void stop_tracing();
int main() {
float* Matrix;
float* TransposeMatrix;
float* cpuTransposeMatrix;
float* gpuMatrix;
float* gpuTransposeMatrix;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
std::cout << "Device name " << devProp.name << std::endl;
int i;
int errors;
while (iterations-- > 0) {
start_tracing();
Matrix = (float*)malloc(NUM * sizeof(float));
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
// initialize the input data
for (i = 0; i < NUM; i++) {
Matrix[i] = (float)i * 10.0f;
}
// allocate the memory on the device side
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
// Memory transfer from host to device
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float),
hipMemcpyHostToDevice);
// Lauching kernel from host
hipLaunchKernel(matrixTranspose,
dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH /
THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
gpuTransposeMatrix, gpuMatrix, WIDTH);
// Memory transfer from device to host
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float),
hipMemcpyDeviceToHost);
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
// verify the results
errors = 0;
double eps = 1.0E-6;
for (i = 0; i < NUM; i++) {
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
errors++;
}
}
if (errors != 0) {
printf("FAILED: %d errors\n", errors);
} else {
printf("PASSED!\n");
}
// free the resources on device side
hipFree(gpuMatrix);
hipFree(gpuTransposeMatrix);
// free the resources on host side
free(Matrix);
free(TransposeMatrix);
free(cpuTransposeMatrix);
stop_tracing();
}
return errors;
}
/////////////////////////////////////////////////////////////////////////////
// HIP/HCC Callbacks/Activity tracing
/////////////////////////////////////////////////////////////////////////////
#include <inc/roctracer_hip.h>
#include <inc/roctracer_hcc.h>
// Macro to check ROC-tracer calls status
#define ROCTRACER_CALL(call) \
do { \
int err = call; \
if (err != 0) { \
std::cerr << roctracer_error_string() << std::endl << std::flush; \
abort(); \
} \
} while (0)
// HIP API callback function
void hip_api_callback(
uint32_t domain,
uint32_t cid,
const void* callback_data,
void* arg)
{
(void)arg;
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>
(callback_data);
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0),
cid,
data->correlation_id,
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
switch (cid) {
case HIP_API_ID_hipMemcpy:
fprintf(stdout, "dst(%p) src(%p) size(0x%x) kind(%u)",
data->args.hipMemcpy.dst,
data->args.hipMemcpy.src,
(uint32_t)(data->args.hipMemcpy.sizeBytes),
(uint32_t)(data->args.hipMemcpy.kind));
break;
case HIP_API_ID_hipMalloc:
fprintf(stdout, "ptr(%p) size(0x%x)",
data->args.hipMalloc.ptr,
(uint32_t)(data->args.hipMalloc.size));
break;
case HIP_API_ID_hipFree:
fprintf(stdout, "ptr(%p),
data->args.hipFree.ptr);
break;
case HIP_API_ID_hipModuleLaunchKernel:
fprintf(stdout, "kernel(\"%s\") stream(%p)",
hipKernelNameRef(data->args.hipModuleLaunchKernel.f),
data->args.hipModuleLaunchKernel.stream);
break;
default:
break;
}
} else {
switch (cid) {
case HIP_API_ID_hipMalloc:
fprintf(stdout, "*ptr(0x%p)",
*(data->args.hipMalloc.ptr));
break;
default:
break;
}
}
fprintf(stdout, "\n"); fflush(stdout);
}
// Activity tracing callback
// hipMalloc id(3) correlation_id(1):
// begin_ns(1525888652762640464) end_ns(1525888652762877067)
void activity_callback(const char* begin, const char* end, void* arg) {
const roctracer_record_t* record = reinterpret_cast
<const roctracer_record_t*>(begin);
const roctracer_record_t* end_record = reinterpret_cast
<const roctracer_record_t*>(end);
fprintf(stdout, "\tActivity records:\n"); fflush(stdout);
while (record < end_record) {
const char * name = roctracer_op_string(record->domain,
record->activity_id, 0);
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) \
device_id(%d) stream_id(%lu)",
name,
record->correlation_id,
record->begin_ns,
record->end_ns,
record->device_id,
record->stream_id
);
if (record->kind == hc::HSA_OP_ID_COPY)
fprintf(stdout, " bytes(0x%zx)", record->bytes);
fprintf(stdout, "\n");
fflush(stdout);
ROCTRACER_CALL(roctracer_next_record(record, &record));
}
}
// Start tracing routine
void start_tracing() {
std::cout << "# START #############################" << std::endl
<< std::flush;
// Allocating tracing pool
roctracer_properties_t properties{};
properties.buffer_size = 0x1000;
properties.buffer_callback_fun = activity_callback;
ROCTRACER_CALL(roctracer_open_pool(&properties));
// Enable API callbacks, all domains
ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, NULL));
// Enable activity tracing, all domains
ROCTRACER_CALL(roctracer_enable_activity());
}
// Stop tracing routine
void stop_tracing() {
ROCTRACER_CALL(roctracer_disable_api_callback());
ROCTRACER_CALL(roctracer_disable_api_activity());
ROCTRACER_CALL(roctracer_close_pool());
std::cout << "# STOP #############################" << std::endl
<< std::flush;
}
/////////////////////////////////////////////////////////////////////////////
```
## 5. rocTX application code annotation API
```
Basic annotation API: markers and nested ranges.
// A marker created by given ASCII massage
void roctxMark(const char* message);
// Returns the 0 based level of a nested range being started by given message associated to this range.
// A negative value is returned on the error.
int roctxRangePush(const char* message);
// Marks the end of a nested range.
// Returns the 0 based level the range.
// A negative value is returned on the error.
int roctxRangePop();
```
+17 -7
Просмотреть файл
@@ -29,9 +29,10 @@ THE SOFTWARE.
typedef enum {
ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain
ACTIVITY_DOMAIN_HIP_OPS = 2, // HIP async activity domain
ACTIVITY_DOMAIN_HCC_OPS = ACTIVITY_DOMAIN_HIP_OPS, // HCC async activity domain
ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HIP_OPS, // HIP VDI async activity domain
ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain
ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HCC_OPS, // HIP VDI domain
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
@@ -60,13 +61,22 @@ typedef enum {
typedef uint64_t activity_correlation_id_t;
// Activity record type
struct activity_record_t {
typedef struct activity_record_s {
uint32_t domain; // activity domain id
activity_kind_t kind; // activity kind
activity_op_t op; // activity op
activity_correlation_id_t correlation_id; // activity ID
uint64_t begin_ns; // host begin timestamp
uint64_t end_ns; // host end timestamp
union {
struct {
activity_correlation_id_t correlation_id; // activity ID
uint64_t begin_ns; // host begin timestamp
uint64_t end_ns; // host end timestamp
};
struct {
uint32_t se; // sampled SE
uint64_t cycle; // sample cycle
uint64_t pc; // sample PC
} pc_sample;
};
union {
struct {
int device_id; // device id
@@ -81,7 +91,7 @@ struct activity_record_t {
};
};
size_t bytes; // data size bytes
};
} activity_record_t;
// Activity sync calback type
typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg);
+60 -26
Просмотреть файл
@@ -38,10 +38,13 @@ THE SOFTWARE.
#include <stdint.h>
#include <stddef.h>
#ifndef __cplusplus
#include <stdbool.h>
#endif
#include "ext/prof_protocol.h"
#define ROCTRACER_VERSION_MAJOR 1
#define ROCTRACER_VERSION_MAJOR 2
#define ROCTRACER_VERSION_MINOR 0
#ifdef __cplusplus
@@ -64,6 +67,7 @@ typedef enum {
ROCTRACER_STATUS_BAD_PARAMETER = 5,
ROCTRACER_STATUS_HIP_API_ERR = 6,
ROCTRACER_STATUS_HCC_OPS_ERR = 7,
ROCTRACER_STATUS_HSA_ERR = 7,
ROCTRACER_STATUS_ROCTX_ERR = 8,
} roctracer_status_t;
@@ -89,7 +93,7 @@ roctracer_status_t roctracer_op_code(
uint32_t domain, // tracing domain
const char* str, // [in] op string
uint32_t* op, // [out] op code
uint32_t* kind = NULL); // [out] op kind code
uint32_t* kind); // [out] op kind code if not NULL
////////////////////////////////////////////////////////////////////////////////
// Callback API
@@ -137,7 +141,7 @@ roctracer_status_t roctracer_disable_callback();
typedef activity_record_t roctracer_record_t;
// Return next record
static inline int roctracer_next_record(
static inline roctracer_status_t roctracer_next_record(
const activity_record_t* record, // [in] record ptr
const activity_record_t** next) // [out] next record ptr
{
@@ -172,31 +176,59 @@ typedef void roctracer_pool_t;
// Create tracer memory pool
// The first invocation sets the default pool
roctracer_status_t roctracer_open_pool(
roctracer_status_t roctracer_open_pool_expl(
const roctracer_properties_t* properties, // tracer pool properties
roctracer_pool_t** pool = NULL); // [out] returns tracer pool if not NULL,
roctracer_pool_t** pool); // [out] returns tracer pool if not NULL,
// otherwise sets the default one if it is not set yet
static inline roctracer_status_t roctracer_open_pool(
const roctracer_properties_t* properties) // tracer pool properties
{
return roctracer_open_pool_expl(properties, NULL);
}
// otherwise the error is generated
// Close tracer memory pool
roctracer_status_t roctracer_close_pool(
roctracer_pool_t* pool = NULL); // [in] memory pool, NULL is a default one
roctracer_status_t roctracer_close_pool_expl(
roctracer_pool_t* pool); // [in] memory pool, NULL is a default one
static inline roctracer_status_t roctracer_close_pool()
{
return roctracer_close_pool_expl(NULL);
}
// Return current default pool
// Set new default pool if the argument is not NULL
roctracer_pool_t* roctracer_default_pool(
roctracer_pool_t* pool = NULL); // [in] new default pool if not NULL
roctracer_pool_t* roctracer_default_pool_expl(
roctracer_pool_t* pool); // [in] new default pool if not NULL
static inline roctracer_pool_t* roctracer_default_pool()
{
return roctracer_default_pool_expl(NULL);
}
// Enable activity records logging
roctracer_status_t roctracer_enable_op_activity(
roctracer_status_t roctracer_enable_op_activity_expl(
activity_domain_t domain, // tracing domain
uint32_t op, // activity op ID
roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one
roctracer_status_t roctracer_enable_domain_activity(
roctracer_pool_t* pool); // memory pool, NULL is a default one
static inline roctracer_status_t roctracer_enable_op_activity(
activity_domain_t domain, // tracing domain
roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one
roctracer_status_t roctracer_enable_activity(
roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one
uint32_t op) // activity op ID
{
return roctracer_enable_op_activity_expl(domain, op, NULL);
}
roctracer_status_t roctracer_enable_domain_activity_expl(
activity_domain_t domain, // tracing domain
roctracer_pool_t* pool); // memory pool, NULL is a default one
static inline roctracer_status_t roctracer_enable_domain_activity(
activity_domain_t domain) // tracing domain
{
return roctracer_enable_domain_activity_expl(domain, NULL);
}
roctracer_status_t roctracer_enable_activity_expl(
roctracer_pool_t* pool); // memory pool, NULL is a default one
static inline roctracer_status_t roctracer_enable_activity()
{
return roctracer_enable_activity_expl(NULL);
}
// Disable activity records logging
roctracer_status_t roctracer_disable_op_activity(
@@ -207,24 +239,26 @@ roctracer_status_t roctracer_disable_domain_activity(
roctracer_status_t roctracer_disable_activity();
// Flush available activity records
roctracer_status_t roctracer_flush_activity(
roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one
roctracer_status_t roctracer_flush_activity_expl(
roctracer_pool_t* pool); // memory pool, NULL is a default one
static inline roctracer_status_t roctracer_flush_activity()
{
return roctracer_flush_activity_expl(NULL);
}
// Get system timestamp
roctracer_status_t roctracer_get_timestamp(
uint64_t* timestamp); // [out] return timestamp
// Load/Unload methods
bool roctracer_load();
void roctracer_unload();
// Set properties
roctracer_status_t roctracer_set_properties(
roctracer_domain_t domain, // tracing domain
void* propertes); // tracing properties
struct HsaApiTable;
bool roctracer_load(
HsaApiTable* table,
uint64_t runtime_version,
uint64_t failed_tool_count,
const char* const* failed_tool_names);
void roctracer_unload(bool destruct);
#ifdef __cplusplus
} // extern "C" block
#endif // __cplusplus
+3 -3
Просмотреть файл
@@ -46,7 +46,7 @@ extern "C" {
#endif // __cplusplus
////////////////////////////////////////////////////////////////////////////////
// Application annotatin API
// Application annotation API
// Tracing start API
void roctracer_start();
@@ -63,8 +63,8 @@ roctracer_status_t roctracer_activity_push_external_correlation_id(activity_corr
// Notifies that the calling thread is leaving an external API region.
// Pop an external correlation id for the calling thread.
// 'lastId' returns the last external correlation
roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id = NULL);
// 'lastId' returns the last external correlation if not NULL
roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id);
#ifdef __cplusplus
} // extern "C" block
+10 -14
Просмотреть файл
@@ -23,26 +23,22 @@ THE SOFTWARE.
#ifndef INC_ROCTRACER_HCC_H_
#define INC_ROCTRACER_HCC_H_
#if HIP_VDI
#define HIP_OP_ID_NUMBER 3
#define HIP_OP_ID_COPY 1
enum {
HIP_OP_ID_DISPATCH = 0,
HIP_OP_ID_COPY = 1,
HIP_OP_ID_BARRIER = 2,
HIP_OP_ID_NUMBER = 3
};
#ifdef __cplusplus
extern "C" {
#endif
typedef void (hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg);
typedef bool (hipEnableAsyncActivityCallback_t)(unsigned op, bool enable);
typedef const char* (hipGetOpName_t)(unsigned op);
#ifdef __cplusplus
}
#else // !HIP_VDI
#if LOCAL_BUILD
#include <hc_prof_runtime.h>
#else
#include <hcc/hc_prof_runtime.h>
#endif
#define HIP_OP_ID_NUMBER hc::HSA_OP_ID_NUMBER
#define HIP_OP_ID_COPY hc::HSA_OP_ID_COPY
typedef decltype(Kalmar::CLAMP::InitActivityCallback) hipInitAsyncActivityCallback_t;
typedef decltype(Kalmar::CLAMP::EnableActivityCallback) hipEnableAsyncActivityCallback_t;
typedef decltype(Kalmar::CLAMP::GetCmdName) hipGetOpName_t;
#endif // !HIP_VDI
#include "roctracer.h"
+1 -1
Просмотреть файл
@@ -33,7 +33,7 @@ extern "C" {
#endif // __cplusplus
// Traced calls ID enumeration
typedef hip_api_id_t roctracer_hip_api_cid_t;
typedef enum hip_api_id_t roctracer_hip_api_cid_t;
#ifdef __cplusplus
} // extern "C" block
+17 -4
Просмотреть файл
@@ -22,16 +22,25 @@ THE SOFTWARE.
#ifndef INC_ROCTRACER_HSA_H_
#define INC_ROCTRACER_HSA_H_
#include <iostream>
#include <mutex>
#include <hsa.h>
#include <hsa_api_trace.h>
#include <hsa_ext_amd.h>
#include "cb_table.h"
#include "roctracer.h"
// HSA OP ID enumeration
enum hsa_op_id_t {
HSA_OP_ID_DISPATCH = 0,
HSA_OP_ID_COPY = 1,
HSA_OP_ID_BARRIER = 2,
HSA_OP_ID_PCSAMPLE = 3,
HSA_OP_ID_NUMBER = 4
};
#ifdef __cplusplus
#include <iostream>
#include <hsa_api_trace.h>
namespace roctracer {
namespace hsa_support {
enum {
@@ -129,6 +138,10 @@ struct output_streamer<hsa_queue_t**> {
inline static std::ostream& put(std::ostream& out, hsa_queue_t** v) { out << "<queue " << *v << ">"; return out; }
};
};};
#else // !__cplusplus
typedef void* hsa_amd_queue_intercept_handler;
typedef void* hsa_amd_runtime_queue_notifier;
#endif //! __cplusplus
#include "inc/hsa_prof_str.h"
#endif // INC_ROCTRACER_HSA_H_
+2 -2
Просмотреть файл
@@ -23,11 +23,11 @@ THE SOFTWARE.
/////////////////////////////////////////////////////////////////////////////
#ifndef INC_ROCTRACER_KFD_H_
#define INC_ROCTRACER_KFD_H_
#include <iostream>
#include "roctracer.h"
#include "hsakmt.h"
#ifdef __cplusplus
#include "inc/kfd_ostream_ops.h"
#endif
#include "inc/kfd_prof_str.h"
#endif // INC_ROCTRACER_KFD_H_
+5 -6
Просмотреть файл
@@ -33,8 +33,6 @@ THE SOFTWARE.
#ifndef INC_ROCTRACER_ROCTX_H_
#define INC_ROCTRACER_ROCTX_H_
#include "cb_table.h"
// ROC-TX API ID enumeration
enum roctx_api_id_t {
ROCTX_API_ID_roctxMarkA = 0,
@@ -45,7 +43,7 @@ enum roctx_api_id_t {
};
// ROCTX callbacks data type
struct roctx_api_data_t {
typedef struct roctx_api_data_s {
union {
const char* message;
struct {
@@ -58,14 +56,15 @@ struct roctx_api_data_t {
const char* message;
} roctxRangePop;
} args;
};
} roctx_api_data_t;
#ifdef __cplusplus
#include "cb_table.h"
namespace roctx {
// ROCTX callbacks table type
typedef roctracer::CbTable<ROCTX_API_ID_NUMBER> cb_table_t;
} // namespace roctx
#endif
#ifdef __cplusplus
extern "C" {
Обычный файл → Исполняемый файл
+41 -32
Просмотреть файл
@@ -76,7 +76,7 @@ HEADER = \
'\n'
structs_done = {}
def process_struct(f,c,cppHeader,nname):
def process_struct(f,c,cppHeader,nname,apiname):
if c not in cppHeader.classes:
return
@@ -104,9 +104,9 @@ def process_struct(f,c,cppHeader,nname):
if mtype != "" and "union" not in mtype:
if array_size == "":
str = " roctracer::kfd_support::output_streamer<"+mtype+">::put(out,v."+name+");\n"
str = " roctracer::" + apiname.lower() + "_support::output_streamer<"+mtype+">::put(out,v."+name+");\n"
else:
str = " roctracer::kfd_support::output_streamer<"+mtype+"["+array_size+"]>::put(out,v."+name+");\n"
str = " roctracer::" + apiname.lower() + "_support::output_streamer<"+mtype+"["+array_size+"]>::put(out,v."+name+");\n"
if nname != "" and nname not in str:
#print("injecting ",nname, "in ", str)
@@ -115,70 +115,79 @@ def process_struct(f,c,cppHeader,nname):
f.write(str)
else:
nc = prop+"::"
process_struct(f,nc,cppHeader,name)
process_struct(f,nc,cppHeader,name,apiname)
nc = prop+"::"+mtype+" "
process_struct(f,nc,cppHeader,name)
process_struct(f,nc,cppHeader,name,apiname)
nc = c+"::"
process_struct(f,nc,cppHeader,name)
process_struct(f,nc,cppHeader,name,apiname)
def gen_cppheader(infilepath,outfilepath):
def gen_cppheader(infilepath, outfilepath):
try:
cppHeader = CppHeaderParser.CppHeader(infilepath)
except CppHeaderParser.CppParseError as e:
print(e)
sys.exit(1)
mpath = os.path.dirname(outfilepath)
if mpath == "":
mpath = os.getcwd()
apiname = outfilepath.replace(mpath+"/","")
apiname = apiname.replace("_ostream_ops.h","")
apiname = apiname.upper()
f = open(outfilepath,"w+")
f2 = open(mpath + "/basic_ostream_ops.h","w+")
f.write("// automatically generated\n")
f.write(LICENSE)
f.write("\n")
f2.write("// automatically generated\n")
f.write(LICENSE + '\n')
f2.write(LICENSE + '\n')
HEADER_S = \
'#ifndef INC_KFD_OSTREAM_OPS_H_\n' + \
'#define INC_KFD_OSTREAM_OPS_H_\n' + \
'#ifndef INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
'#define INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
'#include <iostream>\n' + \
'\n' + \
'#include "roctracer.h"\n' + \
'#include "hsakmt.h"\n'
'#include "roctracer.h"\n'
f.write(HEADER_S)
f.write('\n')
f.write('namespace roctracer {\n')
f.write('namespace kfd_support {\n')
f.write('// begin ostream ops for KFD \n')
f.write(HEADER)
f.write('namespace ' + apiname.lower() + '_support {\n')
f.write('// begin ostream ops for '+ apiname + ' \n')
f.write('#include "basic_ostream_ops.h"' + '\n')
f2.write(HEADER)
for c in cppHeader.classes:
if "union" in c:
continue
f.write("\ntemplate<>\n")
f.write("struct output_streamer<"+c+"&> {\n")
f.write(" inline static std::ostream& put(std::ostream& out, "+c+"& v)\n")
f.write("{\n")
process_struct(f,c,cppHeader,"")
f.write(" return out;\n")
f.write("}\n")
f.write("};\n")
if len(cppHeader.classes[c]["properties"]["public"])!=0:
f.write("\ntemplate<>\n")
f.write("struct output_streamer<"+c+"&> {\n")
f.write(" inline static std::ostream& put(std::ostream& out, "+c+"& v)\n")
f.write("{\n")
process_struct(f,c,cppHeader,"",apiname)
f.write(" return out;\n")
f.write("}\n")
f.write("};\n")
FOOTER = \
'// end ostream ops for KFD \n'
'// end ostream ops for '+ apiname + ' \n'
FOOTER += '};};\n' + \
'\n' + \
'#endif // INC_KFD_OSTREAM_OPS_H_\n' + \
'#endif // INC_' + apiname + '_OSTREAM_OPS_H_\n' + \
' \n'
FOOTER2 = '\n\n' + \
'#endif // INC_BASIC_OSTREAM_OPS_H_\n' + \
' \n'
f.write(FOOTER)
f.close()
f2.close()
print('File ' + outfilepath + ' generated')
print('File ' + mpath + '/basic_ostream_ops.h generated')
return
parser = argparse.ArgumentParser(description='genOstreamOps.py: generates ostream operators for all typedefs in provided input file.')
requiredNamed = parser.add_argument_group('Required arguments')
requiredNamed.add_argument('-in','--in', help='Header file to be parsed', required=True)
requiredNamed.add_argument('-out','--out', help='Output file with ostream operators', required=True)
requiredNamed.add_argument('-in', metavar='file', help='Header file to be parsed', required=True)
requiredNamed.add_argument('-out', metavar='file', help='Output file with ostream operators', required=True)
args = vars(parser.parse_args())
if __name__ == '__main__':
gen_cppheader(args['in'],args['out'])
+8 -5
Просмотреть файл
@@ -1,4 +1,5 @@
#!/usr/bin/python
from __future__ import print_function
import os, sys, re
OUT='inc/hsa_prof_str.h'
@@ -36,7 +37,7 @@ LICENSE = \
#############################################################
# Error handler
def fatal(module, msg):
print >>sys.stderr, module + ' Error: "' + msg + '"'
print (module + ' Error: "' + msg + '"', file = sys.stderr)
sys.exit(1)
# Get next text block
@@ -342,8 +343,8 @@ class API_DescrParser:
self.content += ' ' + self.api_id[call] + ' = ' + str(n) + ',\n'
else:
self.content += '\n'
self.content += ' HSA_API_ID_NUMBER = ' + str(n) + ',\n'
self.content += ' HSA_API_ID_ANY = ' + str(n + 1) + ',\n'
self.content += ' HSA_API_ID_DISPATCH = ' + str(n) + ',\n'
self.content += ' HSA_API_ID_NUMBER = ' + str(n + 1) + ',\n'
self.content += '};\n'
# generate API args structure
@@ -440,6 +441,7 @@ class API_DescrParser:
# generate stream operator
def gen_out_stream(self, n, name, call, struct):
if n == -1:
self.content += '#ifdef __cplusplus\n'
self.content += 'typedef std::pair<uint32_t, hsa_api_data_t> hsa_api_data_pair_t;\n'
self.content += 'inline std::ostream& operator<< (std::ostream& out, const hsa_api_data_pair_t& data_pair) {\n'
self.content += ' const uint32_t cid = data_pair.first;\n'
@@ -483,12 +485,13 @@ class API_DescrParser:
self.content += ' }\n'
self.content += ' return out;\n'
self.content += '}\n'
self.content += '#endif\n'
#############################################################
# main
# Usage
if len(sys.argv) != 3:
print >>sys.stderr, "Usage:", sys.argv[0], " <rocTracer root> <HSA runtime include path>"
print ("Usage:", sys.argv[0], " <rocTracer root> <HSA runtime include path>", file=sys.stderr)
sys.exit(1)
else:
ROOT = sys.argv[1] + '/'
@@ -497,7 +500,7 @@ else:
descr = API_DescrParser(OUT, HSA_DIR, API_TABLES_H, API_HEADERS_H, LICENSE)
out_file = ROOT + OUT
print 'Generating "' + out_file + '"'
print ('Generating "' + out_file + '"')
f = open(out_file, 'w')
f.write(descr.content[:-1])
f.close()
+12 -10
Просмотреть файл
@@ -1,4 +1,5 @@
#!/usr/bin/python
from __future__ import print_function
import os, sys, re
OUT_H = 'inc/kfd_prof_str.h'
@@ -33,7 +34,7 @@ LICENSE = \
#############################################################
# Error handler
def fatal(module, msg):
print >>sys.stderr, module + ' Error: "' + msg + '"'
print (module + ' Error: "' + msg + '"', file = sys.stderr)
sys.exit(1)
# Get next text block
@@ -284,7 +285,6 @@ class API_DescrParser:
self.content_h += '#include <string.h>\n'
self.content_h += '#include \"roctracer_kfd.h\"\n'
self.content_h += '#include \"hsakmt.h\"\n'
self.content_h += '#include \"cb_table.h\"\n'
self.content_h += '#define PUBLIC_API __attribute__((visibility(\"default\")))\n'
@@ -293,6 +293,7 @@ class API_DescrParser:
self.content_h += '\n'
self.content_h += '#if PROF_API_IMPL\n'
self.content_h += '#include \"cb_table.h\"\n'
self.content_h += 'namespace roctracer {\n'
self.content_h += 'namespace kfd_support {\n'
@@ -372,7 +373,7 @@ class API_DescrParser:
# generate API args structure
def gen_arg_struct(self, n, name, call, struct):
if n == -1:
self.content_h += 'struct kfd_api_data_t {\n'
self.content_h += 'typedef struct kfd_api_data_s {\n'
self.content_h += ' uint64_t correlation_id;\n'
self.content_h += ' uint32_t phase;\n'
if len(self.api_rettypes) != 0:
@@ -394,7 +395,7 @@ class API_DescrParser:
self.content_h += ' } ' + call + ';\n'
else:
self.content_h += ' } args;\n'
self.content_h += '};\n'
self.content_h += '} kfd_api_data_t;\n'
# generate API callbacks
def gen_callbacks(self, n, name, call, struct):
@@ -406,8 +407,7 @@ class API_DescrParser:
call_id = self.api_id[call];
ret_type = struct['ret']
self.content_h += ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n' # 'static ' +
if call == 'hsaKmtOpenKFD':
self.content_h += ' if (' + name + '_table == NULL) intercept_KFDApiTable();\n'
self.content_h += ' if (' + name + '_table == NULL) intercept_KFDApiTable();\n'
self.content_h += ' kfd_api_data_t api_data{};\n'
for var in struct['alst']:
self.content_h += ' api_data.args.' + call + '.' + var.replace("[]","") + ' = ' + var.replace("[]","") + ';\n'
@@ -477,6 +477,7 @@ class API_DescrParser:
# generate stream operator
def gen_out_stream(self, n, name, call, struct):
if n == -1:
self.content_h += '#ifdef __cplusplus\n'
self.content_h += 'typedef std::pair<uint32_t, kfd_api_data_t> kfd_api_data_pair_t;\n'
self.content_h += 'inline std::ostream& operator<< (std::ostream& out, const kfd_api_data_pair_t& data_pair) {\n'
self.content_h += ' const uint32_t cid = data_pair.first;\n'
@@ -510,6 +511,7 @@ class API_DescrParser:
self.content_h += ' }\n'
self.content_h += ' return out;\n'
self.content_h += '}\n'
self.content_h += '#endif\n'
self.content_cpp += 'inline std::ostream& operator<< (std::ostream& out, const HsaMemFlags& v) { out << "HsaMemFlags"; return out; }\n'
# generate PUBLIC_API for all API fcts
@@ -525,7 +527,7 @@ class API_DescrParser:
self.content_cpp += ' return true;\n';
self.content_cpp += '}\n\n';
if call != '-':
if call != '-' and call != 'hsaKmtCloseKFD' and call != 'hsaKmtOpenKFD':
self.content_cpp += 'PUBLIC_API ' + struct['ret'] + " " + call + '(' + struct['args'] + ') { return roctracer::kfd_support::' + call + '_callback('
for i in range(0,len(struct['alst'])):
if i == (len(struct['alst'])-1):
@@ -538,7 +540,7 @@ class API_DescrParser:
# main
# Usage
if len(sys.argv) != 3:
print >>sys.stderr, "Usage:", sys.argv[0], " <rocTracer root> <KFD include path>"
print ("Usage:", sys.argv[0], " <rocTracer root> <KFD include path>", file = sys.stderr)
sys.exit(1)
else:
ROOT = sys.argv[1] + '/'
@@ -547,13 +549,13 @@ else:
descr = API_DescrParser(OUT_H, KFD_DIR, API_HEADERS_H, LICENSE)
out_file = ROOT + OUT_H
print 'Generating "' + out_file + '"'
print ('Generating "' + out_file + '"')
f = open(out_file, 'w')
f.write(descr.content_h[:-1])
f.close()
out_file = ROOT + OUT_CPP
print 'Generating "' + out_file + '"'
print ('Generating "' + out_file + '"')
f = open(out_file, 'w')
f.write(descr.content_cpp[:-1])
f.close()
+10 -11
Просмотреть файл
@@ -17,17 +17,16 @@ target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ )
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/hsaap.py ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH}" )
# Generating KFD/Thunk tracing primitives
if ( DEFINED KFD_WRAPPER )
set ( KFD_LIB "kfdwrapper64" )
set ( KFD_LIB_SRC
${LIB_DIR}/kfd/kfd_wrapper.cpp
)
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" )
add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} )
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} )
target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ )
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" )
endif()
set ( KFD_LIB "kfdwrapper64" )
set ( KFD_LIB_SRC
${LIB_DIR}/kfd/kfd_wrapper.cpp
)
execute_process ( COMMAND sh -xc "${CMAKE_CXX_COMPILER} -E ${HSA_KMT_INC_PATH}/hsakmttypes.h > ${PROJECT_BINARY_DIR}/hsakmttypes_pp.h" )
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${PROJECT_BINARY_DIR}/hsakmttypes_pp.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" )
add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} )
target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} )
target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ )
execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" )
set ( ROCTX_LIB "roctx64" )
set ( ROCTX_LIB_SRC
+1 -1
Просмотреть файл
@@ -47,7 +47,7 @@ class Journal {
}
~Journal() {
for (auto& val : map_) delete val.second;
for (auto& val : *map_) delete val.second;
delete map_;
}
+47 -10
Просмотреть файл
@@ -21,11 +21,10 @@ class BaseLoader : public T {
if (handle_ == NULL) return NULL;
fun_t *f = (fun_t*) dlsym(handle_, fun_name);
if (f == NULL) {
if ((to_check_symb_ == true) && (f == NULL)) {
fprintf(stderr, "roctracer: symbol lookup '%s' failed: \"%s\"\n", fun_name, dlerror());
abort();
}
dlerror();
return f;
}
@@ -46,13 +45,12 @@ class BaseLoader : public T {
private:
BaseLoader() {
const int flags = RTLD_LAZY;
const int flags = (to_load_ == true) ? RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD;
handle_ = dlopen(lib_name_, flags);
if (handle_ == NULL) {
if ((to_check_open_ == true) && (handle_ == NULL)) {
fprintf(stderr, "roctracer: Loading '%s' failed, %s\n", lib_name_, dlerror());
abort();
}
dlerror();
T::init(this);
}
@@ -61,12 +59,43 @@ class BaseLoader : public T {
if (handle_ != NULL) dlclose(handle_);
}
static bool to_load_;
static bool to_check_open_;
static bool to_check_symb_;
static mutex_t mutex_;
static const char* lib_name_;
static std::atomic<loader_t*> instance_;
void* handle_;
};
// 'rocprofiler' library loader class
class RocpApi {
public:
typedef BaseLoader<RocpApi> Loader;
typedef bool (RegisterCallback_t)(uint32_t op, void* callback, void* arg);
typedef bool (OperateCallback_t)(uint32_t op);
typedef bool (InitCallback_t)(void* callback, void* arg);
typedef bool (EnableCallback_t)(uint32_t op, bool enable);
typedef const char* (NameCallback_t)(uint32_t op);
RegisterCallback_t* RegisterApiCallback;
OperateCallback_t* RemoveApiCallback;
InitCallback_t* InitActivityCallback;
EnableCallback_t* EnableActivityCallback;
NameCallback_t* GetOpName;
protected:
void init(Loader* loader) {
RegisterApiCallback = loader->GetFun<RegisterCallback_t>("RegisterApiCallback");
RemoveApiCallback = loader->GetFun<OperateCallback_t>("RemoveApiCallback");
InitActivityCallback = loader->GetFun<InitCallback_t>("InitActivityCallback");
EnableActivityCallback = loader->GetFun<EnableCallback_t>("EnableActivityCallback");
GetOpName = loader->GetFun<NameCallback_t>("GetOpName");
}
};
// HIP runtime library loader class
class HipApi {
public:
@@ -110,9 +139,9 @@ class HccApi {
protected:
void init(Loader* loader) {
#if HIP_VDI
InitActivityCallback = loader->GetFun<hipInitAsyncActivityCallback_t>("InitActivityCallback");
EnableActivityCallback = loader->GetFun<hipEnableAsyncActivityCallback_t>("EnableActivityCallback");
GetOpName = loader->GetFun<hipGetOpName_t>("GetCmdName");
InitActivityCallback = loader->GetFun<hipInitAsyncActivityCallback_t>("hipInitActivityCallback");
EnableActivityCallback = loader->GetFun<hipEnableAsyncActivityCallback_t>("hipEnableActivityCallback");
GetOpName = loader->GetFun<hipGetOpName_t>("hipGetCmdName");
#else
InitActivityCallback = loader->GetFun<hipInitAsyncActivityCallback_t>("InitActivityCallbackImpl");
EnableActivityCallback = loader->GetFun<hipEnableAsyncActivityCallback_t>("EnableActivityCallbackImpl");
@@ -161,6 +190,7 @@ class RocTxApi {
}
};
typedef BaseLoader<RocpApi> RocpLoader;
typedef BaseLoader<HipApi> HipLoader;
typedef BaseLoader<HccApi> HccLoader;
typedef BaseLoader<KfdApi> KfdLoader;
@@ -171,9 +201,16 @@ typedef BaseLoader<RocTxApi> RocTxLoader;
#define LOADER_INSTANTIATE() \
template<class T> typename roctracer::BaseLoader<T>::mutex_t roctracer::BaseLoader<T>::mutex_; \
template<class T> std::atomic<roctracer::BaseLoader<T>*> roctracer::BaseLoader<T>::instance_{}; \
template<class T> bool roctracer::BaseLoader<T>::to_load_ = false; \
template<class T> bool roctracer::BaseLoader<T>::to_check_open_ = true; \
template<class T> bool roctracer::BaseLoader<T>::to_check_symb_ = true; \
template<> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \
template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \
template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp_hsa.so"; \
template<> bool roctracer::HipLoader::to_check_open_ = false; \
template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp.so"; \
template<> bool roctracer::HccLoader::to_check_open_ = false; \
template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \
template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so";
template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \
template<> bool roctracer::RocTxLoader::to_load_ = true;
#endif // SRC_CORE_LOADER_H_
+170 -104
Просмотреть файл
@@ -27,9 +27,7 @@ THE SOFTWARE.
#include "inc/roctracer_roctx.h"
#define PROF_API_IMPL 1
#include "inc/roctracer_hsa.h"
#ifdef KFD_WRAPPER
#include "inc/roctracer_kfd.h"
#endif
#include <dirent.h>
#include <pthread.h>
@@ -87,9 +85,13 @@ THE SOFTWARE.
(void)err; \
return X;
#ifndef onload_debug
#define onload_debug false
#endif
#define ONLOAD_TRACE(str) \
if (getenv("ROCP_ONLOAD_TRACE")) do { \
std::cout << "PID(" << GetPid() << "): TRACER_LIB::" << __FUNCTION__ << " " << str << std::endl << std::flush; \
} while(0);
#define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin")
#define ONLOAD_TRACE_END() ONLOAD_TRACE("end")
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
@@ -174,7 +176,7 @@ decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
typedef decltype(roctracer_enable_op_callback)* roctracer_enable_op_callback_t;
typedef decltype(roctracer_disable_op_callback)* roctracer_disable_op_callback_t;
typedef decltype(roctracer_enable_op_activity)* roctracer_enable_op_activity_t;
typedef decltype(roctracer_enable_op_activity_expl)* roctracer_enable_op_activity_t;
typedef decltype(roctracer_disable_op_activity)* roctracer_disable_op_activity_t;
struct cb_journal_data_t {
@@ -251,18 +253,16 @@ class GlobalCounter {
public:
typedef std::mutex mutex_t;
typedef uint64_t counter_t;
typedef std::atomic<counter_t> atomic_counter_t;
static counter_t Increment() {
std::lock_guard<mutex_t> lock(mutex_);
return ++counter_;
}
static counter_t Increment() { return counter_.fetch_add(1, std::memory_order_relaxed); }
private:
static mutex_t mutex_;
static counter_t counter_;
static atomic_counter_t counter_;
};
GlobalCounter::mutex_t GlobalCounter::mutex_;
GlobalCounter::counter_t GlobalCounter::counter_ = 0;
GlobalCounter::atomic_counter_t GlobalCounter::counter_{1};
// Records storage
struct roctracer_api_data_t {
@@ -284,6 +284,7 @@ typedef std::map<activity_correlation_id_t, activity_correlation_id_t> correlati
typedef std::mutex correlation_id_mutex_t;
correlation_id_map_t* correlation_id_map = NULL;
correlation_id_mutex_t correlation_id_mutex;
bool correlation_id_wait = true;
static thread_local std::stack<activity_correlation_id_t> external_id_stack;
@@ -296,6 +297,7 @@ static inline void CorrelationIdRegistr(const activity_correlation_id_t& correla
static inline activity_correlation_id_t CorrelationIdLookup(const activity_correlation_id_t& correlation_id) {
auto it = correlation_id_map->find(correlation_id);
if (correlation_id_wait) while (it == correlation_id_map->end()) it = correlation_id_map->find(correlation_id);
if (it == correlation_id_map->end()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id lookup failed(" << correlation_id << ")");
return it->second;
}
@@ -330,6 +332,7 @@ void* HIP_SyncActivityCallback(
data = &(top.data.hip);
data_ptr = const_cast<hip_api_data_t*>(data);
data_ptr->phase = phase;
data_ptr->correlation_id = 0;
}
// Filing record info
@@ -391,8 +394,6 @@ void HCC_ActivityIdCallback(activity_correlation_id_t correlation_id) {
}
void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
static hsa_rt_utils::Timer timer;
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
roctracer_record_t* record_ptr = reinterpret_cast<roctracer_record_t*>(record);
record_ptr->domain = ACTIVITY_DOMAIN_HCC_OPS;
@@ -523,14 +524,36 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
return status;
}
void HSA_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
roctracer_record_t* record_ptr = reinterpret_cast<roctracer_record_t*>(record);
record_ptr->domain = ACTIVITY_DOMAIN_HSA_OPS;
pool->Write(*record_ptr);
}
// Logger routines and primitives
util::Logger::mutex_t util::Logger::mutex_;
std::atomic<util::Logger*> util::Logger::instance_{};
// Memory pool routines and primitives
MemoryPool* memory_pool = NULL;
typedef std::recursive_mutex memory_pool_mutex_t;
memory_pool_mutex_t memory_pool_mutex;
// Stop sttaus routines and primitives
unsigned stop_status_value = 0;
typedef std::mutex stop_status_mutex_t;
stop_status_mutex_t stop_status_mutex;
unsigned set_stopped(unsigned val) {
std::lock_guard<stop_status_mutex_t> lock(stop_status_mutex);
const unsigned ret = (stop_status_value ^ val);
stop_status_value = val;
return ret;
}
} // namespace roctracer
LOADER_INSTANTIATE();
TRACE_BUFFER_INSTANTIATE();
///////////////////////////////////////////////////////////////////////////////////////////////////
// Public library methods
@@ -555,24 +578,16 @@ PUBLIC_API const char* roctracer_op_string(
{
API_METHOD_PREFIX
switch (domain) {
case ACTIVITY_DOMAIN_HSA_API: {
case ACTIVITY_DOMAIN_HSA_API:
return roctracer::hsa_support::GetApiName(op);
break;
}
case ACTIVITY_DOMAIN_HCC_OPS: {
case ACTIVITY_DOMAIN_HSA_OPS:
return roctracer::RocpLoader::Instance().GetOpName(op);
case ACTIVITY_DOMAIN_HCC_OPS:
return roctracer::HccLoader::Instance().GetOpName(kind);
break;
}
case ACTIVITY_DOMAIN_HIP_API: {
case ACTIVITY_DOMAIN_HIP_API:
return roctracer::HipLoader::Instance().ApiName(op);
break;
}
#if KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: {
case ACTIVITY_DOMAIN_KFD_API:
return roctracer::kfd_support::GetApiName(op);
break;
}
#endif
default:
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
}
@@ -593,13 +608,11 @@ PUBLIC_API roctracer_status_t roctracer_op_code(
if (kind != NULL) *kind = 0;
break;
}
#ifdef KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: {
*op = roctracer::kfd_support::GetApiCode(str);
if (kind != NULL) *kind = 0;
break;
}
#endif
default:
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "limited domain ID(" << domain << ")");
}
@@ -608,13 +621,11 @@ PUBLIC_API roctracer_status_t roctracer_op_code(
static inline uint32_t get_op_num(const uint32_t& domain) {
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS: return 1;
case ACTIVITY_DOMAIN_HSA_OPS: return HSA_OP_ID_NUMBER;
case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER;
case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER;
case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER;
#ifdef KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER;
#endif
case ACTIVITY_DOMAIN_EXT_API: return 0;
case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER;
default:
@@ -631,28 +642,35 @@ static roctracer_status_t roctracer_enable_callback_fun(
void* user_data)
{
switch (domain) {
#ifdef KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: {
const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error");
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error(" << op << ") failed");
break;
}
#endif
case ACTIVITY_DOMAIN_HSA_OPS: break;
case ACTIVITY_DOMAIN_HSA_API: {
#if 0
if (op == HSA_API_ID_DISPATCH) {
const bool succ = roctracer::RocpLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error(" << op << ") failed");
break;
}
#endif
roctracer::hsa_support::cb_table.set(op, callback, user_data);
break;
}
case ACTIVITY_DOMAIN_HCC_OPS: break;
case ACTIVITY_DOMAIN_HIP_API: {
if (roctracer::HipLoader::Instance().Enabled() == false) break;
hipError_t hip_err = roctracer::HipLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterApiCallback(" << op << ") error(" << hip_err << ")");
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RegisterApiCallback(" << op << ") error(" << hip_err << ")");
break;
}
case ACTIVITY_DOMAIN_ROCTX: {
if (roctracer::RocTxLoader::Instance().Enabled()) {
const bool suc = roctracer::RocTxLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "roctxRegisterApiCallback(" << op << ") failed");
if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RegisterApiCallback(" << op << ") failed");
}
break;
}
@@ -668,8 +686,8 @@ static void roctracer_enable_callback_impl(
roctracer_rtapi_callback_t callback,
void* user_data)
{
roctracer::cb_journal->registr({domain, op, {callback, user_data}});
roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data);
roctracer::cb_journal->registr({domain, op, {callback, user_data}});
roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data);
}
PUBLIC_API roctracer_status_t roctracer_enable_op_callback(
@@ -712,25 +730,35 @@ static roctracer_status_t roctracer_disable_callback_fun(
uint32_t op)
{
switch (domain) {
#ifdef KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: {
const bool succ = roctracer::KfdLoader::Instance().RemoveApiCallback(op);
if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RemoveApiCallback error");
break;
}
#endif
case ACTIVITY_DOMAIN_HSA_OPS: break;
case ACTIVITY_DOMAIN_HSA_API: break;
case ACTIVITY_DOMAIN_HSA_API: {
#if 0
if (op == HSA_API_ID_DISPATCH) {
const bool succ = roctracer::RocpLoader::Instance().RemoveApiCallback(op);
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RemoveActivityCallback error(" << op << ") failed");
break;
}
#endif
roctracer::hsa_support::cb_table.set(op, NULL, NULL);
break;
}
case ACTIVITY_DOMAIN_HCC_OPS: break;
case ACTIVITY_DOMAIN_HIP_API: {
if (roctracer::HipLoader::Instance().Enabled() == false) break;
hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op);
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveApiCallback error(" << hip_err << ")");
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveApiCallback(" << op << "), error(" << hip_err << ")");
break;
}
case ACTIVITY_DOMAIN_ROCTX: {
if (roctracer::RocTxLoader::Instance().Enabled()) {
const bool suc = roctracer::RocTxLoader::Instance().RemoveApiCallback(op);
if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "roctxRemoveApiCallback(" << op << ") failed");
if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RemoveApiCallback(" << op << ") failed");
}
break;
}
@@ -777,7 +805,7 @@ PUBLIC_API roctracer_status_t roctracer_disable_callback()
}
// Return default pool and set new one if parameter pool is not NULL.
PUBLIC_API roctracer_pool_t* roctracer_default_pool(roctracer_pool_t* pool) {
PUBLIC_API roctracer_pool_t* roctracer_default_pool_expl(roctracer_pool_t* pool) {
std::lock_guard<roctracer::memory_pool_mutex_t> lock(roctracer::memory_pool_mutex);
roctracer_pool_t* p = reinterpret_cast<roctracer_pool_t*>(roctracer::memory_pool);
if (pool != NULL) roctracer::memory_pool = reinterpret_cast<roctracer::MemoryPool*>(pool);
@@ -785,7 +813,7 @@ PUBLIC_API roctracer_pool_t* roctracer_default_pool(roctracer_pool_t* pool) {
}
// Open memory pool
PUBLIC_API roctracer_status_t roctracer_open_pool(
PUBLIC_API roctracer_status_t roctracer_open_pool_expl(
const roctracer_properties_t* properties,
roctracer_pool_t** pool)
{
@@ -802,7 +830,7 @@ PUBLIC_API roctracer_status_t roctracer_open_pool(
}
// Close memory pool
PUBLIC_API roctracer_status_t roctracer_close_pool(roctracer_pool_t* pool) {
PUBLIC_API roctracer_status_t roctracer_close_pool_expl(roctracer_pool_t* pool) {
API_METHOD_PREFIX
std::lock_guard<roctracer::memory_pool_mutex_t> lock(roctracer::memory_pool_mutex);
roctracer_pool_t* ptr = (pool == NULL) ? roctracer_default_pool() : pool;
@@ -821,14 +849,34 @@ static roctracer_status_t roctracer_enable_activity_fun(
if (pool == NULL) pool = roctracer_default_pool();
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS: {
roctracer::hsa_support::async_copy_callback_enabled = true;
rocprofiler::InterceptQueue::Enable(true);
if (op == HSA_OP_ID_COPY) {
roctracer::hsa_support::async_copy_callback_enabled = true;
} else {
const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL);
if (init_phase == true) {
roctracer::RocpLoader::Instance().InitActivityCallback((void*)roctracer::HSA_AsyncActivityCallback,
(void*)pool);
}
const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, true);
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error");
}
break;
}
case ACTIVITY_DOMAIN_HSA_API: break;
case ACTIVITY_DOMAIN_KFD_API: break;
case ACTIVITY_DOMAIN_HCC_OPS: {
if (roctracer::HccLoader::GetRef() == NULL) {
const bool init_phase = (roctracer::HccLoader::GetRef() == NULL);
if (roctracer::HccLoader::Instance().Enabled() == false) break;
if (init_phase == true) {
if (getenv("ROCP_HCC_CORRID_WAIT") != NULL) {
roctracer::correlation_id_wait = true;
fprintf(stdout, "roctracer: HCC correlation ID wait enabled\n"); fflush(stdout);
}
if (getenv("ROCP_HCC_CORRID_NOWAIT") != NULL) {
roctracer::correlation_id_wait = false;
fprintf(stdout, "roctracer: HCC correlation ID wait disabled\n"); fflush(stdout);
}
roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback,
(void*)roctracer::HCC_AsyncActivityCallback,
(void*)pool);
@@ -838,6 +886,8 @@ static roctracer_status_t roctracer_enable_activity_fun(
break;
}
case ACTIVITY_DOMAIN_HIP_API: {
if (roctracer::HipLoader::Instance().Enabled() == false) break;
const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(op, (void*)roctracer::HIP_SyncActivityCallback, (void*)pool);
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterActivityCallback error(" << hip_err << ")");
break;
@@ -858,7 +908,7 @@ static void roctracer_enable_activity_impl(
roctracer_enable_activity_fun((roctracer_domain_t)domain, op, pool);
}
PUBLIC_API roctracer_status_t roctracer_enable_op_activity(
PUBLIC_API roctracer_status_t roctracer_enable_op_activity_expl(
roctracer_domain_t domain,
uint32_t op,
roctracer_pool_t* pool)
@@ -868,7 +918,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_op_activity(
API_METHOD_SUFFIX
}
PUBLIC_API roctracer_status_t roctracer_enable_domain_activity(
PUBLIC_API roctracer_status_t roctracer_enable_domain_activity_expl(
roctracer_domain_t domain,
roctracer_pool_t* pool)
{
@@ -878,7 +928,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_domain_activity(
API_METHOD_SUFFIX
}
PUBLIC_API roctracer_status_t roctracer_enable_activity(
PUBLIC_API roctracer_status_t roctracer_enable_activity_expl(
roctracer_pool_t* pool)
{
API_METHOD_PREFIX
@@ -896,20 +946,29 @@ static roctracer_status_t roctracer_disable_activity_fun(
{
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS: {
roctracer::hsa_support::async_copy_callback_enabled = false;
rocprofiler::InterceptQueue::Enable(false);
if (op == HSA_OP_ID_COPY) {
roctracer::hsa_support::async_copy_callback_enabled = true;
} else {
if (roctracer::RocpLoader::GetRef() == NULL) break;
const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false);
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback(false) error, op(" << op << ")");
}
break;
}
case ACTIVITY_DOMAIN_HSA_API: break;
case ACTIVITY_DOMAIN_KFD_API: break;
case ACTIVITY_DOMAIN_HCC_OPS: {
if (roctracer::HccLoader::Instance().Enabled() == false) break;
const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false);
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error domain(" << domain << ") op(" << op << ")");
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error, op(" << op << ")");
break;
}
case ACTIVITY_DOMAIN_HIP_API: {
if (roctracer::HipLoader::Instance().Enabled() == false) break;
const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op);
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveActivityCallback error(" << hip_err << ")");
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")");
break;
}
case ACTIVITY_DOMAIN_ROCTX: break;
@@ -923,8 +982,8 @@ static void roctracer_disable_activity_impl(
uint32_t domain,
uint32_t op)
{
roctracer::act_journal->remove({domain, op, {}});
roctracer_disable_activity_fun((roctracer_domain_t)domain, op);
roctracer::act_journal->remove({domain, op, {}});
roctracer_disable_activity_fun((roctracer_domain_t)domain, op);
}
PUBLIC_API roctracer_status_t roctracer_disable_op_activity(
@@ -956,11 +1015,12 @@ PUBLIC_API roctracer_status_t roctracer_disable_activity()
}
// Flush available activity records
PUBLIC_API roctracer_status_t roctracer_flush_activity(roctracer_pool_t* pool) {
PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* pool) {
API_METHOD_PREFIX
if (pool == NULL) pool = roctracer_default_pool();
roctracer::MemoryPool* memory_pool = reinterpret_cast<roctracer::MemoryPool*>(pool);
memory_pool->Flush();
roctracer::TraceBufferBase::FlushAll();
API_METHOD_SUFFIX
}
@@ -1001,16 +1061,26 @@ PUBLIC_API void roctracer_mark(const char* str) {
// Start API
PUBLIC_API void roctracer_start() {
if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb();
roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun));
roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun));
if (roctracer::set_stopped(0)) {
if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb();
roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun));
roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun));
}
}
// Stop API
PUBLIC_API void roctracer_stop() {
roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun));
roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun));
if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb();
if (roctracer::set_stopped(1)) {
roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun));
roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun));
if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb();
}
}
PUBLIC_API roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp) {
API_METHOD_PREFIX
*timestamp = util::HsaRsrcFactory::Instance().TimestampNs();
API_METHOD_SUFFIX
}
// Set properties
@@ -1021,6 +1091,8 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
API_METHOD_PREFIX
switch (domain) {
case ACTIVITY_DOMAIN_HSA_OPS: {
roctracer::trace_buffer.StartWorkerThread();
// HSA OPS properties
roctracer::hsa_ops_properties_t* ops_properties = reinterpret_cast<roctracer::hsa_ops_properties_t*>(properties);
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(ops_properties->table);
@@ -1046,12 +1118,10 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
break;
}
#ifdef KFD_WRAPPER
case ACTIVITY_DOMAIN_KFD_API: {
roctracer::kfd_support::intercept_KFDApiTable();
break;
}
#endif
case ACTIVITY_DOMAIN_HSA_API: {
// HSA API properties
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(properties);
@@ -1062,7 +1132,7 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
}
case ACTIVITY_DOMAIN_HCC_OPS:
case ACTIVITY_DOMAIN_HIP_API: {
#ifdef HIP_VDI
#if HIP_VDI
const char* hip_lib_name = "libamdhip64.so";
roctracer::HccLoader::SetLibName(hip_lib_name);
roctracer::HipLoader::SetLibName(hip_lib_name);
@@ -1082,58 +1152,54 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
API_METHOD_SUFFIX
}
// HSA-runtime tool on-load method
PUBLIC_API bool roctracer_load(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names) {
if (onload_debug) { printf("LIB roctracer_load\n"); fflush(stdout); }
static bool is_loaded = false;
if (is_loaded) return true;
static bool is_loaded = false;
PUBLIC_API bool roctracer_load() {
ONLOAD_TRACE("begin, loaded(" << is_loaded << ")");
if (is_loaded == true) return true;
is_loaded = true;
if (onload_debug) { printf("LIB roctracer_load end\n"); fflush(stdout); }
if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal;
if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal;
ONLOAD_TRACE_END();
return true;
}
PUBLIC_API void roctracer_unload(bool destruct) {
static bool is_unloaded = false;
PUBLIC_API void roctracer_unload() {
ONLOAD_TRACE("begin, loaded(" << is_loaded << ")");
if (onload_debug) { printf("LIB roctracer_unload (%d, %d)\n", (int)destruct, (int)is_unloaded); fflush(stdout); }
if (destruct == false) return;
if (is_unloaded == true) return;
is_unloaded = true;
if (is_loaded == false) return;
is_loaded = false;
if (roctracer::cb_journal != NULL) {
delete roctracer::cb_journal;
roctracer::cb_journal = NULL;
}
if (roctracer::act_journal != NULL) {
delete roctracer::act_journal;
roctracer::act_journal = NULL;
}
roctracer::trace_buffer.Flush();
roctracer::close_output_file(roctracer::kernel_file_handle);
if (onload_debug) { printf("LIB roctracer_unload end\n"); fflush(stdout); }
}
PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names) {
if (onload_debug) { printf("LIB OnLoad\n"); fflush(stdout); }
const bool ret = roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names);
if (onload_debug) { printf("LIB OnLoad end\n"); fflush(stdout); }
return ret;
}
PUBLIC_API void OnUnload() {
if (onload_debug) { printf("LIB OnUnload\n"); fflush(stdout); }
roctracer_unload(false);
if (onload_debug) { printf("LIB OnUnload end\n"); fflush(stdout); }
ONLOAD_TRACE_END();
}
CONSTRUCTOR_API void constructor() {
if (onload_debug) { printf("LIB constructor\n"); fflush(stdout); }
ONLOAD_TRACE_BEG();
roctracer::util::Logger::Create();
if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal;
if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal;
if (onload_debug) { printf("LIB constructor end\n"); fflush(stdout); }
roctracer_load();
ONLOAD_TRACE_END();
}
DESTRUCTOR_API void destructor() {
if (onload_debug) { printf("LIB destructor\n"); fflush(stdout); }
roctracer_unload(true);
ONLOAD_TRACE_BEG();
roctracer_unload();
util::HsaRsrcFactory::Destroy();
roctracer::util::Logger::Destroy();
if (onload_debug) { printf("LIB destructor end\n"); fflush(stdout); }
ONLOAD_TRACE_END();
}
} // extern "C"
+98 -22
Просмотреть файл
@@ -2,12 +2,23 @@
#define SRC_CORE_TRACE_BUFFER_H_
#include <atomic>
#include <iostream>
#include <list>
#include <mutex>
#include <sstream>
#include <pthread.h>
#include <string.h>
#include <unistd.h>
#define FATAL(stream) \
do { \
std::ostringstream oss; \
oss << __FUNCTION__ << "(), " << stream; \
std::cout << oss.str() << std::endl; \
abort(); \
} while (0)
#define PTHREAD_CALL(call) \
do { \
int err = call; \
@@ -53,8 +64,55 @@ struct trace_entry_t {
};
};
template <class T>
struct push_element_fun {
T* const elem_;
void fun(T* node) { if (node->next_elem_ == NULL) node->next_elem_ = elem_; }
push_element_fun(T* elem) : elem_(elem) {}
};
template <class T>
struct call_element_fun {
void (T::*fptr_)();
void fun(T* node) { (node->*fptr_)(); }
call_element_fun(void (T::*f)()) : fptr_(f) {}
};
struct TraceBufferBase {
typedef std::mutex mutex_t;
virtual void StartWorkerThread() = 0;
virtual void Flush() = 0;
static void StartWorkerThreadAll() { foreach(call_element_fun<TraceBufferBase>(&TraceBufferBase::StartWorkerThread)); }
static void FlushAll() { foreach(call_element_fun<TraceBufferBase>(&TraceBufferBase::Flush)); }
static void Push(TraceBufferBase* elem) {
if (head_elem_ == NULL) head_elem_ = elem;
else foreach(push_element_fun<TraceBufferBase>(elem));
}
TraceBufferBase() : next_elem_(NULL) {}
template<class F>
static void foreach(const F& f_in) {
std::lock_guard<mutex_t> lck(mutex_);
F f = f_in;
TraceBufferBase* p = head_elem_;
while (p != NULL) {
TraceBufferBase* next = p->next_elem_;
f.fun(p);
p = next;
}
}
TraceBufferBase* next_elem_;
static TraceBufferBase* head_elem_;
static mutex_t mutex_;
};
template <typename Entry>
class TraceBuffer {
class TraceBuffer : protected TraceBufferBase {
public:
typedef void (*callback_t)(Entry*);
typedef TraceBuffer<Entry> Obj;
@@ -67,7 +125,8 @@ class TraceBuffer {
};
TraceBuffer(const char* name, uint32_t size, flush_prm_t* flush_prm_arr, uint32_t flush_prm_count) :
is_flushed_(false)
is_flushed_(false),
work_thread_started_(false)
{
name_ = strdup(name);
size_ = size;
@@ -80,31 +139,43 @@ class TraceBuffer {
flush_prm_arr_ = flush_prm_arr;
flush_prm_count_ = flush_prm_count;
PTHREAD_CALL(pthread_mutex_init(&work_mutex_, NULL));
PTHREAD_CALL(pthread_cond_init(&work_cond_, NULL));
PTHREAD_CALL(pthread_create(&work_thread_, NULL, allocate_worker, this));
TraceBufferBase::Push(this);
}
~TraceBuffer() {
PTHREAD_CALL(pthread_cancel(work_thread_));
void *res;
PTHREAD_CALL(pthread_join(work_thread_, &res));
if (res != PTHREAD_CANCELED) abort_run("~TraceBuffer: consumer thread wasn't stopped correctly");
StopWorkerThread();
Flush();
}
void StartWorkerThread() {
std::lock_guard<mutex_t> lck(mutex_);
if (work_thread_started_ == false) {
PTHREAD_CALL(pthread_mutex_init(&work_mutex_, NULL));
PTHREAD_CALL(pthread_cond_init(&work_cond_, NULL));
PTHREAD_CALL(pthread_create(&work_thread_, NULL, allocate_worker, this));
work_thread_started_ = true;
}
}
void StopWorkerThread() {
std::lock_guard<mutex_t> lck(mutex_);
if (work_thread_started_ == true) {
PTHREAD_CALL(pthread_cancel(work_thread_));
void *res;
PTHREAD_CALL(pthread_join(work_thread_, &res));
if (res != PTHREAD_CANCELED) FATAL("consumer thread wasn't stopped correctly");
work_thread_started_ = false;
}
}
Entry* GetEntry() {
const pointer_t pointer = read_pointer_.fetch_add(1);
if (pointer >= end_pointer_) wrap_buffer(pointer);
if (pointer >= end_pointer_) abort_run("pointer >= end_pointer_ after buffer wrap");
if (pointer >= end_pointer_) FATAL("pointer >= end_pointer_ after buffer wrap");
return data_ + (pointer + size_ - end_pointer_);
}
void Flush() {
flush_buf();
}
void Flush() { flush_buf(); }
private:
void flush_buf() {
@@ -113,8 +184,12 @@ class TraceBuffer {
if (is_flushed == false) {
for (flush_prm_t* prm = flush_prm_arr_; prm < flush_prm_arr_ + flush_prm_count_; prm++) {
// Flushed entries type
uint32_t type = prm->type;
// Flushing function
callback_t fun = prm->fun;
if (fun == NULL) FATAL("flush function is not set");
pointer_t pointer = 0;
for (Entry* ptr : buf_list_) {
Entry* end = ptr + size_;
@@ -134,7 +209,7 @@ class TraceBuffer {
inline Entry* allocate_fun() {
Entry* ptr = (Entry*) malloc(size_ * sizeof(Entry));
if (ptr == NULL) abort_run("TraceBuffer::allocate_fun: calloc failed");
if (ptr == NULL) FATAL("malloc failed");
//memset(ptr, 0, size_ * sizeof(Entry));
return ptr;
}
@@ -156,24 +231,20 @@ class TraceBuffer {
void wrap_buffer(const pointer_t pointer) {
std::lock_guard<mutex_t> lck(mutex_);
if (work_thread_started_ == false) FATAL("worker thread is not started");
PTHREAD_CALL(pthread_mutex_lock(&work_mutex_));
if (pointer >= end_pointer_) {
data_ = next_;
next_ = NULL;
PTHREAD_CALL(pthread_cond_signal(&work_cond_));
end_pointer_ += size_;
if (end_pointer_ == 0) abort_run("TraceBuffer::wrap_buffer: pointer overflow");
if (end_pointer_ == 0) FATAL("pointer overflow");
buf_list_.push_back(data_);
}
PTHREAD_CALL(pthread_mutex_unlock(&work_mutex_));
}
void abort_run(const char* str) {
fprintf(stderr, "%s\n", str);
fflush(stderr);
abort();
}
const char* name_;
uint32_t size_;
Entry* data_;
@@ -189,9 +260,14 @@ class TraceBuffer {
pthread_t work_thread_;
pthread_mutex_t work_mutex_;
pthread_cond_t work_cond_;
bool work_thread_started_;
mutex_t mutex_;
};
} // namespace roctracer
#define TRACE_BUFFER_INSTANTIATE() \
roctracer::TraceBufferBase* roctracer::TraceBufferBase::head_elem_ = NULL; \
roctracer::TraceBufferBase::mutex_t roctracer::TraceBufferBase::mutex_;
#endif // SRC_CORE_TRACE_BUFFER_H_
+77 -13
Просмотреть файл
@@ -44,9 +44,6 @@ POSSIBILITY OF SUCH DAMAGE.
#include <string>
#include <vector>
#include "util/exception.h"
#include "util/logger.h"
namespace util {
// Callback function to get available in the system agents
@@ -149,6 +146,11 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
CHECK_STATUS("HSA timer allocation failed",
(timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
// Time correlation
const uint32_t corr_iters = 1000;
CorrelateTime(HsaTimer::TIME_ID_CLOCK_REALTIME, corr_iters);
CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters);
// System timeout
timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_);
}
@@ -192,6 +194,8 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn;
hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn;
hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn;
hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn;
hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn;
hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn;
@@ -230,6 +234,8 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object;
hsa_api_.hsa_executable_freeze = hsa_executable_freeze;
hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol;
hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info;
hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols;
hsa_api_.hsa_system_get_info = hsa_system_get_info;
hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table;
@@ -336,6 +342,11 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status);
// GFX8 and GFX9 SGPR/VGPR block sizes
agent_info->sgpr_block_dflt = (strcmp(agent_info->gfxip, "gfx8") == 0) ? 1 : 2;
agent_info->sgpr_block_size = 8;
agent_info->vgpr_block_size = 4;
// Set GPU index
agent_info->dev_index = gpu_list_.size();
gpu_list_.push_back(agent_info);
@@ -508,22 +519,25 @@ uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t s
}
// Wait signal
void HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const {
hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
const hsa_signal_value_t exp_value = signal_value - 1;
hsa_signal_value_t ret_value = signal_value;
while (1) {
const hsa_signal_value_t signal_value =
hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, timeout_, HSA_WAIT_STATE_BLOCKED);
if (signal_value == 0) {
break;
} else {
if (signal_value == 1) WARN_LOGGING("signal waiting...");
else EXC_RAISING(HSA_STATUS_ERROR, "hsa_signal_wait_scacquire (" << signal_value << ")");
ret_value =
hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, timeout_, HSA_WAIT_STATE_BLOCKED);
if (ret_value == exp_value) break;
if (ret_value != signal_value) {
std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value
<< "), ret_value(" << ret_value << ")" << std::endl << std::flush;
abort();
}
}
return ret_value;
}
// Wait signal with signal value restore
void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
SignalWait(signal);
SignalWait(signal, signal_value);
hsa_api_.hsa_signal_store_relaxed(const_cast<hsa_signal_t&>(signal), signal_value);
}
@@ -536,7 +550,7 @@ bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src
CHECK_STATUS("hsa_signal_create()", status);
status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s);
CHECK_STATUS("hsa_amd_memory_async_copy()", status);
SignalWait(s);
SignalWait(s, 1);
status = hsa_api_.hsa_signal_destroy(s);
CHECK_STATUS("hsa_signal_destroy()", status);
}
@@ -680,9 +694,59 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t s
return write_idx;
}
const char* HsaRsrcFactory::GetKernelName(uint64_t addr) {
std::lock_guard<mutex_t> lck(mutex_);
const auto it = symbols_map_->find(addr);
if (it == symbols_map_->end()) {
fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr);
abort();
}
return strdup(it->second);
}
void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
std::lock_guard<mutex_t> lck(mutex_);
executable_tracking_on_ = true;
table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
}
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) {
hsa_symbol_kind_t value = (hsa_symbol_kind_t)0;
hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value);
CHECK_STATUS("Error in getting symbol info", status);
if (value == HSA_SYMBOL_KIND_KERNEL) {
uint64_t addr = 0;
uint32_t len = 0;
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr);
CHECK_STATUS("Error in getting kernel object", status);
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
CHECK_STATUS("Error in getting name len", status);
char *name = new char[len + 1];
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
CHECK_STATUS("Error in getting kernel name", status);
name[len] = 0;
auto ret = symbols_map_->insert({addr, name});
if (ret.second == false) {
delete[] ret.first->second;
ret.first->second = name;
}
}
return HSA_STATUS_SUCCESS;
}
hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) {
std::lock_guard<mutex_t> lck(mutex_);
if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t;
hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL);
CHECK_STATUS("Error in iterating executable symbols", status);
return hsa_api_.hsa_executable_freeze(executable, options);;
}
std::atomic<HsaRsrcFactory*> HsaRsrcFactory::instance_{};
HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_;
HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX;
hsa_pfn_t HsaRsrcFactory::hsa_api_{};
bool HsaRsrcFactory::executable_tracking_on_ = false;
HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = NULL;
} // namespace util
+105 -1
Просмотреть файл
@@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <atomic>
#include <iostream>
@@ -94,6 +95,8 @@ struct hsa_pfn_t {
decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object;
decltype(hsa_executable_freeze)* hsa_executable_freeze;
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol;
decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info;
decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols;
decltype(hsa_system_get_info)* hsa_system_get_info;
decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table;
@@ -159,6 +162,11 @@ struct AgentInfo {
// Number of Shader Arrays Per Shader Engines in Gpu
uint32_t shader_arrays_per_se;
// SGPR/VGPR block sizes
uint32_t sgpr_block_dflt;
uint32_t sgpr_block_size;
uint32_t vgpr_block_size;
};
// HSA timer class
@@ -169,6 +177,12 @@ class HsaTimer {
static const timestamp_t TIMESTAMP_MAX = UINT64_MAX;
typedef long double freq_t;
enum time_id_t {
TIME_ID_CLOCK_REALTIME = 0,
TIME_ID_CLOCK_MONOTONIC = 1,
TIME_ID_NUMBER
};
HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) {
timestamp_t sysclock_hz = 0;
hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
@@ -184,6 +198,11 @@ class HsaTimer {
return timestamp_t((freq_t)time / sysclock_factor_);
}
// Method for timespec/ns conversion
static timestamp_t timespec_to_ns(const timespec& time) {
return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec;
}
// Return timestamp in 'ns'
timestamp_t timestamp_ns() const {
timestamp_t sysclock;
@@ -192,6 +211,54 @@ class HsaTimer {
return sysclock_to_ns(sysclock);
}
// Return time in 'ns'
static timestamp_t clocktime_ns(clockid_t clock_id) {
timespec time;
clock_gettime(clock_id, &time);
return timespec_to_ns(time);
}
// Return pair of correlated values of profiling timestamp and time with
// correlation error for a given time ID and number of iterations
void correlated_pair_ns(time_id_t time_id, uint32_t iters,
timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) const {
clockid_t clock_id = 0;
switch (clock_id) {
case TIME_ID_CLOCK_REALTIME:
clock_id = CLOCK_REALTIME;
break;
case TIME_ID_CLOCK_MONOTONIC:
clock_id = CLOCK_MONOTONIC;
break;
default:
CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR);
}
std::vector<timestamp_t> ts_vec(iters);
std::vector<timespec> tm_vec(iters);
const uint32_t steps = iters - 1;
for (uint32_t i = 0; i < iters; ++i) {
hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]);
clock_gettime(clock_id, &tm_vec[i]);
}
const timestamp_t ts_base = sysclock_to_ns(ts_vec.front());
const timestamp_t tm_base = timespec_to_ns(tm_vec.front());
const timestamp_t error = (ts_vec.back() - ts_vec.front()) / (2 * steps);
timestamp_t ts_accum = 0;
timestamp_t tm_accum = 0;
for (uint32_t i = 0; i < iters; ++i) {
ts_accum += (ts_vec[i] - ts_base);
tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base);
}
*timestamp_v = (ts_accum / iters) + ts_base + error;
*time_v = (tm_accum / iters) + tm_base;
*error_v = error;
}
private:
// Timestamp frequency factor
freq_t sysclock_factor_;
@@ -293,7 +360,7 @@ class HsaRsrcFactory {
uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size);
// Wait signal
void SignalWait(const hsa_signal_t& signal) const;
hsa_signal_value_t SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
// Wait signal with signal value restore
void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
@@ -322,6 +389,11 @@ class HsaRsrcFactory {
static uint64_t Submit(hsa_queue_t* queue, const void* packet);
static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes);
// Enable executables loading tracking
static bool IsExecutableTracking() { return executable_tracking_on_; }
static void EnableExecutableTracking(HsaApiTable* table);
static const char* GetKernelName(uint64_t addr);
// Initialize HSA API table
void static InitHsaApiTable(HsaApiTable* table);
static const hsa_pfn_t* HsaApi() { return &hsa_api_; }
@@ -346,6 +418,27 @@ class HsaRsrcFactory {
if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time);
}
void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters) {
timestamp_t timestamp_v = 0;
timestamp_t time_v = 0;
timestamp_t error_v = 0;
timer_->correlated_pair_ns(time_id, iters, &timestamp_v, &time_v, &error_v);
time_shift_[time_id] = time_v - timestamp_v;
time_error_[time_id] = error_v;
}
hsa_status_t GetTime(uint32_t time_id, timestamp_t value, uint64_t* time) {
if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR;
*time = value + time_shift_[time_id];
return HSA_STATUS_SUCCESS;
}
hsa_status_t GetTimestamp(uint32_t time_id, uint64_t value, timestamp_t* timestamp) {
if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR;
*timestamp = value - time_shift_[time_id];
return HSA_STATUS_SUCCESS;
}
private:
// System agents iterating callback
static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data);
@@ -386,6 +479,13 @@ class HsaRsrcFactory {
// System agents map
std::map<hsa_agent_handle_t, const AgentInfo*> agent_map_;
// Executables loading tracking
typedef std::map<uint64_t, const char*> symbols_map_t;
static symbols_map_t* symbols_map_;
static bool executable_tracking_on_;
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options);
static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data);
// HSA runtime API table
static hsa_pfn_t hsa_api_;
@@ -403,6 +503,10 @@ class HsaRsrcFactory {
// HSA timer
HsaTimer* timer_;
// Time shift array to support time conversion
timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER];
timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER];
// CPU/kern-arg memory pools
hsa_amd_memory_pool_t *cpu_pool_;
hsa_amd_memory_pool_t *kern_arg_pool_;
+2 -2
Просмотреть файл
@@ -58,9 +58,9 @@ target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c std
## Build HSA test
execute_process ( COMMAND sh -xc "if [ ! -e ${TEST_DIR}/hsa ] ; then git clone https://github.com/ROCmSoftwarePlatform/hsa-class.git ${TEST_DIR}/hsa; fi" )
execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout a3aabb5; fi" )
execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout 777c308; fi" )
set ( TEST_DIR ${HSA_TEST_DIR} )
add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test/hsa )
## copying run script
execute_process ( COMMAND sh -xc "cp ${RUN_SCRIPT} ${PROJECT_BINARY_DIR}" )
execute_process ( COMMAND sh -xc "cp ${RUN_SCRIPT} ${PROJECT_BINARY_DIR}" )
+1 -2
Просмотреть файл
@@ -1,7 +1,6 @@
ROOT_PATH = ../..
LIB_PATH = $(ROOT_PATH)/build
ROC_LIBS = -L$(LIB_PATH) -lroctracer64
export LD_LIBRARY_PATH=$(LIB_PATH)
ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so $(LIB_PATH)/libroctx64.so
HIP_PATH?= $(wildcard /opt/rocm/hip)
ifeq (,$(HIP_PATH))
+11 -1
Просмотреть файл
@@ -23,8 +23,10 @@ THE SOFTWARE.
#include <iostream>
// hip header file
#include "hip/hip_runtime.h"
#include <hip/hip_runtime.h>
#include "roctracer_ext.h"
// roctx header file
#include <inc/roctx.h>
#define WIDTH 1024
@@ -94,15 +96,23 @@ int main() {
hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice);
roctracer_mark("before HIP LaunchKernel");
roctxMark("before hipLaunchKernel");
roctxRangePush("hipLaunchKernel");
// Lauching kernel from host
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
gpuMatrix, WIDTH);
roctracer_mark("after HIP LaunchKernel");
roctxMark("after hipLaunchKernel");
// Memory transfer from device to host
roctxRangePush("hipMemcpy");
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
roctxRangePop(); // for "hipMemcpy"
roctxRangePop(); // for "hipLaunchKernel"
// CPU MatrixTranspose computation
matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);
+20 -11
Просмотреть файл
@@ -1,8 +1,7 @@
ROOT_PATH = ../..
LIB_PATH = $(ROOT_PATH)/build
ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -lroctx64
HSA_KMT_INC_PATH ?= /opt/rocm/include
export LD_LIBRARY_PATH=$(LIB_PATH)
ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so $(LIB_PATH)/libroctx64.so
ROCM_INC_PATH ?= /opt/rocm/include
HIP_VDI ?= 0
ITERATIONS ?= 100
@@ -12,27 +11,37 @@ ifeq (,$(HIP_PATH))
endif
HIPCC=$(HIP_PATH)/bin/hipcc
TARGET=hcc
SOURCES = MatrixTranspose.cpp
OBJECTS = $(SOURCES:.cpp=.o)
EXECUTABLE=./MatrixTranspose
OBJECTS = MatrixTranspose.o
FLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${ROCM_INC_PATH}/hsa -I${ROCM_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) -DAMD_INTERNAL_BUILD=1
ifeq ($(C_TEST), 1)
COMP=gcc
SOURCES = MatrixTranspose.c
FLAGS += -DHIP_TEST=0 -D__HIP_PLATFORM_HCC__=1 -I${ROCM_INC_PATH}/hcc
else
COMP=$(HIPCC)
SOURCES = MatrixTranspose.cpp
FLAGS += -DHIP_TEST=1
endif
ifeq ($(MGPU_TEST), 1)
FLAGS += -DMGPU_TEST=1
endif
.PHONY: test
all: clean $(EXECUTABLE)
CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS)
CXX=$(HIPCC)
$(OBJECTS): $(SOURCES)
$(COMP) $(FLAGS) -c -o $@ $<
$(EXECUTABLE): $(OBJECTS)
$(HIPCC) $(OBJECTS) -o $@ $(ROC_LIBS)
test: $(EXECUTABLE)
$(EXECUTABLE)
LD_PRELOAD="$(LIB_PATH)/libkfdwrapper64.so librocprofiler64.so" $(EXECUTABLE)
clean:
rm -f $(EXECUTABLE)
Символическая ссылка
+1
Просмотреть файл
@@ -0,0 +1 @@
MatrixTranspose.cpp
+19 -8
Просмотреть файл
@@ -238,20 +238,22 @@ void api_callback(
fprintf(stdout, "<rocTX \"%s\">\n", data->args.message);
return;
}
if (domain == ACTIVITY_DOMAIN_KFD_API) {
const kfd_api_data_t* data = (const kfd_api_data_t*)(callback_data);
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s pid(%u) tid(%u)> \n",
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> \n",
roctracer_op_string(ACTIVITY_DOMAIN_KFD_API, cid, 0),
cid,
data->correlation_id,
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit",
GetPid(),
GetTid()
);
return;
}
const hip_api_data_t* data = (const hip_api_data_t*)(callback_data);
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ",
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0),
cid,
data->correlation_id,
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit");
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
switch (cid) {
case HIP_API_ID_hipMemcpy:
@@ -326,7 +328,7 @@ void activity_callback(const char* begin, const char* end, void* arg) {
record->end_ns
);
if ((record->domain == ACTIVITY_DOMAIN_HIP_API) || (record->domain == ACTIVITY_DOMAIN_KFD_API)) {
fprintf(stdout, " process_id(%u) thread_id(%u)\n",
fprintf(stdout, " process_id(%u) thread_id(%u)",
record->process_id,
record->thread_id
);
@@ -335,7 +337,13 @@ void activity_callback(const char* begin, const char* end, void* arg) {
record->device_id,
record->queue_id
);
if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)\n", record->bytes);
if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes);
} else if (record->domain == ACTIVITY_DOMAIN_HSA_OPS) {
fprintf(stdout, " se(%u) cycle(%lu) pc(%lx)",
record->pc_sample.se,
record->pc_sample.cycle,
record->pc_sample.pc
);
} else if (record->domain == ACTIVITY_DOMAIN_EXT_API) {
fprintf(stdout, " external_id(%lu)\n",
record->external_id
@@ -365,9 +373,10 @@ void init_tracing() {
// Enable HIP activity tracing
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
// Enable PC sampling
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_PCSAMPLE));
// Enable KFD API tracing
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, api_callback, NULL));
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_KFD_API));
// Enable rocTX
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, NULL));
}
@@ -385,7 +394,9 @@ void stop_tracing() {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_KFD_API));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API));
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));
ROCTRACER_CALL(roctracer_flush_activity());
printf("# STOP #############################\n");
}
+1
Просмотреть файл
@@ -90,6 +90,7 @@ eval_test "tool SYS/HSA test" ./test/MatrixTranspose "test/MatrixTranspose_sys_h
# Tracing control <delay:length:rate>
export ROCTRACER_DOMAIN="hip"
eval_test "tool period test" "ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose" "test/MatrixTranspose_hip_trace"
eval_test "tool flushing test" "ROCP_FLUSH_RATE=100000 ./test/MatrixTranspose" "test/MatrixTranspose_hip_trace_flush"
# HSA test
export ROCTRACER_DOMAIN="hsa"
+258 -171
Просмотреть файл
@@ -37,14 +37,14 @@ THE SOFTWARE.
#include <inc/roctracer_hsa.h>
#include <inc/roctracer_hip.h>
#include <inc/roctracer_hcc.h>
#ifdef KFD_WRAPPER
#include <inc/roctracer_kfd.h>
#endif
#include <inc/ext/hsa_rt_utils.hpp>
#include <src/core/loader.h>
#include <src/core/trace_buffer.h>
#include <util/xml.h>
#include "util/hsa_rsrc_factory.h"
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))
@@ -59,9 +59,12 @@ THE SOFTWARE.
} \
} while (0)
#ifndef onload_debug
#define onload_debug false
#endif
#define ONLOAD_TRACE(str) \
if (getenv("ROCP_ONLOAD_TRACE")) do { \
std::cout << "PID(" << GetPid() << "): TRACER_TOOL::" << __FUNCTION__ << " " << str << std::endl << std::flush; \
} while(0);
#define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin")
#define ONLOAD_TRACE_END() ONLOAD_TRACE("end")
typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
hsa_rt_utils::Timer* timer = NULL;
@@ -74,8 +77,13 @@ bool trace_hsa_activity = false;
bool trace_hip_api = false;
bool trace_hip_activity = false;
bool trace_kfd = false;
bool trace_pcs = false;
// API trace vector
std::vector<std::string> hsa_api_vec;
std::vector<std::string> kfd_api_vec;
LOADER_INSTANTIATE();
TRACE_BUFFER_INSTANTIATE();
// Global output file handle
FILE* roctx_file_handle = NULL;
@@ -84,18 +92,25 @@ FILE* hsa_async_copy_file_handle = NULL;
FILE* hip_api_file_handle = NULL;
FILE* hcc_activity_file_handle = NULL;
FILE* kfd_api_file_handle = NULL;
FILE* pc_sample_file_handle = NULL;
void close_output_file(FILE* file_handle);
void close_file_handles() {
if (roctx_file_handle) close_output_file(roctx_file_handle);
if (hsa_api_file_handle) close_output_file(hsa_api_file_handle);
if (hsa_async_copy_file_handle) close_output_file(hsa_async_copy_file_handle);
if (hip_api_file_handle) close_output_file(hip_api_file_handle);
if (hcc_activity_file_handle) close_output_file(hcc_activity_file_handle);
if (kfd_api_file_handle) close_output_file(kfd_api_file_handle);
if (pc_sample_file_handle) close_output_file(pc_sample_file_handle);
}
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
static inline uint32_t GetTid() { return syscall(__NR_gettid); }
// Error handler
void fatal(const std::string msg) {
fflush(roctx_file_handle);
fflush(hsa_api_file_handle);
fflush(hsa_async_copy_file_handle);
fflush(hip_api_file_handle);
fflush(hcc_activity_file_handle);
fflush(kfd_api_file_handle);
close_file_handles();
fflush(stdout);
fprintf(stderr, "%s\n\n", msg.c_str());
fflush(stderr);
@@ -121,19 +136,19 @@ void* control_thr_fun(void*) {
const uint32_t len_us = control_len_us % 1000000;
const uint32_t dist_sec = control_dist_us / 1000000;
const uint32_t dist_us = control_dist_us % 1000000;
bool start = true;
bool to_start = true;
sleep(delay_sec);
usleep(delay_us);
while (1) {
if (start) {
start = false;
if (to_start) {
to_start = false;
roctracer_start();
sleep(len_sec);
usleep(len_us);
} else {
start = true;
to_start = true;
roctracer_stop();
sleep(dist_sec);
usleep(dist_us);
@@ -141,6 +156,20 @@ void* control_thr_fun(void*) {
}
}
// Flushing control thread
uint32_t control_flush_us = 0;
void* flush_thr_fun(void*) {
const uint32_t dist_sec = control_flush_us / 1000000;
const uint32_t dist_us = control_flush_us % 1000000;
while (1) {
sleep(dist_sec);
usleep(dist_us);
roctracer_flush_activity();
roctracer::TraceBufferBase::FlushAll();
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// rocTX annotation tracing
@@ -148,7 +177,7 @@ struct roctx_trace_entry_t {
uint32_t valid;
uint32_t type;
uint32_t cid;
timestamp_t timestamp;
timestamp_t time;
uint32_t pid;
uint32_t tid;
const char* message;
@@ -165,12 +194,16 @@ static inline void roctx_callback_fun(
uint32_t tid,
const char* message)
{
const timestamp_t timestamp = timer->timestamp_fn_ns();
#if ROCTX_CLOCK_TIME
const timestamp_t time = HsaTimer::clocktime_ns(HsaTimer::TIME_ID_CLOCK_MONOTONIC);
#else
const timestamp_t time = timer->timestamp_fn_ns();
#endif
roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry();
entry->valid = roctracer::TRACE_ENTRY_COMPL;
entry->type = 0;
entry->cid = cid;
entry->timestamp = timestamp;
entry->time = time;
entry->pid = GetPid();
entry->tid = tid;
entry->message = (message != NULL) ? strdup(message) : NULL;
@@ -187,25 +220,26 @@ void roctx_api_callback(
roctx_callback_fun(domain, cid, GetTid(), data->args.message);
}
// Start/Stop callbacks
void roctx_range_stack_callback(const roctx_range_data_t* data, void* arg) {
const bool* is_stop_ptr = (bool*)arg;
const uint32_t cid = (*is_stop_ptr == true) ? ROCTX_API_ID_roctxRangePop : ROCTX_API_ID_roctxRangePushA;
const char* message = (*is_stop_ptr == true) ? NULL : data->message;
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, cid, data->tid, message);
// rocTX Start/Stop callbacks
void roctx_range_start_callback(const roctx_range_data_t* data, void* arg) {
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, data->tid, data->message);
}
void stop_callback() {
bool is_stop = true;
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop);
}
void start_callback() {
bool is_stop = false;
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop);
void roctx_range_stop_callback(const roctx_range_data_t* data, void* arg) {
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, data->tid, NULL);
}
void start_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_start_callback, NULL); }
void stop_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stop_callback, NULL); }
// rocTX buffer flush function
void roctx_flush_cb(roctx_trace_entry_t* entry) {
#if ROCTX_CLOCK_TIME
timestamp_t timestamp = 0;
HsaRsrcFactory::Instance().GetTimestamp(HsaTimer::TIME_ID_CLOCK_MONOTONIC, entry->time, &timestamp);
#else
const timestamp_t timestamp = entry->time;
#endif
std::ostringstream os;
os << entry->timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid;
os << timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid;
if (entry->message != NULL) os << ":\"" << entry->message << "\"";
else os << ":\"\"";
fprintf(roctx_file_handle, "%s\n", os.str().c_str()); fflush(roctx_file_handle);
@@ -270,6 +304,9 @@ void hsa_activity_callback(
index++;
}
///////////////////////////////////////////////////////////////////////////////////////////////////////
// HIP API tracing
struct hip_api_trace_entry_t {
uint32_t valid;
uint32_t type;
@@ -284,9 +321,6 @@ struct hip_api_trace_entry_t {
void* ptr;
};
///////////////////////////////////////////////////////////////////////////////////////////////////////
// HIP API tracing
void hip_api_flush_cb(hip_api_trace_entry_t* entry);
roctracer::TraceBuffer<hip_api_trace_entry_t>::flush_prm_t hip_flush_prm[1] = {{0, hip_api_flush_cb}};
roctracer::TraceBuffer<hip_api_trace_entry_t> hip_api_trace_buffer("HIP", 0x200000, hip_flush_prm, 1);
@@ -413,21 +447,27 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
// Activity tracing callback
// hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067)
void hcc_activity_callback(const char* begin, const char* end, void* arg) {
void pool_activity_callback(const char* begin, const char* end, void* arg) {
const roctracer_record_t* record = reinterpret_cast<const roctracer_record_t*>(begin);
const roctracer_record_t* end_record = reinterpret_cast<const roctracer_record_t*>(end);
while (record < end_record) {
const char * name = roctracer_op_string(record->domain, record->op, record->kind);
if (record->domain == ACTIVITY_DOMAIN_HCC_OPS) {
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu\n",
record->begin_ns, record->end_ns, record->device_id, record->queue_id, name, record->correlation_id);
fflush(hcc_activity_file_handle);
} else {
#if 0
fprintf(hip_api_file_handle, "%lu:%lu %u:%u %s()\n",
record->begin_ns, record->end_ns, record->process_id, record->thread_id, name);
#endif
switch(record->domain) {
case ACTIVITY_DOMAIN_HCC_OPS:
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu\n",
record->begin_ns, record->end_ns,
record->device_id, record->queue_id,
name, record->correlation_id);
fflush(hcc_activity_file_handle);
break;
case ACTIVITY_DOMAIN_HSA_OPS:
if (record->op == HSA_OP_ID_PCSAMPLE) {
fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n",
record->pc_sample.se, record->pc_sample.cycle, record->pc_sample.pc, name);
fflush(pc_sample_file_handle);
}
break;
}
ROCTRACER_CALL(roctracer_next_record(record, &record));
}
@@ -437,7 +477,7 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) {
// KFD API tracing
// KFD API callback function
#ifdef KFD_WRAPPER
static thread_local bool in_kfd_api_callback = false;
void kfd_api_callback(
uint32_t domain,
uint32_t cid,
@@ -445,6 +485,8 @@ void kfd_api_callback(
void* arg)
{
(void)arg;
if (in_kfd_api_callback) return;
in_kfd_api_callback = true;
const kfd_api_data_t* data = reinterpret_cast<const kfd_api_data_t*>(callback_data);
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
kfd_begin_timestamp = timer->timestamp_fn_ns();
@@ -454,8 +496,8 @@ void kfd_api_callback(
os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data);
fprintf(kfd_api_file_handle, "%s\n", os.str().c_str());
}
in_kfd_api_callback = false;
}
#endif
///////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -523,14 +565,73 @@ FILE* open_output_file(const char* prefix, const char* name) {
}
void close_output_file(FILE* file_handle) {
if ((file_handle != NULL) && (file_handle != stdout)) fclose(file_handle);
if (file_handle != NULL) {
fflush(file_handle);
if (file_handle != stdout) fclose(file_handle);
}
}
// HSA-runtime tool on-load method
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names) {
if (onload_debug) { printf("TOOL OnLoad\n"); fflush(stdout); }
timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn);
// Allocating tracing pool
void open_tracing_pool() {
if (roctracer_default_pool() == NULL) {
roctracer_properties_t properties{};
properties.buffer_size = 0x80000;
properties.buffer_callback_fun = pool_activity_callback;
ROCTRACER_CALL(roctracer_open_pool(&properties));
}
}
// Flush tracing pool
void close_tracing_pool() {
if (roctracer_default_pool() != NULL) {
ROCTRACER_CALL(roctracer_flush_activity());
}
}
// tool library is loaded
static bool is_loaded = false;
// tool unload method
void tool_unload() {
ONLOAD_TRACE("begin, loaded(" << is_loaded << ")");
if (is_loaded == false) return;
is_loaded = false;
if (trace_roctx) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));
}
if (trace_hsa_api) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
}
if (trace_hsa_activity || trace_pcs) {
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
}
if (trace_hip_api || trace_hip_activity) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
}
if (trace_kfd) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API));
}
// Flush tracing pool
close_tracing_pool();
roctracer::TraceBufferBase::FlushAll();
close_file_handles();
ONLOAD_TRACE_END();
}
// tool load method
void tool_load() {
ONLOAD_TRACE("begin, loaded(" << is_loaded << ")");
if (is_loaded == true) return;
is_loaded = true;
roctracer::TraceBufferBase::StartWorkerThreadAll();
// Output file
const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
@@ -571,11 +672,12 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
if (std::string(trace_domain).find("kfd") != std::string::npos) {
trace_kfd = true;
}
}
// API trace vector
std::vector<std::string> hsa_api_vec;
std::vector<std::string> kfd_api_vec;
// PC sampling enabling
if (std::string(trace_domain).find("pcs") != std::string::npos) {
trace_pcs = true;
}
}
printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout);
@@ -648,6 +750,84 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL));
}
const char* ctrl_str = getenv("ROCP_CTRL_RATE");
if (ctrl_str != NULL) {
uint32_t ctrl_delay = 0;
uint32_t ctrl_len = 0;
uint32_t ctrl_rate = 0;
sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate);
if (ctrl_len > ctrl_rate) {
fprintf(stderr, "ROCTracer: control length value (%u) > rate value (%u)\n", ctrl_len, ctrl_rate);
abort();
}
control_dist_us = ctrl_rate - ctrl_len;
control_len_us = ctrl_len;
control_delay_us = ctrl_delay;
roctracer_stop();
if (ctrl_delay != UINT32_MAX) {
fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout);
pthread_t thread;
pthread_attr_t attr;
int err = pthread_attr_init(&attr);
if (err) { errno = err; perror("pthread_attr_init"); abort(); }
err = pthread_create(&thread, &attr, control_thr_fun, NULL);
} else {
fprintf(stdout, "ROCTracer: trace start disabled\n"); fflush(stdout);
}
}
const char* flush_str = getenv("ROCP_FLUSH_RATE");
if (flush_str != NULL) {
sscanf(flush_str, "%d", &control_flush_us);
if (control_flush_us == 0) {
fprintf(stderr, "ROCTracer: control flush rate bad value\n");
abort();
}
fprintf(stdout, "ROCTracer: trace control flush rate(%uus)\n", control_flush_us); fflush(stdout);
pthread_t thread;
pthread_attr_t attr;
int err = pthread_attr_init(&attr);
if (err) { errno = err; perror("pthread_attr_init"); abort(); }
err = pthread_create(&thread, &attr, flush_thr_fun, NULL);
}
// Enable KFD API callbacks/activity
if (trace_kfd) {
kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt");
// initialize KFD tracing
roctracer_set_properties(ACTIVITY_DOMAIN_KFD_API, NULL);
printf(" KFD-trace(");
if (kfd_api_vec.size() != 0) {
for (unsigned i = 0; i < kfd_api_vec.size(); ++i) {
uint32_t cid = KFD_API_ID_NUMBER;
const char* api = kfd_api_vec[i].c_str();
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid, NULL));
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL));
printf(" %s", api);
}
} else {
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, kfd_api_callback, NULL));
}
printf(")\n");
}
ONLOAD_TRACE_END();
}
// HSA-runtime tool on-load method
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
const char* const* failed_tool_names) {
ONLOAD_TRACE_BEG();
timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn);
const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
// Enable HSA API callbacks/activity
if (trace_hsa_api) {
hsa_api_file_handle = open_output_file(output_prefix, "hsa_api_trace.txt");
@@ -660,7 +840,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
for (unsigned i = 0; i < hsa_api_vec.size(); ++i) {
uint32_t cid = HSA_API_ID_NUMBER;
const char* api = hsa_api_vec[i].c_str();
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid));
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, NULL));
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, NULL));
printf(" %s", api);
}
@@ -684,145 +864,52 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout);
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));
}
// Enable HIP API callbacks/activity
if (trace_hip_api || trace_hip_activity) {
hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt");
hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt");
fprintf(stdout, " HIP-trace()\n"); fflush(stdout);
// roctracer properties
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback);
// Allocating tracing pool
roctracer_properties_t properties{};
properties.buffer_size = 0x80000;
properties.buffer_callback_fun = hcc_activity_callback;
ROCTRACER_CALL(roctracer_open_pool(&properties));
open_tracing_pool();
// Enable tracing
if (trace_hip_api) {
hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt");
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL));
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
}
if (trace_hip_activity) {
hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt");
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
}
}
const char* ctrl_str = getenv("ROCP_CTRL_RATE");
if (ctrl_str != NULL) {
uint32_t ctrl_delay = 0;
uint32_t ctrl_len = 0;
uint32_t ctrl_rate = 0;
int ret = sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate);
if (ret != 3) {
fprintf(stderr, "ROCTracer: control rate value invalid 'delay:length:rate': '%s'\n", ctrl_str);
abort();
}
if (ctrl_len > ctrl_rate) {
fprintf(stderr, "ROCTracer: control length value (%u) > rate value (%u)\n", ctrl_len, ctrl_rate);
abort();
}
control_dist_us = ctrl_rate - ctrl_len;
control_len_us = ctrl_len;
control_delay_us = ctrl_delay;
fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout);
roctracer_stop();
pthread_t thread;
pthread_attr_t attr;
int err = pthread_attr_init(&attr);
if (err) { errno = err; perror("pthread_attr_init"); abort(); }
err = pthread_create(&thread, &attr, control_thr_fun, NULL);
// Enable PC sampling
if (trace_pcs) {
fprintf(stdout, " PCS-trace()\n"); fflush(stdout);
open_tracing_pool();
pc_sample_file_handle = open_output_file(output_prefix, "pc_sample_trace.txt");
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_PCSAMPLE));
}
#ifdef KFD_WRAPPER
// Enable KFD API callbacks/activity
if (trace_kfd) {
kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt");
// initialize KFD tracing
roctracer_set_properties(ACTIVITY_DOMAIN_KFD_API, NULL);
printf(" KFD-trace(");
if (kfd_api_vec.size() != 0) {
for (unsigned i = 0; i < kfd_api_vec.size(); ++i) {
uint32_t cid = KFD_API_ID_NUMBER;
const char* api = kfd_api_vec[i].c_str();
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid));
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL));
printf(" %s", api);
}
} else {
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, kfd_api_callback, NULL));
}
printf(")\n");
}
#endif
if (onload_debug) { printf("TOOL OnLoad end\n"); fflush(stdout); }
return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names);
}
// tool unload method
void tool_unload(bool destruct) {
static bool is_unloaded = false;
if (onload_debug) { printf("TOOL tool_unload (%d, %d)\n", (int)destruct, (int)is_unloaded); fflush(stdout); }
if (destruct == false) return;
if (is_unloaded == true) return;
is_unloaded = true;
roctracer_unload(destruct);
if (trace_roctx) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX));
roctx_trace_buffer.Flush();
close_output_file(roctx_file_handle);
}
if (trace_hsa_api) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
hsa_api_trace_buffer.Flush();
close_output_file(hsa_api_file_handle);
}
if (trace_hsa_activity) {
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
close_output_file(hsa_async_copy_file_handle);
}
if (trace_hip_api || trace_hip_activity) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
ROCTRACER_CALL(roctracer_flush_activity());
ROCTRACER_CALL(roctracer_close_pool());
hip_api_trace_buffer.Flush();
close_output_file(hip_api_file_handle);
close_output_file(hcc_activity_file_handle);
}
if (trace_kfd) {
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API));
fclose(kfd_api_file_handle);
}
if (onload_debug) { printf("TOOL tool_unload end\n"); fflush(stdout); }
ONLOAD_TRACE_END();
return true;
}
// HSA-runtime on-unload method
extern "C" PUBLIC_API void OnUnload() {
if (onload_debug) { printf("TOOL OnUnload\n"); fflush(stdout); }
tool_unload(false);
if (onload_debug) { printf("TOOL OnUnload end\n"); fflush(stdout); }
ONLOAD_TRACE("");
}
extern "C" CONSTRUCTOR_API void constructor() {
if (onload_debug) { printf("TOOL constructor ...end\n"); fflush(stdout); }
ONLOAD_TRACE_BEG();
tool_load();
ONLOAD_TRACE_END();
}
extern "C" DESTRUCTOR_API void destructor() {
if (onload_debug) { printf("TOOL destructor\n"); fflush(stdout); }
tool_unload(true);
if (onload_debug) { printf("TOOL destructor end\n"); fflush(stdout); }
ONLOAD_TRACE_BEG();
tool_unload();
ONLOAD_TRACE_END();
}