diff --git a/.gitignore b/.gitignore index cafda6d07e..446848fea5 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ b build inc/hsa_prof_str.h inc/kfd_prof_str.h +inc/basic_ostream_ops.h +inc/kfd_ostream_ops.h test/hsa test/MatrixTranspose/MatrixTranspose test/MatrixTranspose_test/MatrixTranspose diff --git a/CMakeLists.txt b/CMakeLists.txt index e90a4f7924..eb65300fd8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,9 @@ cmake_minimum_required ( VERSION 3.5.0 ) ## Verbose output. set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE ) +# Install prefix +set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "Install path prefix, prepended onto install directories") + ## Set module name and project name. set ( ROCTRACER_NAME "roctracer" ) set ( ROCTRACER_TARGET "${ROCTRACER_NAME}64" ) @@ -43,17 +46,25 @@ include ( env ) ## Setup the package version. get_version ( "1.0.0" ) -message ( "-- LIB-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" ) set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} ) set ( BUILD_VERSION_MINOR ${VERSION_MINOR} ) set ( BUILD_VERSION_PATCH ${VERSION_PATCH} ) -set ( LIB_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" ) + if ( DEFINED VERSION_BUILD AND NOT ${VERSION_BUILD} STREQUAL "" ) message ( "VERSION BUILD DEFINED ${VERSION_BUILD}" ) set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" ) endif () -set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" ) + +set ( LIB_VERSION_MAJOR ${BUILD_VERSION_MAJOR} ) +set ( LIB_VERSION_MINOR ${BUILD_VERSION_MINOR} ) +if (DEFINED ENV{ROCM_LIBPATCH_VERSION}) + set (LIB_VERSION_PATCH $ENV{ROCM_LIBPATCH_VERSION} ) +else () + set (LIB_VERSION_PATCH ${BUILD_VERSION_PATCH} ) +endif() +set ( LIB_VERSION_STRING "${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" ) +message ( "-- LIB-VERSION: ${LIB_VERSION_MAJOR}.${LIB_VERSION_MINOR}.${LIB_VERSION_PATCH}" ) ## Set target and root/lib/test directory set ( TARGET_NAME "${ROCTRACER_TARGET}" ) @@ -66,53 +77,81 @@ include ( ${LIB_DIR}/CMakeLists.txt ) ## Set the VERSION and SOVERSION values set_property ( TARGET ${TARGET_NAME} PROPERTY VERSION "${LIB_VERSION_STRING}" ) -set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${BUILD_VERSION_MAJOR}" ) +set_property ( TARGET ${TARGET_NAME} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" ) # If the library is a release, strip the target library -if ( "${CMAKE_BUILD_TYPE}" STREQUAL release ) - add_custom_command ( TARGET ${ROCTRACER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so ) -endif () +#if ( "${CMAKE_BUILD_TYPE}" STREQUAL release ) +# add_custom_command ( TARGET ${ROCTRACER_TARGET} POST_BUILD COMMAND ${CMAKE_STRIP} *.so ) +#endif () ## Build tests add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test ) -## Install and packaging -set ( CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}/${ROCTRACER_NAME} ) -message ( "---------Install-Dir: ${CMAKE_INSTALL_PREFIX}" ) +## pbulic headers +set ( PUBLIC_HEADERS + roctracer.h + roctracer_hip.h + roctracer_hcc.h + roctracer_ext.h + ext/prof_protocol.h + ext/hsa_rt_utils.hpp + roctx.h +) -add_custom_target ( inc-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/include inc-link ) +## Installation and packaging +set ( DEST_NAME ${ROCPROFILER_NAME} ) +if ( DEFINED CPACK_PACKAGING_INSTALL_PREFIX ) + get_filename_component ( DEST_NAME ${CPACK_PACKAGING_INSTALL_PREFIX} NAME ) + get_filename_component ( DEST_DIR ${CPACK_PACKAGING_INSTALL_PREFIX} DIRECTORY ) + set ( CPACK_PACKAGING_INSTALL_PREFIX ${DEST_DIR} ) +endif () +message ( "-----------Dest-name: ${DEST_NAME}" ) +message ( "------Install-prefix: ${CMAKE_INSTALL_PREFIX}" ) +message ( "-----------CPACK-dir: ${CPACK_PACKAGING_INSTALL_PREFIX}" ) + +#add_custom_target ( inc-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} +# COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/include inc-link ) add_custom_target ( so-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTRACER_LIBRARY}.so so-link ) + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so so-link ) +add_custom_target ( so-major-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} so-major-link ) +add_custom_target ( so-patch-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} so-patch-link ) ## Install information -install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION lib ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer.h DESTINATION include ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_hip.h DESTINATION include ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_hcc.h DESTINATION include ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_ext.h DESTINATION include ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/prof_protocol.h DESTINATION include/ext ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/ext/hsa_rt_utils.hpp DESTINATION include/ext ) -install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION ../include RENAME ${ROCTRACER_NAME} ) -install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCTRACER_LIBRARY}.so ) -install ( FILES ${PROJECT_BINARY_DIR}/test/libtracer_tool.so DESTINATION tool ) +install ( TARGETS ${ROCTRACER_TARGET} LIBRARY DESTINATION ${DEST_NAME}/lib ) +foreach ( header ${PUBLIC_HEADERS} ) + install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/${header} DESTINATION ${DEST_NAME}/include ) + install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/${header} DESTINATION include/${DEST_NAME} ) +endforeach () +#install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION include RENAME ${DEST_NAME} ) +install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so ) +install ( FILES ${PROJECT_BINARY_DIR}/so-major-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_MAJOR} ) +install ( FILES ${PROJECT_BINARY_DIR}/so-patch-link DESTINATION lib RENAME ${ROCTRACER_LIBRARY}.so.${LIB_VERSION_STRING} ) +install ( FILES ${PROJECT_BINARY_DIR}/test/libtracer_tool.so DESTINATION ${DEST_NAME}/tool ) ## rocTX set ( ROCTX_TARGET "roctx64" ) set ( ROCTX_LIBRARY "lib${ROCTX_TARGET}" ) -add_custom_target ( so-roctx-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} - COMMAND ${CMAKE_COMMAND} -E create_symlink ../${ROCTRACER_NAME}/lib/${ROCTX_LIBRARY}.so so-roctx-link ) +## Set the VERSION and SOVERSION values +set_property ( TARGET ${ROCTX_TARGET} PROPERTY VERSION "${LIB_VERSION_STRING}" ) +set_property ( TARGET ${ROCTX_TARGET} PROPERTY SOVERSION "${LIB_VERSION_MAJOR}" ) -install ( TARGETS "roctx64" LIBRARY DESTINATION lib ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctx.h DESTINATION include ) -install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/roctracer_roctx.h DESTINATION include ) -install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION ../lib RENAME ${ROCTX_LIBRARY}.so ) +add_custom_target ( so-roctx-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so so-roctx-link ) +add_custom_target ( so-roctx-major-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} so-roctx-major-link ) +add_custom_target ( so-roctx-patch-link ALL WORKING_DIRECTORY ${PROJECT_BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E create_symlink ../${DEST_NAME}/lib/${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} so-roctx-patch-link ) + +install ( TARGETS "roctx64" LIBRARY DESTINATION ${DEST_NAME}/lib ) +install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so ) +install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-major-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_MAJOR} ) +install ( FILES ${PROJECT_BINARY_DIR}/so-roctx-patch-link DESTINATION lib RENAME ${ROCTX_LIBRARY}.so.${LIB_VERSION_STRING} ) ## KFD wrapper -if ( DEFINED KFD_WRAPPER ) - install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION lib ) -endif () +install ( TARGETS "kfdwrapper64" LIBRARY DESTINATION ${DEST_NAME}/lib ) ## Packaging directives set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" ) @@ -129,7 +168,7 @@ set ( CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" ) set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" ) ## RPM package specific variables -set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" ) +set ( CPACK_RPM_POST_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" ) set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" ) include ( CPack ) diff --git a/DEBIAN/postinst b/DEBIAN/postinst index b09a3c139b..f14a4ee14c 100644 --- a/DEBIAN/postinst +++ b/DEBIAN/postinst @@ -3,7 +3,10 @@ set -e do_ldconfig() { - echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig + INSTALL_PATH=/opt/rocm/roctracer + if [ -e "${INSTALL_PATH}" ] ; then + echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig + fi } case "$1" in diff --git a/LICENSE b/LICENSE index 9e78331e70..8384c9857d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,5 @@ Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. +[MITx11 License] Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index f80fe90a2d..8f3e848113 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,75 @@ # ROC-tracer ``` -ROC-tracer library, Runtimes Generic Callback/Activity APIs. +ROC-tracer library: Runtimes Generic Callback/Activity APIs. The goal of the implementation is to provide a generic independent from specific runtime profiler to trace API and asyncronous activity. The API provides functionality for registering the runtimes API callbacks and asyncronous activity records pool support. + +ROC-TX librray: code annotation evemts API +Includes basic API: roctxMark, roctxRangePush, roctxRangePop. +``` + +## Usage +``` +rocTracer API: + To use the rocTracer API you need the API header and to link your application with roctracer .so librray: + - the API header: /opt/rocm/roctracer/include/roctracer.h + - the .so library: /opt/rocm/lib/libroctracer64.so + +rocTX API: + To use the rocTX API you need the API header and to link your application with roctx .so librray: + - the API header: /opt/rocm/roctracer/include/roctx.h + - the .so library: /opt/rocm/lib/libroctx64.so ``` ## The library source tree ``` - - inc/roctracer.h - Library public API + - doc - documentation + - inc/roctracer.h - rocTacer library public API header + - inc/roctx.h - rocTX library puiblic API header - src - Library sources - - core - Library API sources - - util - Library utils sources + - core - rocTracer library API sources + - roctx - rocTX library API sources + - util - library utils sources - test - test suit - MatrixTranspose - test based on HIP MatrixTranspose sample ``` ## Documentation -``` - - API description: inc/roctracer.h - - Code example: test/MatrixTranspose_test/MatrixTranspose.cpp -``` + - API description: + - ['roctracer' / 'rocTX' profiling C API specification](doc/roctracer_spec.md) + - Code examples: + - [test/MatrixTranspose_test/MatrixTranspose.cpp](test/MatrixTranspose_test/MatrixTranspose.cpp) + - [test/MatrixTranspose/MatrixTranspose.cpp](test/MatrixTranspose/MatrixTranspose.cpp) ## To build and run test ``` - - ROCm-2.3 or higher is required - - cd + - ROCm is required + + - Python modules requirements: CppHeaderParser, argparse. + To install: + sudo pip install CppHeaderParser argparse - CLone development branch of roctracer: git clone -b amd-master https://github.com/ROCm-Developer-Tools/roctracer - Set environment: + export CMAKE_PREFIX_PATH=/opt/rocm + - To use custom HIP/HCC versions: export HIP_PATH=/opt/rocm/hip export HCC_HOME=/opt/rocm/hcc - export CMAKE_PREFIX_PATH=/opt/rocm - - Build ROCtracer + - To build roctracer library: export CMAKE_BUILD_TYPE= # release by default cd /roctracer && mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX=/opt/rocm .. && make -j - - To build and run test + - To build and run test: make mytest run.sh - - To install + - To install: make install or make package && dpkg -i *.deb diff --git a/RPM/rpm_post b/RPM/rpm_post index a19ea861f1..1e5e279075 100644 --- a/RPM/rpm_post +++ b/RPM/rpm_post @@ -1 +1,4 @@ -echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig +INSTALL_PATH=/opt/rocm/roctracer +if [ -e "${INSTALL_PATH}" ] ; then + echo /opt/rocm/roctracer/lib > /etc/ld.so.conf.d/libroctracer64.conf && ldconfig +fi diff --git a/build.sh b/build.sh index b94e58da94..6d04e7b2a0 100755 --- a/build.sh +++ b/build.sh @@ -1,7 +1,8 @@ #!/bin/bash -x SRC_DIR=`dirname $0` COMPONENT="roctracer" -ROCM_PATH="/opt/rocm" +ROCM_PATH="${ROCM_PATH:=/opt/rocm}" +LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,$ROCM_PATH/lib:$ROCM_PATH/lib64" fatal() { echo "$1" @@ -19,9 +20,9 @@ if [ -z "$PACKAGE_ROOT" ] ; then PACKAGE_ROOT=$ROCM_PATH; fi if [ -z "$PACKAGE_PREFIX" ] ; then PACKAGE_PREFIX="$ROCM_PATH/$COMPONENT"; fi if [ -z "$PREFIX_PATH" ] ; then PREFIX_PATH=$PACKAGE_ROOT; fi if [ -n "$HIP_VDI" ] ; then HIP_VDI_OPT="-DHIP_VDI=1"; fi +if [ -n "$ROCM_RPATH" ] ; then LD_RUNPATH_FLAG=" -Wl,--enable-new-dtags -Wl,--rpath,${ROCM_RPATH}"; fi ROCTRACER_ROOT=$(cd $ROCTRACER_ROOT && echo $PWD) -MAKE_OPTS="-j 8 -C $BUILD_DIR" mkdir -p $BUILD_DIR pushd $BUILD_DIR @@ -33,6 +34,7 @@ cmake \ -DCMAKE_INSTALL_PREFIX=$PACKAGE_ROOT \ -DCPACK_PACKAGING_INSTALL_PREFIX=$PACKAGE_PREFIX \ -DCPACK_GENERATOR="DEB;RPM" \ + -DCMAKE_SHARED_LINKER_FLAGS="$LD_RUNPATH_FLAG" \ $HIP_VDI_OPT \ $ROCTRACER_ROOT make diff --git a/cmake_modules/env.cmake b/cmake_modules/env.cmake index 9ad3fbf23b..5d3cde0064 100644 --- a/cmake_modules/env.cmake +++ b/cmake_modules/env.cmake @@ -50,11 +50,6 @@ else() set ( HIP_VDI 0 ) endif() -## Enable KFD wrapper -if ( DEFINED KFD_WRAPPER ) - add_definitions ( -DKFD_WRAPPER=${KFD_WRAPPER} ) -endif() - ## Enable HIP/HCC local build if ( DEFINED LOCAL_BUILD ) add_definitions ( -DLOCAL_BUILD=${LOCAL_BUILD} ) @@ -123,6 +118,7 @@ endif () find_library ( HSA_KMT_LIB "libhsakmt.so" ) get_filename_component ( HSA_KMT_LIB_PATH ${HSA_KMT_LIB} DIRECTORY ) set ( HSA_KMT_INC_PATH "${HSA_KMT_LIB_PATH}/../include" ) +set ( ROCM_INC_PATH ${HSA_KMT_INC_PATH} ) ## Basic Tool Chain Information message ( "----------------NBIT: ${NBIT}" ) @@ -136,6 +132,5 @@ message ( "-------------HCC-Inc: ${HCC_INC_DIR}" ) message ( "-------------HIP-Inc: ${HIP_INC_DIR}" ) message ( "-------------KFD-Inc: ${HSA_KMT_INC_PATH}" ) message ( "-------------HIP-VDI: ${HIP_VDI}" ) -message ( "---------KFD_WRAPPER: ${KFD_WRAPPER}" ) message ( "-----CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}" ) message ( "---CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}" ) diff --git a/doc/roctracer_spec.md b/doc/roctracer_spec.md new file mode 100644 index 0000000000..28f8ffcc27 --- /dev/null +++ b/doc/roctracer_spec.md @@ -0,0 +1,748 @@ +# ROC Tracer / ROC-TX Libraries Specification +``` +ROC Tracer API version 2 +ROC-TX API version 1 + +- The rocTracer API is agnostic to specific runtime and may trace +the runtime API calls and asynchronous GPU activity. +- The rocTX API is provided for application code annotation. +``` +## 1. High level overview +``` +The goal of the implementation is to provide a runtime independent API +for tracing of runtime calls and asynchronous activity, like GPU kernel +dispatches and memory moves. The tracing includes callback API for +runtime API tracing and activity API for asynchronous activity records +logging. + +Depending on particular runtime intercepting mechanism, the rocTracer +library can be dynamically linked, dynamically loaded by the runtime as +a plugin or some API wrapper can be loaded using LD_PRELOAD. +The library has a C API. + +The rocTracer library is an API that intercepts runtime API calls and +traces asynchronous activity. The activity tracing results are recorded +in a ring buffer. + +The rocTX contains application code instrumentation API to support high +level correlation of runtime API/activity events. The API includes mark +and nested ranges. +``` +## 2. General API +### 2.1. Description +``` +The library supports method for getting the error number and error string +of the last failed library API call. It allows to check the conformance +of used library API header and the library binary, the version macros and +API methods can be used. + +Returning the error and error string methods: +• roctracer_status_t – error code enumeration +• roctracer_error_string – method for returning the error string + +Library version: +• ROCTRACER_VERSION_MAJOR – API major version macro +• ROCTRACER_VERSION_MINOR – API minor version macro +• roctracer_version_major – library major version +• roctracer_version_minor – library minor version +``` +### 2.2. Error codes and error string methods +``` +Error code enumeration: +typedef enum { + ROCTRACER_STATUS_SUCCESS = 0, + ROCTRACER_STATUS_ERROR = 1, + ROCTRACER_STATUS_UNINIT = 2, + ROCTRACER_STATUS_BREAK = 3, + ROCTRACER_STATUS_BAD_DOMAIN = 4, + ROCTRACER_STATUS_BAD_PARAMETER = 5, + ROCTRACER_STATUS_HIP_API_ERR = 6, + ROCTRACER_STATUS_HCC_OPS_ERR = 7, + ROCTRACER_STATUS_ROCTX_ERR = 8, +} roctracer_status_t; + +Return error string: +const char* roctracer_error_string(); +``` +### 2.3. Library version +``` +The library provides major and minor versions. Major version is for +incompatible API changes and minor version for bug fixes. + +API version macros defined in the library API header ‘roctracer.h’: +ROCTRACER_VERSION_MAJOR +ROCTRACER_VERSION_MINOR + +Methods to check library major and minor venison: +uint32_t roctracer_major_version(); +uint32_t roctracer_minor_version(); +``` +## 3. Frontend API +### 3.1. Description +``` +The rocTracer provides support for runtime API callbacks and activity +records logging. The APIs of different runtimes at different levels +are considered as different API domains with assigned domain IDs. For +example, language level and driver level. The API callbacks provide +the API calls arguments and are called on two phases on “enter” and +on “exit”. The activity records are logged to the ring buffer and can +be associated with the respective API calls using the correlation ID. +Activity API can be used to enable collecting of the records with +timestamping data for API calls and asynchronous activity like the +kernel submits, memory copies and barriers + +Tracing domains: +• roctracer_domain_t – runtime API domains, HIP, HSA, etc… +• roctracer_op_string – Return Op string by given domain and + activity Op code +• roctracer_op_code – Return Op code and kind by given string + +Callback API: +• roctracer_rtapi_callback_t – runtime API callback type +• roctracer_enable_op_callback – enable runtime API callback + by domain and Op code +• roctracer_enable_domain_callback – enable runtime API callback + by domain for all Ops +• roctracer_enable_callback – enable runtime API callback for + all domains, all Ops +• roctracer_disable_op_callback – disable runtime API callback + by domain and Op code +• roctracer_enable_op_callback – enable runtime API callback + by domain for all Ops +• roctracer_enable_op_callback – enable runtime API callback for + all domains, all Ops + +Activity API: +• roctracer_record_t – activity record +• roctracer_pool_t – records pool type +• roctracer_allocator_t – tracer allocator type +• roctracer_buffer_callback_t – pool callback type +• roctracer_open_pool[_expl] – create records pool +• roctracer_close_pool[_expl] – close records pool +• roctracer_default_pool[_expl] – get/set default pool +• roctracer_properties_t – tracer properties +• roctracer_enable_op_activity[_expl] – enable activity records logging +• roctracer_enable_domain_activity[_expl] – enable activity records logging +• roctracer_enable_activity[_expl] – enable activity records logging +• roctracer_disable_op_activity – disable activity records logging +• roctracer_disable_domain_activity – disable activity records logging +• roctracer_disable_activity – disable activity records logging +• roctracer_flush_activity[_expl] – disable activity records logging +• roctracer_next_record – return next record +• roctracer_get_timestamp – return correlated GPU/CPU system timestamp + +External correlation ID API: +• roctracer_activity_push_external_correlation_id - push an external + correlation id for the calling thread +• roctracer_activity_pop_external_correlation_id - pop an external + correlation id for the calling thread + +Tracing control API: +• roctracer_start – tracing start +• roctracer_stop – tracer stop + +``` +### 3.2. Tracing Domains +``` +Various tracing domains are supported. Each domain is assigned with +a domain ID. The domains include HSA, HIP, and HCC runtime levels. + +Traced API domains: +typedef enum { + ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain + ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain + ACTIVITY_DOMAIN_HIP_API = 2, // HIP API domain + ACTIVITY_DOMAIN_HIP_OPS = 3, // HIP async activity domain + ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain + ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain + ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain + ACTIVITY_DOMAIN_NUMBER = 7 +} activity_domain_t; + +Return name by given domain and Op code: +const char* roctracer_op_string( // NULL returned on error and error number is set + uint32_t domain, // tracing domain + uint32_t op, // activity op code + uint32_t kind); // activity kind +Return Op code and kind by given string: +roctracer_status_t roctracer_op_code( + uint32_t domain, // tracing domain + const char* str, // [in] op string + uint32_t* op, // [out] op code + uint32_t* kind); // [out] op kind code if not NULL +``` +### 3.3. Callback API +``` +The tracer provides support for runtime API callbacks and activity records +logging. The API callbacks provide the API calls arguments and are called +on two phases on “enter”, on “exit”. + +API phase passed to the callbacks: +typedef enum { + ROCTRACER_API_PHASE_ENTER, + ROCTRACER_API_PHASE_EXIT, +} roctracer_api_phase_t; + +Runtime API callback type: +typedef void (*roctracer_rtapi_callback_t)( + uint32_t domain, // runtime API domain + uint32_t cid, // API call ID + const void* data, // [in] callback data with correlation id and the call + // arguments + void* arg); // [in/out] user passed data + +Enable runtime API callbacks: +roctracer_status_t roctracer_enable_op_callback( + activity_domain_t domain, // tracing domain + uint32_t op, // API call ID + activity_rtapi_callback_t callback, // callback function pointer + void* arg); // [in/out] callback arg + +roctracer_status_t roctracer_enable_domain_callback( + activity_domain_t domain, // tracing domain + activity_rtapi_callback_t callback, // callback function pointer + void* arg); // [in/out] callback arg + + +roctracer_status_t roctracer_enable_callback( + activity_rtapi_callback_t callback, // callback function pointer + void* arg); // [in/out] callback arg + +Disable runtime API callbacks: +roctracer_status_t roctracer_disable_op_callback( + activity_domain_t domain, // tracing domain + uint32_t op); // API call ID + +roctracer_status_t roctracer_disable_domain_callback( + activity_domain_t domain); // tracing domain + +roctracer_status_t roctracer_disable_callback(); +``` +### 3.4 Activity API +``` +The activity records are asynchronously logged to the pool and can be +associated with the respective API callbacks using the correlation ID. +Activity API can be used to enable collecting the records with +timestamp data for API calls and GPU activity like kernel submits, +memory copies, and barriers. + +// Correlation id +typedef uint64_t activity_correlation_id_t; + +Activity record type: + +// Activity record type +struct activity_record_t { + uint32_t domain; // activity domain id + activity_kind_t kind; // activity kind + activity_op_t op; // activity op + activity_correlation_id_t correlation_id; // activity ID + uint64_t begin_ns; // host begin timestamp + uint64_t end_ns; // host end timestamp + union { + struct { + int device_id; // device id + uint64_t queue_id; // queue id + }; + struct { + uint32_t process_id; // device id + uint32_t thread_id; // thread id + }; + struct { + activity_correlation_id_t external_id; // external correlation id + }; + }; + size_t bytes; // data size bytes +}; + +Return next record: +static inline int roctracer_next_record( + const activity_record_t* record, // [in] record ptr + const activity_record_t** next); // [out] next record ptr + +Tracer allocator type: +typedef void (*roctracer_allocator_t)( + char** ptr, // memory pointer + size_t size, // memory size + void* arg); // allocator arg + +Pool callback type: +typedef void (*roctracer_buffer_callback_t)( + const char* begin, // [in] available buffered trace records + const char* end, // [in] end of buffered trace records + void* arg); // [in/out] callback arg + +Tracer properties: +typedef struct { + uint32_t mode; // roctracer mode + size_t buffer_size; // buffer size + // power of 2 + roctracer_allocator_t alloc_fun; // memory allocator + // function pointer + void* alloc_arg; // memory allocator + // function pointer + roctracer_buffer_callback_t buffer_callback_fun; // tracer record + // callback function + void* buffer_callback_arg; // tracer record + // callback arg +} roctracer_properties_t; + +Tracer memory pool handle type: +typedef void roctracer_pool_t; + +Create tracer memory pool: +roctracer_status_t roctracer_open_pool( + const roctracer_properties_t* properties); // tracer pool properties + +roctracer_status_t roctracer_open_pool_expl( + const roctracer_properties_t* properties, // tracer pool properties + roctracer_pool_t** pool); // [out] returns tracer pool if + // not NULL, otherwise sets the + // default one if it is not set + // yet; otherwise the error is + // generated + +Close tracer memory pool: +roctracer_status_t roctracer_close_pool(); + +roctracer_status_t roctracer_close_pool_expl( + roctracer_pool_t* pool); // memory pool, NULL means default pool + +Return current default pool. Set new default pool if the argument is not NULL: +roctracer_pool_t* roctracer_default_pool(); + +roctracer_pool_t* roctracer_default_pool_expl( + roctracer_pool_t* pool); // new default pool if not NULL +``` +Enable activity records logging: +``` +roctracer_status_t roctracer_enable_op_activity( + activity_domain_t domain, // tracing domain + uint32_t op); // activity op ID + +roctracer_status_t roctracer_enable_op_activity_expl( + activity_domain_t domain, // tracing domain + uint32_t op, // activity op ID + roctracer_pool_t* pool); // memory pool, NULL means default pool + +roctracer_status_t roctracer_enable_domain_activity( + activity_domain_t domain); // tracing domain + +roctracer_status_t roctracer_enable_domain_activity_expl( + activity_domain_t domain, // tracing domain + roctracer_pool_t* pool); // memory pool, NULL means default pool + +roctracer_status_t roctracer_enable_activity(); + +roctracer_status_t roctracer_enable_activity_expl( + roctracer_pool_t* pool); // memory pool, NULL means default pool + +Disable activity records logging: +roctracer_status_t roctracer_disable_op_activity( + activity_domain_t domain, // tracing domain + uint32_t op); // activity op ID + +roctracer_status_t roctracer_disable_domain_activity( + activity_domain_t domain); // tracing domain + +roctracer_status_t roctracer_disable_activity(); + +Flush available activity records: +roctracer_status_t roctracer_flush_activity(); + +roctracer_status_t roctracer_flush_activity_expl( + roctracer_pool_t* pool); // memory pool, NULL means default pool + +Return correlated GPU/CPU system timestamp: +roctracer_status_t roctracer_get_timestamp( + uint64_t* timestamp); // [out] return timestamp +``` +External correlation ID API +``` +The API provides activity records to associate rocTracer correlation IDs with +IDs provided by external APIs. The external ID records are identified by +ACTIVITY_DOMAIN_EXT_API domain value. +Using the ‘push’ method an external ID is pushed to a per CPU thread stack and +the ‘pop’ method can be used to remove the last pushed ID. +An external ID record is inserted before any generated rocTracer activity record +if the same CPU external ID stack is non-empty. + +Notifies that the calling thread is entering an external API region. +Push an external correlation id for the calling thread. +roctracer_status_t roctracer_activity_push_external_correlation_id( + activity_correlation_id_t id); // external correlation id + +Notifies that the calling thread is leaving an external API region. +Pop an external correlation id for the calling thread. +roctracer_status_t roctracer_activity_pop_external_correlation_id( + activity_correlation_id_t* last_id); // returns the last external correlation id + // if not NULL +``` +Tracing control API +``` +Tracing start: +void roctracer_start(); + +Tracing stop: +void roctracer_stop(); +``` +## 4. rocTracer Usage Code Examples +### 4.1. HIP API and HCC ops, GPU Activity Tracing +``` +#include +#include + +// HIP API callback function +void hip_api_callback( + uint32_t domain, + uint32_t cid, + const void* callback_data, + void* arg) +{ + (void)arg; + const hip_api_data_t* data = reinterpret_cast + (callback_data); + fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", + roctracer_id_string(ACTIVITY_DOMAIN_HIP_API, cid), + cid, + data->correlation_id, + (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit"); + +} + +// Activity tracing callback +void activity_callback(const char* begin, const char* end, void* arg) { + const roctracer_record_t* record = reinterpret_cast(begin); + const roctracer_record_t* end_record = reinterpret_cast(end); + fprintf(stdout, "\tActivity records:\n"); + while (record < end_record) { + const char * name = roctracer_op_string(record->domain, + record->activity_id, 0); + fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) + device_id(%d) stream_id(%lu)\n", + name, + record->correlation_id, + record->begin_ns, + record->end_ns, + record->device_id, + record->stream_id + ); + + ROCTRACER_CALL(roctracer_next_record(record, &record)); + } +} + +int main() { + // Allocating tracing pool + roctracer_properties_t properties{}; + properties.buffer_size = 12; + properties.buffer_callback_fun = activity_callback; + ROCTRACER_CALL(roctracer_open_pool(&properties)); + + // Enable HIP API callbacks. HIP_API_ID_ANY can be used to trace all HIP + // API calls. + ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, + HIP_API_ID_hipModuleLaunchKernel, + hip_api_callback, NULL)); + ROCTRACER_CALL(roctracer_enable_op_acticity(ACTIVITY_DOMAIN_HIP_API, + HIP_API_ID_hipModuleLaunchKernel)); + // Enable HIP kernel dispatch activity tracing + ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HCC_OPS, + hc::HSA_OP_ID_DISPATCH)); + + + + // Disable tracing and closing the pool + ROCTRACER_CALL(roctracer_disable_callback()); + ROCTRACER_CALL(roctracer_disable_activity()); + ROCTRACER_CALL(roctracer_close_pool()); +} +``` +### 4.2. MatrixTranspose HIP sample with all APIs/activity tracing enabled +``` +This shows a MatrixTranspose HIP sample with enabled tracing of +all HIP API and all GPU asynchronous activity. + +/* +Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include + +// hip header file +#include + +#ifndef ITERATIONS +# define ITERATIONS 100 +#endif +#define WIDTH 1024 + + +#define NUM (WIDTH * WIDTH) + +#define THREADS_PER_BLOCK_X 4 +#define THREADS_PER_BLOCK_Y 4 +#define THREADS_PER_BLOCK_Z 1 + +// Device (Kernel) function, it must be void +// hipLaunchParm provides the execution configuration +__global__ void matrixTranspose(hipLaunchParm lp, float* out, float* in, + const int width) { + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + + out[y * width + x] = in[x * width + y]; +} + +// CPU implementation of matrix transpose +void matrixTransposeCPUReference(float* output, float* input, const unsigned + int width) { + for (unsigned int j = 0; j < width; j++) { + for (unsigned int i = 0; i < width; i++) { + output[i * width + j] = input[j * width + i]; + } + } +} + +int iterations = ITERATIONS; +void start_tracing(); +void stop_tracing(); + +int main() { + float* Matrix; + float* TransposeMatrix; + float* cpuTransposeMatrix; + + float* gpuMatrix; + float* gpuTransposeMatrix; + + hipDeviceProp_t devProp; + hipGetDeviceProperties(&devProp, 0); + + std::cout << "Device name " << devProp.name << std::endl; + + int i; + int errors; + + while (iterations-- > 0) { + start_tracing(); + + Matrix = (float*)malloc(NUM * sizeof(float)); + TransposeMatrix = (float*)malloc(NUM * sizeof(float)); + cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); + + // initialize the input data + for (i = 0; i < NUM; i++) { + Matrix[i] = (float)i * 10.0f; + } + + // allocate the memory on the device side + hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); + hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + + // Memory transfer from host to device + hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), + hipMemcpyHostToDevice); + + // Lauching kernel from host + hipLaunchKernel(matrixTranspose, + dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / + THREADS_PER_BLOCK_Y), + dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, + gpuTransposeMatrix, gpuMatrix, WIDTH); + + // Memory transfer from device to host + hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), + hipMemcpyDeviceToHost); + + // CPU MatrixTranspose computation + matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); + + // verify the results + errors = 0; + double eps = 1.0E-6; + for (i = 0; i < NUM; i++) { + if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { + errors++; + } + } + if (errors != 0) { + printf("FAILED: %d errors\n", errors); + } else { + printf("PASSED!\n"); + } + + // free the resources on device side + hipFree(gpuMatrix); + hipFree(gpuTransposeMatrix); + + // free the resources on host side + free(Matrix); + free(TransposeMatrix); + free(cpuTransposeMatrix); + + stop_tracing(); + } + + return errors; +} + +///////////////////////////////////////////////////////////////////////////// +// HIP/HCC Callbacks/Activity tracing +///////////////////////////////////////////////////////////////////////////// +#include +#include + +// Macro to check ROC-tracer calls status +#define ROCTRACER_CALL(call) \ + do { \ + int err = call; \ + if (err != 0) { \ + std::cerr << roctracer_error_string() << std::endl << std::flush; \ + abort(); \ + } \ + } while (0) + +// HIP API callback function +void hip_api_callback( + uint32_t domain, + uint32_t cid, + const void* callback_data, + void* arg) +{ + (void)arg; + const hip_api_data_t* data = reinterpret_cast + (callback_data); + fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", + roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), + cid, + data->correlation_id, + (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit"); + if (data->phase == ACTIVITY_API_PHASE_ENTER) { + switch (cid) { + case HIP_API_ID_hipMemcpy: + fprintf(stdout, "dst(%p) src(%p) size(0x%x) kind(%u)", + data->args.hipMemcpy.dst, + data->args.hipMemcpy.src, + (uint32_t)(data->args.hipMemcpy.sizeBytes), + (uint32_t)(data->args.hipMemcpy.kind)); + break; + case HIP_API_ID_hipMalloc: + fprintf(stdout, "ptr(%p) size(0x%x)", + data->args.hipMalloc.ptr, + (uint32_t)(data->args.hipMalloc.size)); + break; + case HIP_API_ID_hipFree: + fprintf(stdout, "ptr(%p), + data->args.hipFree.ptr); + break; + case HIP_API_ID_hipModuleLaunchKernel: + fprintf(stdout, "kernel(\"%s\") stream(%p)", + hipKernelNameRef(data->args.hipModuleLaunchKernel.f), + data->args.hipModuleLaunchKernel.stream); + break; + default: + break; + } + } else { + switch (cid) { + case HIP_API_ID_hipMalloc: + fprintf(stdout, "*ptr(0x%p)", + *(data->args.hipMalloc.ptr)); + break; + default: + break; + } + } + fprintf(stdout, "\n"); fflush(stdout); +} + +// Activity tracing callback +// hipMalloc id(3) correlation_id(1): +// begin_ns(1525888652762640464) end_ns(1525888652762877067) +void activity_callback(const char* begin, const char* end, void* arg) { + const roctracer_record_t* record = reinterpret_cast + (begin); + const roctracer_record_t* end_record = reinterpret_cast + (end); + fprintf(stdout, "\tActivity records:\n"); fflush(stdout); + while (record < end_record) { + const char * name = roctracer_op_string(record->domain, + record->activity_id, 0); + fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu) \ + device_id(%d) stream_id(%lu)", + name, + record->correlation_id, + record->begin_ns, + record->end_ns, + record->device_id, + record->stream_id + ); + if (record->kind == hc::HSA_OP_ID_COPY) + fprintf(stdout, " bytes(0x%zx)", record->bytes); + fprintf(stdout, "\n"); + fflush(stdout); + ROCTRACER_CALL(roctracer_next_record(record, &record)); + } +} + +// Start tracing routine +void start_tracing() { + std::cout << "# START #############################" << std::endl + << std::flush; + // Allocating tracing pool + roctracer_properties_t properties{}; + properties.buffer_size = 0x1000; + properties.buffer_callback_fun = activity_callback; + ROCTRACER_CALL(roctracer_open_pool(&properties)); + // Enable API callbacks, all domains + ROCTRACER_CALL(roctracer_enable_callback(hip_api_callback, NULL)); + // Enable activity tracing, all domains + ROCTRACER_CALL(roctracer_enable_activity()); +} + +// Stop tracing routine +void stop_tracing() { + ROCTRACER_CALL(roctracer_disable_api_callback()); + ROCTRACER_CALL(roctracer_disable_api_activity()); + ROCTRACER_CALL(roctracer_close_pool()); + std::cout << "# STOP #############################" << std::endl + << std::flush; +} +///////////////////////////////////////////////////////////////////////////// +``` +## 5. rocTX application code annotation API +``` +Basic annotation API: markers and nested ranges. +// A marker created by given ASCII massage +void roctxMark(const char* message); + +// Returns the 0 based level of a nested range being started by given message associated to this range. +// A negative value is returned on the error. +int roctxRangePush(const char* message); + +// Marks the end of a nested range. +// Returns the 0 based level the range. +// A negative value is returned on the error. +int roctxRangePop(); +``` diff --git a/inc/ext/prof_protocol.h b/inc/ext/prof_protocol.h index 6d9cd62714..c29ff0e65a 100644 --- a/inc/ext/prof_protocol.h +++ b/inc/ext/prof_protocol.h @@ -29,9 +29,10 @@ THE SOFTWARE. typedef enum { ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain - ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain + ACTIVITY_DOMAIN_HIP_OPS = 2, // HIP async activity domain + ACTIVITY_DOMAIN_HCC_OPS = ACTIVITY_DOMAIN_HIP_OPS, // HCC async activity domain + ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HIP_OPS, // HIP VDI async activity domain ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain - ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HCC_OPS, // HIP VDI domain ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain @@ -60,13 +61,22 @@ typedef enum { typedef uint64_t activity_correlation_id_t; // Activity record type -struct activity_record_t { +typedef struct activity_record_s { uint32_t domain; // activity domain id activity_kind_t kind; // activity kind activity_op_t op; // activity op - activity_correlation_id_t correlation_id; // activity ID - uint64_t begin_ns; // host begin timestamp - uint64_t end_ns; // host end timestamp + union { + struct { + activity_correlation_id_t correlation_id; // activity ID + uint64_t begin_ns; // host begin timestamp + uint64_t end_ns; // host end timestamp + }; + struct { + uint32_t se; // sampled SE + uint64_t cycle; // sample cycle + uint64_t pc; // sample PC + } pc_sample; + }; union { struct { int device_id; // device id @@ -81,7 +91,7 @@ struct activity_record_t { }; }; size_t bytes; // data size bytes -}; +} activity_record_t; // Activity sync calback type typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); diff --git a/inc/roctracer.h b/inc/roctracer.h index 5f469616d2..8b0f2114f9 100644 --- a/inc/roctracer.h +++ b/inc/roctracer.h @@ -38,10 +38,13 @@ THE SOFTWARE. #include #include +#ifndef __cplusplus +#include +#endif #include "ext/prof_protocol.h" -#define ROCTRACER_VERSION_MAJOR 1 +#define ROCTRACER_VERSION_MAJOR 2 #define ROCTRACER_VERSION_MINOR 0 #ifdef __cplusplus @@ -64,6 +67,7 @@ typedef enum { ROCTRACER_STATUS_BAD_PARAMETER = 5, ROCTRACER_STATUS_HIP_API_ERR = 6, ROCTRACER_STATUS_HCC_OPS_ERR = 7, + ROCTRACER_STATUS_HSA_ERR = 7, ROCTRACER_STATUS_ROCTX_ERR = 8, } roctracer_status_t; @@ -89,7 +93,7 @@ roctracer_status_t roctracer_op_code( uint32_t domain, // tracing domain const char* str, // [in] op string uint32_t* op, // [out] op code - uint32_t* kind = NULL); // [out] op kind code + uint32_t* kind); // [out] op kind code if not NULL //////////////////////////////////////////////////////////////////////////////// // Callback API @@ -137,7 +141,7 @@ roctracer_status_t roctracer_disable_callback(); typedef activity_record_t roctracer_record_t; // Return next record -static inline int roctracer_next_record( +static inline roctracer_status_t roctracer_next_record( const activity_record_t* record, // [in] record ptr const activity_record_t** next) // [out] next record ptr { @@ -172,31 +176,59 @@ typedef void roctracer_pool_t; // Create tracer memory pool // The first invocation sets the default pool -roctracer_status_t roctracer_open_pool( +roctracer_status_t roctracer_open_pool_expl( const roctracer_properties_t* properties, // tracer pool properties - roctracer_pool_t** pool = NULL); // [out] returns tracer pool if not NULL, + roctracer_pool_t** pool); // [out] returns tracer pool if not NULL, // otherwise sets the default one if it is not set yet +static inline roctracer_status_t roctracer_open_pool( + const roctracer_properties_t* properties) // tracer pool properties +{ + return roctracer_open_pool_expl(properties, NULL); +} // otherwise the error is generated // Close tracer memory pool -roctracer_status_t roctracer_close_pool( - roctracer_pool_t* pool = NULL); // [in] memory pool, NULL is a default one +roctracer_status_t roctracer_close_pool_expl( + roctracer_pool_t* pool); // [in] memory pool, NULL is a default one +static inline roctracer_status_t roctracer_close_pool() +{ + return roctracer_close_pool_expl(NULL); +} // Return current default pool // Set new default pool if the argument is not NULL -roctracer_pool_t* roctracer_default_pool( - roctracer_pool_t* pool = NULL); // [in] new default pool if not NULL +roctracer_pool_t* roctracer_default_pool_expl( + roctracer_pool_t* pool); // [in] new default pool if not NULL +static inline roctracer_pool_t* roctracer_default_pool() +{ + return roctracer_default_pool_expl(NULL); +} // Enable activity records logging -roctracer_status_t roctracer_enable_op_activity( +roctracer_status_t roctracer_enable_op_activity_expl( activity_domain_t domain, // tracing domain uint32_t op, // activity op ID - roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one -roctracer_status_t roctracer_enable_domain_activity( + roctracer_pool_t* pool); // memory pool, NULL is a default one +static inline roctracer_status_t roctracer_enable_op_activity( activity_domain_t domain, // tracing domain - roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one -roctracer_status_t roctracer_enable_activity( - roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one + uint32_t op) // activity op ID +{ + return roctracer_enable_op_activity_expl(domain, op, NULL); +} +roctracer_status_t roctracer_enable_domain_activity_expl( + activity_domain_t domain, // tracing domain + roctracer_pool_t* pool); // memory pool, NULL is a default one +static inline roctracer_status_t roctracer_enable_domain_activity( + activity_domain_t domain) // tracing domain +{ + return roctracer_enable_domain_activity_expl(domain, NULL); +} +roctracer_status_t roctracer_enable_activity_expl( + roctracer_pool_t* pool); // memory pool, NULL is a default one +static inline roctracer_status_t roctracer_enable_activity() +{ + return roctracer_enable_activity_expl(NULL); +} // Disable activity records logging roctracer_status_t roctracer_disable_op_activity( @@ -207,24 +239,26 @@ roctracer_status_t roctracer_disable_domain_activity( roctracer_status_t roctracer_disable_activity(); // Flush available activity records -roctracer_status_t roctracer_flush_activity( - roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one +roctracer_status_t roctracer_flush_activity_expl( + roctracer_pool_t* pool); // memory pool, NULL is a default one +static inline roctracer_status_t roctracer_flush_activity() +{ + return roctracer_flush_activity_expl(NULL); +} + +// Get system timestamp +roctracer_status_t roctracer_get_timestamp( + uint64_t* timestamp); // [out] return timestamp // Load/Unload methods +bool roctracer_load(); +void roctracer_unload(); + // Set properties roctracer_status_t roctracer_set_properties( roctracer_domain_t domain, // tracing domain void* propertes); // tracing properties -struct HsaApiTable; -bool roctracer_load( - HsaApiTable* table, - uint64_t runtime_version, - uint64_t failed_tool_count, - const char* const* failed_tool_names); - -void roctracer_unload(bool destruct); - #ifdef __cplusplus } // extern "C" block #endif // __cplusplus diff --git a/inc/roctracer_ext.h b/inc/roctracer_ext.h index c2f5c54542..172966af6d 100644 --- a/inc/roctracer_ext.h +++ b/inc/roctracer_ext.h @@ -46,7 +46,7 @@ extern "C" { #endif // __cplusplus //////////////////////////////////////////////////////////////////////////////// -// Application annotatin API +// Application annotation API // Tracing start API void roctracer_start(); @@ -63,8 +63,8 @@ roctracer_status_t roctracer_activity_push_external_correlation_id(activity_corr // Notifies that the calling thread is leaving an external API region. // Pop an external correlation id for the calling thread. -// 'lastId' returns the last external correlation -roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id = NULL); +// 'lastId' returns the last external correlation if not NULL +roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id); #ifdef __cplusplus } // extern "C" block diff --git a/inc/roctracer_hcc.h b/inc/roctracer_hcc.h index 252b984d2a..0781460145 100644 --- a/inc/roctracer_hcc.h +++ b/inc/roctracer_hcc.h @@ -23,26 +23,22 @@ THE SOFTWARE. #ifndef INC_ROCTRACER_HCC_H_ #define INC_ROCTRACER_HCC_H_ -#if HIP_VDI -#define HIP_OP_ID_NUMBER 3 -#define HIP_OP_ID_COPY 1 +enum { + HIP_OP_ID_DISPATCH = 0, + HIP_OP_ID_COPY = 1, + HIP_OP_ID_BARRIER = 2, + HIP_OP_ID_NUMBER = 3 +}; + +#ifdef __cplusplus extern "C" { +#endif typedef void (hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg); typedef bool (hipEnableAsyncActivityCallback_t)(unsigned op, bool enable); typedef const char* (hipGetOpName_t)(unsigned op); +#ifdef __cplusplus } -#else // !HIP_VDI -#if LOCAL_BUILD -#include -#else -#include #endif -#define HIP_OP_ID_NUMBER hc::HSA_OP_ID_NUMBER -#define HIP_OP_ID_COPY hc::HSA_OP_ID_COPY -typedef decltype(Kalmar::CLAMP::InitActivityCallback) hipInitAsyncActivityCallback_t; -typedef decltype(Kalmar::CLAMP::EnableActivityCallback) hipEnableAsyncActivityCallback_t; -typedef decltype(Kalmar::CLAMP::GetCmdName) hipGetOpName_t; -#endif // !HIP_VDI #include "roctracer.h" diff --git a/inc/roctracer_hip.h b/inc/roctracer_hip.h index d365dd9444..28e4868d59 100644 --- a/inc/roctracer_hip.h +++ b/inc/roctracer_hip.h @@ -33,7 +33,7 @@ extern "C" { #endif // __cplusplus // Traced calls ID enumeration -typedef hip_api_id_t roctracer_hip_api_cid_t; +typedef enum hip_api_id_t roctracer_hip_api_cid_t; #ifdef __cplusplus } // extern "C" block diff --git a/inc/roctracer_hsa.h b/inc/roctracer_hsa.h index c01253e79f..625fa0b760 100644 --- a/inc/roctracer_hsa.h +++ b/inc/roctracer_hsa.h @@ -22,16 +22,25 @@ THE SOFTWARE. #ifndef INC_ROCTRACER_HSA_H_ #define INC_ROCTRACER_HSA_H_ -#include -#include #include -#include #include -#include "cb_table.h" #include "roctracer.h" +// HSA OP ID enumeration +enum hsa_op_id_t { + HSA_OP_ID_DISPATCH = 0, + HSA_OP_ID_COPY = 1, + HSA_OP_ID_BARRIER = 2, + HSA_OP_ID_PCSAMPLE = 3, + HSA_OP_ID_NUMBER = 4 +}; + +#ifdef __cplusplus +#include +#include + namespace roctracer { namespace hsa_support { enum { @@ -129,6 +138,10 @@ struct output_streamer { inline static std::ostream& put(std::ostream& out, hsa_queue_t** v) { out << ""; return out; } }; };}; +#else // !__cplusplus +typedef void* hsa_amd_queue_intercept_handler; +typedef void* hsa_amd_runtime_queue_notifier; +#endif //! __cplusplus #include "inc/hsa_prof_str.h" #endif // INC_ROCTRACER_HSA_H_ diff --git a/inc/roctracer_kfd.h b/inc/roctracer_kfd.h index 45113ce435..fcc1e3cd87 100644 --- a/inc/roctracer_kfd.h +++ b/inc/roctracer_kfd.h @@ -23,11 +23,11 @@ THE SOFTWARE. ///////////////////////////////////////////////////////////////////////////// #ifndef INC_ROCTRACER_KFD_H_ #define INC_ROCTRACER_KFD_H_ -#include - #include "roctracer.h" #include "hsakmt.h" +#ifdef __cplusplus #include "inc/kfd_ostream_ops.h" +#endif #include "inc/kfd_prof_str.h" #endif // INC_ROCTRACER_KFD_H_ diff --git a/inc/roctracer_roctx.h b/inc/roctracer_roctx.h index 329e974d61..accec45255 100644 --- a/inc/roctracer_roctx.h +++ b/inc/roctracer_roctx.h @@ -33,8 +33,6 @@ THE SOFTWARE. #ifndef INC_ROCTRACER_ROCTX_H_ #define INC_ROCTRACER_ROCTX_H_ -#include "cb_table.h" - // ROC-TX API ID enumeration enum roctx_api_id_t { ROCTX_API_ID_roctxMarkA = 0, @@ -45,7 +43,7 @@ enum roctx_api_id_t { }; // ROCTX callbacks data type -struct roctx_api_data_t { +typedef struct roctx_api_data_s { union { const char* message; struct { @@ -58,14 +56,15 @@ struct roctx_api_data_t { const char* message; } roctxRangePop; } args; -}; +} roctx_api_data_t; +#ifdef __cplusplus +#include "cb_table.h" namespace roctx { - // ROCTX callbacks table type typedef roctracer::CbTable cb_table_t; - } // namespace roctx +#endif #ifdef __cplusplus extern "C" { diff --git a/script/gen_ostream_ops.py b/script/gen_ostream_ops.py old mode 100644 new mode 100755 index f98a83b35b..38f7e757cd --- a/script/gen_ostream_ops.py +++ b/script/gen_ostream_ops.py @@ -76,7 +76,7 @@ HEADER = \ '\n' structs_done = {} -def process_struct(f,c,cppHeader,nname): +def process_struct(f,c,cppHeader,nname,apiname): if c not in cppHeader.classes: return @@ -104,9 +104,9 @@ def process_struct(f,c,cppHeader,nname): if mtype != "" and "union" not in mtype: if array_size == "": - str = " roctracer::kfd_support::output_streamer<"+mtype+">::put(out,v."+name+");\n" + str = " roctracer::" + apiname.lower() + "_support::output_streamer<"+mtype+">::put(out,v."+name+");\n" else: - str = " roctracer::kfd_support::output_streamer<"+mtype+"["+array_size+"]>::put(out,v."+name+");\n" + str = " roctracer::" + apiname.lower() + "_support::output_streamer<"+mtype+"["+array_size+"]>::put(out,v."+name+");\n" if nname != "" and nname not in str: #print("injecting ",nname, "in ", str) @@ -115,70 +115,79 @@ def process_struct(f,c,cppHeader,nname): f.write(str) else: nc = prop+"::" - process_struct(f,nc,cppHeader,name) + process_struct(f,nc,cppHeader,name,apiname) nc = prop+"::"+mtype+" " - process_struct(f,nc,cppHeader,name) + process_struct(f,nc,cppHeader,name,apiname) nc = c+"::" - process_struct(f,nc,cppHeader,name) + process_struct(f,nc,cppHeader,name,apiname) - -def gen_cppheader(infilepath,outfilepath): +def gen_cppheader(infilepath, outfilepath): try: cppHeader = CppHeaderParser.CppHeader(infilepath) except CppHeaderParser.CppParseError as e: print(e) sys.exit(1) - + mpath = os.path.dirname(outfilepath) + if mpath == "": + mpath = os.getcwd() + apiname = outfilepath.replace(mpath+"/","") + apiname = apiname.replace("_ostream_ops.h","") + apiname = apiname.upper() f = open(outfilepath,"w+") + f2 = open(mpath + "/basic_ostream_ops.h","w+") f.write("// automatically generated\n") - f.write(LICENSE) - f.write("\n") + f2.write("// automatically generated\n") + f.write(LICENSE + '\n') + f2.write(LICENSE + '\n') HEADER_S = \ - '#ifndef INC_KFD_OSTREAM_OPS_H_\n' + \ - '#define INC_KFD_OSTREAM_OPS_H_\n' + \ + '#ifndef INC_' + apiname + '_OSTREAM_OPS_H_\n' + \ + '#define INC_' + apiname + '_OSTREAM_OPS_H_\n' + \ '#include \n' + \ '\n' + \ - '#include "roctracer.h"\n' + \ - '#include "hsakmt.h"\n' + '#include "roctracer.h"\n' f.write(HEADER_S) f.write('\n') f.write('namespace roctracer {\n') - f.write('namespace kfd_support {\n') - f.write('// begin ostream ops for KFD \n') - f.write(HEADER) + f.write('namespace ' + apiname.lower() + '_support {\n') + f.write('// begin ostream ops for '+ apiname + ' \n') + f.write('#include "basic_ostream_ops.h"' + '\n') + f2.write(HEADER) for c in cppHeader.classes: if "union" in c: continue - f.write("\ntemplate<>\n") - f.write("struct output_streamer<"+c+"&> {\n") - f.write(" inline static std::ostream& put(std::ostream& out, "+c+"& v)\n") - f.write("{\n") - process_struct(f,c,cppHeader,"") - f.write(" return out;\n") - f.write("}\n") - f.write("};\n") + if len(cppHeader.classes[c]["properties"]["public"])!=0: + f.write("\ntemplate<>\n") + f.write("struct output_streamer<"+c+"&> {\n") + f.write(" inline static std::ostream& put(std::ostream& out, "+c+"& v)\n") + f.write("{\n") + process_struct(f,c,cppHeader,"",apiname) + f.write(" return out;\n") + f.write("}\n") + f.write("};\n") FOOTER = \ - '// end ostream ops for KFD \n' + '// end ostream ops for '+ apiname + ' \n' FOOTER += '};};\n' + \ '\n' + \ - '#endif // INC_KFD_OSTREAM_OPS_H_\n' + \ + '#endif // INC_' + apiname + '_OSTREAM_OPS_H_\n' + \ ' \n' FOOTER2 = '\n\n' + \ '#endif // INC_BASIC_OSTREAM_OPS_H_\n' + \ ' \n' f.write(FOOTER) - f.close() + f2.close() + print('File ' + outfilepath + ' generated') + print('File ' + mpath + '/basic_ostream_ops.h generated') + return parser = argparse.ArgumentParser(description='genOstreamOps.py: generates ostream operators for all typedefs in provided input file.') requiredNamed = parser.add_argument_group('Required arguments') -requiredNamed.add_argument('-in','--in', help='Header file to be parsed', required=True) -requiredNamed.add_argument('-out','--out', help='Output file with ostream operators', required=True) +requiredNamed.add_argument('-in', metavar='file', help='Header file to be parsed', required=True) +requiredNamed.add_argument('-out', metavar='file', help='Output file with ostream operators', required=True) args = vars(parser.parse_args()) if __name__ == '__main__': gen_cppheader(args['in'],args['out']) - diff --git a/script/hsaap.py b/script/hsaap.py index e9a7b0463f..07a365e2b4 100755 --- a/script/hsaap.py +++ b/script/hsaap.py @@ -1,4 +1,5 @@ #!/usr/bin/python +from __future__ import print_function import os, sys, re OUT='inc/hsa_prof_str.h' @@ -36,7 +37,7 @@ LICENSE = \ ############################################################# # Error handler def fatal(module, msg): - print >>sys.stderr, module + ' Error: "' + msg + '"' + print (module + ' Error: "' + msg + '"', file = sys.stderr) sys.exit(1) # Get next text block @@ -342,8 +343,8 @@ class API_DescrParser: self.content += ' ' + self.api_id[call] + ' = ' + str(n) + ',\n' else: self.content += '\n' - self.content += ' HSA_API_ID_NUMBER = ' + str(n) + ',\n' - self.content += ' HSA_API_ID_ANY = ' + str(n + 1) + ',\n' + self.content += ' HSA_API_ID_DISPATCH = ' + str(n) + ',\n' + self.content += ' HSA_API_ID_NUMBER = ' + str(n + 1) + ',\n' self.content += '};\n' # generate API args structure @@ -440,6 +441,7 @@ class API_DescrParser: # generate stream operator def gen_out_stream(self, n, name, call, struct): if n == -1: + self.content += '#ifdef __cplusplus\n' self.content += 'typedef std::pair hsa_api_data_pair_t;\n' self.content += 'inline std::ostream& operator<< (std::ostream& out, const hsa_api_data_pair_t& data_pair) {\n' self.content += ' const uint32_t cid = data_pair.first;\n' @@ -483,12 +485,13 @@ class API_DescrParser: self.content += ' }\n' self.content += ' return out;\n' self.content += '}\n' + self.content += '#endif\n' ############################################################# # main # Usage if len(sys.argv) != 3: - print >>sys.stderr, "Usage:", sys.argv[0], " " + print ("Usage:", sys.argv[0], " ", file=sys.stderr) sys.exit(1) else: ROOT = sys.argv[1] + '/' @@ -497,7 +500,7 @@ else: descr = API_DescrParser(OUT, HSA_DIR, API_TABLES_H, API_HEADERS_H, LICENSE) out_file = ROOT + OUT -print 'Generating "' + out_file + '"' +print ('Generating "' + out_file + '"') f = open(out_file, 'w') f.write(descr.content[:-1]) f.close() diff --git a/script/kfdap.py b/script/kfdap.py index d822cf2752..a9c6defb5f 100755 --- a/script/kfdap.py +++ b/script/kfdap.py @@ -1,4 +1,5 @@ #!/usr/bin/python +from __future__ import print_function import os, sys, re OUT_H = 'inc/kfd_prof_str.h' @@ -33,7 +34,7 @@ LICENSE = \ ############################################################# # Error handler def fatal(module, msg): - print >>sys.stderr, module + ' Error: "' + msg + '"' + print (module + ' Error: "' + msg + '"', file = sys.stderr) sys.exit(1) # Get next text block @@ -284,7 +285,6 @@ class API_DescrParser: self.content_h += '#include \n' self.content_h += '#include \"roctracer_kfd.h\"\n' self.content_h += '#include \"hsakmt.h\"\n' - self.content_h += '#include \"cb_table.h\"\n' self.content_h += '#define PUBLIC_API __attribute__((visibility(\"default\")))\n' @@ -293,6 +293,7 @@ class API_DescrParser: self.content_h += '\n' self.content_h += '#if PROF_API_IMPL\n' + self.content_h += '#include \"cb_table.h\"\n' self.content_h += 'namespace roctracer {\n' self.content_h += 'namespace kfd_support {\n' @@ -372,7 +373,7 @@ class API_DescrParser: # generate API args structure def gen_arg_struct(self, n, name, call, struct): if n == -1: - self.content_h += 'struct kfd_api_data_t {\n' + self.content_h += 'typedef struct kfd_api_data_s {\n' self.content_h += ' uint64_t correlation_id;\n' self.content_h += ' uint32_t phase;\n' if len(self.api_rettypes) != 0: @@ -394,7 +395,7 @@ class API_DescrParser: self.content_h += ' } ' + call + ';\n' else: self.content_h += ' } args;\n' - self.content_h += '};\n' + self.content_h += '} kfd_api_data_t;\n' # generate API callbacks def gen_callbacks(self, n, name, call, struct): @@ -406,8 +407,7 @@ class API_DescrParser: call_id = self.api_id[call]; ret_type = struct['ret'] self.content_h += ret_type + ' ' + call + '_callback(' + struct['args'] + ') {\n' # 'static ' + - if call == 'hsaKmtOpenKFD': - self.content_h += ' if (' + name + '_table == NULL) intercept_KFDApiTable();\n' + self.content_h += ' if (' + name + '_table == NULL) intercept_KFDApiTable();\n' self.content_h += ' kfd_api_data_t api_data{};\n' for var in struct['alst']: self.content_h += ' api_data.args.' + call + '.' + var.replace("[]","") + ' = ' + var.replace("[]","") + ';\n' @@ -477,6 +477,7 @@ class API_DescrParser: # generate stream operator def gen_out_stream(self, n, name, call, struct): if n == -1: + self.content_h += '#ifdef __cplusplus\n' self.content_h += 'typedef std::pair kfd_api_data_pair_t;\n' self.content_h += 'inline std::ostream& operator<< (std::ostream& out, const kfd_api_data_pair_t& data_pair) {\n' self.content_h += ' const uint32_t cid = data_pair.first;\n' @@ -510,6 +511,7 @@ class API_DescrParser: self.content_h += ' }\n' self.content_h += ' return out;\n' self.content_h += '}\n' + self.content_h += '#endif\n' self.content_cpp += 'inline std::ostream& operator<< (std::ostream& out, const HsaMemFlags& v) { out << "HsaMemFlags"; return out; }\n' # generate PUBLIC_API for all API fcts @@ -525,7 +527,7 @@ class API_DescrParser: self.content_cpp += ' return true;\n'; self.content_cpp += '}\n\n'; - if call != '-': + if call != '-' and call != 'hsaKmtCloseKFD' and call != 'hsaKmtOpenKFD': self.content_cpp += 'PUBLIC_API ' + struct['ret'] + " " + call + '(' + struct['args'] + ') { return roctracer::kfd_support::' + call + '_callback(' for i in range(0,len(struct['alst'])): if i == (len(struct['alst'])-1): @@ -538,7 +540,7 @@ class API_DescrParser: # main # Usage if len(sys.argv) != 3: - print >>sys.stderr, "Usage:", sys.argv[0], " " + print ("Usage:", sys.argv[0], " ", file = sys.stderr) sys.exit(1) else: ROOT = sys.argv[1] + '/' @@ -547,13 +549,13 @@ else: descr = API_DescrParser(OUT_H, KFD_DIR, API_HEADERS_H, LICENSE) out_file = ROOT + OUT_H -print 'Generating "' + out_file + '"' +print ('Generating "' + out_file + '"') f = open(out_file, 'w') f.write(descr.content_h[:-1]) f.close() out_file = ROOT + OUT_CPP -print 'Generating "' + out_file + '"' +print ('Generating "' + out_file + '"') f = open(out_file, 'w') f.write(descr.content_cpp[:-1]) f.close() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 44b9fd81a3..2dd3ed11d0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -17,17 +17,16 @@ target_link_libraries( ${TARGET_LIB} PRIVATE ${HSA_RUNTIME_LIB} c stdc++ ) execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/hsaap.py ${ROOT_DIR} ${HSA_RUNTIME_INC_PATH}" ) # Generating KFD/Thunk tracing primitives -if ( DEFINED KFD_WRAPPER ) - set ( KFD_LIB "kfdwrapper64" ) - set ( KFD_LIB_SRC - ${LIB_DIR}/kfd/kfd_wrapper.cpp - ) - execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${HSA_KMT_INC_PATH}/hsakmttypes.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) - add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) - target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) - target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) - execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" ) -endif() +set ( KFD_LIB "kfdwrapper64" ) +set ( KFD_LIB_SRC + ${LIB_DIR}/kfd/kfd_wrapper.cpp +) +execute_process ( COMMAND sh -xc "${CMAKE_CXX_COMPILER} -E ${HSA_KMT_INC_PATH}/hsakmttypes.h > ${PROJECT_BINARY_DIR}/hsakmttypes_pp.h" ) +execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/gen_ostream_ops.py -in ${PROJECT_BINARY_DIR}/hsakmttypes_pp.h -out ${ROOT_DIR}/inc/kfd_ostream_ops.h" ) +add_library ( ${KFD_LIB} SHARED ${KFD_LIB_SRC} ) +target_include_directories ( ${KFD_LIB} PRIVATE ${LIB_DIR} ${ROOT_DIR} ${ROOT_DIR}/inc ${HSA_RUNTIME_INC_PATH} ${HSA_RUNTIME_HSA_INC_PATH} ${HSA_KMT_INC_PATH} ) +target_link_libraries( ${KFD_LIB} PRIVATE c stdc++ ) +execute_process ( COMMAND sh -xc "${ROOT_DIR}/script/kfdap.py ${ROOT_DIR} ${HSA_KMT_INC_PATH}" ) set ( ROCTX_LIB "roctx64" ) set ( ROCTX_LIB_SRC diff --git a/src/core/journal.h b/src/core/journal.h index f4d8a676b7..68f1e67709 100644 --- a/src/core/journal.h +++ b/src/core/journal.h @@ -47,7 +47,7 @@ class Journal { } ~Journal() { - for (auto& val : map_) delete val.second; + for (auto& val : *map_) delete val.second; delete map_; } diff --git a/src/core/loader.h b/src/core/loader.h index fa2b0e62ad..dd30dc72dd 100644 --- a/src/core/loader.h +++ b/src/core/loader.h @@ -21,11 +21,10 @@ class BaseLoader : public T { if (handle_ == NULL) return NULL; fun_t *f = (fun_t*) dlsym(handle_, fun_name); - if (f == NULL) { + if ((to_check_symb_ == true) && (f == NULL)) { fprintf(stderr, "roctracer: symbol lookup '%s' failed: \"%s\"\n", fun_name, dlerror()); abort(); } - dlerror(); return f; } @@ -46,13 +45,12 @@ class BaseLoader : public T { private: BaseLoader() { - const int flags = RTLD_LAZY; + const int flags = (to_load_ == true) ? RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD; handle_ = dlopen(lib_name_, flags); - if (handle_ == NULL) { + if ((to_check_open_ == true) && (handle_ == NULL)) { fprintf(stderr, "roctracer: Loading '%s' failed, %s\n", lib_name_, dlerror()); abort(); } - dlerror(); T::init(this); } @@ -61,12 +59,43 @@ class BaseLoader : public T { if (handle_ != NULL) dlclose(handle_); } + static bool to_load_; + static bool to_check_open_; + static bool to_check_symb_; + static mutex_t mutex_; static const char* lib_name_; static std::atomic instance_; void* handle_; }; +// 'rocprofiler' library loader class +class RocpApi { + public: + typedef BaseLoader Loader; + + typedef bool (RegisterCallback_t)(uint32_t op, void* callback, void* arg); + typedef bool (OperateCallback_t)(uint32_t op); + typedef bool (InitCallback_t)(void* callback, void* arg); + typedef bool (EnableCallback_t)(uint32_t op, bool enable); + typedef const char* (NameCallback_t)(uint32_t op); + + RegisterCallback_t* RegisterApiCallback; + OperateCallback_t* RemoveApiCallback; + InitCallback_t* InitActivityCallback; + EnableCallback_t* EnableActivityCallback; + NameCallback_t* GetOpName; + + protected: + void init(Loader* loader) { + RegisterApiCallback = loader->GetFun("RegisterApiCallback"); + RemoveApiCallback = loader->GetFun("RemoveApiCallback"); + InitActivityCallback = loader->GetFun("InitActivityCallback"); + EnableActivityCallback = loader->GetFun("EnableActivityCallback"); + GetOpName = loader->GetFun("GetOpName"); + } +}; + // HIP runtime library loader class class HipApi { public: @@ -110,9 +139,9 @@ class HccApi { protected: void init(Loader* loader) { #if HIP_VDI - InitActivityCallback = loader->GetFun("InitActivityCallback"); - EnableActivityCallback = loader->GetFun("EnableActivityCallback"); - GetOpName = loader->GetFun("GetCmdName"); + InitActivityCallback = loader->GetFun("hipInitActivityCallback"); + EnableActivityCallback = loader->GetFun("hipEnableActivityCallback"); + GetOpName = loader->GetFun("hipGetCmdName"); #else InitActivityCallback = loader->GetFun("InitActivityCallbackImpl"); EnableActivityCallback = loader->GetFun("EnableActivityCallbackImpl"); @@ -161,6 +190,7 @@ class RocTxApi { } }; +typedef BaseLoader RocpLoader; typedef BaseLoader HipLoader; typedef BaseLoader HccLoader; typedef BaseLoader KfdLoader; @@ -171,9 +201,16 @@ typedef BaseLoader RocTxLoader; #define LOADER_INSTANTIATE() \ template typename roctracer::BaseLoader::mutex_t roctracer::BaseLoader::mutex_; \ template std::atomic*> roctracer::BaseLoader::instance_{}; \ + template bool roctracer::BaseLoader::to_load_ = false; \ + template bool roctracer::BaseLoader::to_check_open_ = true; \ + template bool roctracer::BaseLoader::to_check_symb_ = true; \ + template<> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \ template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \ - template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp_hsa.so"; \ + template<> bool roctracer::HipLoader::to_check_open_ = false; \ + template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp.so"; \ + template<> bool roctracer::HccLoader::to_check_open_ = false; \ template<> const char* roctracer::KfdLoader::lib_name_ = "libkfdwrapper64.so"; \ - template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; + template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \ + template<> bool roctracer::RocTxLoader::to_load_ = true; #endif // SRC_CORE_LOADER_H_ diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index 1ae25fb885..daedb97db9 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -27,9 +27,7 @@ THE SOFTWARE. #include "inc/roctracer_roctx.h" #define PROF_API_IMPL 1 #include "inc/roctracer_hsa.h" -#ifdef KFD_WRAPPER #include "inc/roctracer_kfd.h" -#endif #include #include @@ -87,9 +85,13 @@ THE SOFTWARE. (void)err; \ return X; -#ifndef onload_debug -#define onload_debug false -#endif +#define ONLOAD_TRACE(str) \ + if (getenv("ROCP_ONLOAD_TRACE")) do { \ + std::cout << "PID(" << GetPid() << "): TRACER_LIB::" << __FUNCTION__ << " " << str << std::endl << std::flush; \ + } while(0); +#define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin") +#define ONLOAD_TRACE_END() ONLOAD_TRACE("end") + static inline uint32_t GetPid() { return syscall(__NR_getpid); } @@ -174,7 +176,7 @@ decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn; typedef decltype(roctracer_enable_op_callback)* roctracer_enable_op_callback_t; typedef decltype(roctracer_disable_op_callback)* roctracer_disable_op_callback_t; -typedef decltype(roctracer_enable_op_activity)* roctracer_enable_op_activity_t; +typedef decltype(roctracer_enable_op_activity_expl)* roctracer_enable_op_activity_t; typedef decltype(roctracer_disable_op_activity)* roctracer_disable_op_activity_t; struct cb_journal_data_t { @@ -251,18 +253,16 @@ class GlobalCounter { public: typedef std::mutex mutex_t; typedef uint64_t counter_t; + typedef std::atomic atomic_counter_t; - static counter_t Increment() { - std::lock_guard lock(mutex_); - return ++counter_; - } + static counter_t Increment() { return counter_.fetch_add(1, std::memory_order_relaxed); } private: static mutex_t mutex_; - static counter_t counter_; + static atomic_counter_t counter_; }; GlobalCounter::mutex_t GlobalCounter::mutex_; -GlobalCounter::counter_t GlobalCounter::counter_ = 0; +GlobalCounter::atomic_counter_t GlobalCounter::counter_{1}; // Records storage struct roctracer_api_data_t { @@ -284,6 +284,7 @@ typedef std::map correlati typedef std::mutex correlation_id_mutex_t; correlation_id_map_t* correlation_id_map = NULL; correlation_id_mutex_t correlation_id_mutex; +bool correlation_id_wait = true; static thread_local std::stack external_id_stack; @@ -296,6 +297,7 @@ static inline void CorrelationIdRegistr(const activity_correlation_id_t& correla static inline activity_correlation_id_t CorrelationIdLookup(const activity_correlation_id_t& correlation_id) { auto it = correlation_id_map->find(correlation_id); + if (correlation_id_wait) while (it == correlation_id_map->end()) it = correlation_id_map->find(correlation_id); if (it == correlation_id_map->end()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id lookup failed(" << correlation_id << ")"); return it->second; } @@ -330,6 +332,7 @@ void* HIP_SyncActivityCallback( data = &(top.data.hip); data_ptr = const_cast(data); data_ptr->phase = phase; + data_ptr->correlation_id = 0; } // Filing record info @@ -391,8 +394,6 @@ void HCC_ActivityIdCallback(activity_correlation_id_t correlation_id) { } void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) { - static hsa_rt_utils::Timer timer; - MemoryPool* pool = reinterpret_cast(arg); roctracer_record_t* record_ptr = reinterpret_cast(record); record_ptr->domain = ACTIVITY_DOMAIN_HCC_OPS; @@ -523,14 +524,36 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor( return status; } +void HSA_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) { + MemoryPool* pool = reinterpret_cast(arg); + roctracer_record_t* record_ptr = reinterpret_cast(record); + record_ptr->domain = ACTIVITY_DOMAIN_HSA_OPS; + pool->Write(*record_ptr); +} + +// Logger routines and primitives util::Logger::mutex_t util::Logger::mutex_; std::atomic util::Logger::instance_{}; + +// Memory pool routines and primitives MemoryPool* memory_pool = NULL; typedef std::recursive_mutex memory_pool_mutex_t; memory_pool_mutex_t memory_pool_mutex; + +// Stop sttaus routines and primitives +unsigned stop_status_value = 0; +typedef std::mutex stop_status_mutex_t; +stop_status_mutex_t stop_status_mutex; +unsigned set_stopped(unsigned val) { + std::lock_guard lock(stop_status_mutex); + const unsigned ret = (stop_status_value ^ val); + stop_status_value = val; + return ret; +} } // namespace roctracer LOADER_INSTANTIATE(); +TRACE_BUFFER_INSTANTIATE(); /////////////////////////////////////////////////////////////////////////////////////////////////// // Public library methods @@ -555,24 +578,16 @@ PUBLIC_API const char* roctracer_op_string( { API_METHOD_PREFIX switch (domain) { - case ACTIVITY_DOMAIN_HSA_API: { + case ACTIVITY_DOMAIN_HSA_API: return roctracer::hsa_support::GetApiName(op); - break; - } - case ACTIVITY_DOMAIN_HCC_OPS: { + case ACTIVITY_DOMAIN_HSA_OPS: + return roctracer::RocpLoader::Instance().GetOpName(op); + case ACTIVITY_DOMAIN_HCC_OPS: return roctracer::HccLoader::Instance().GetOpName(kind); - break; - } - case ACTIVITY_DOMAIN_HIP_API: { + case ACTIVITY_DOMAIN_HIP_API: return roctracer::HipLoader::Instance().ApiName(op); - break; - } -#if KFD_WRAPPER - case ACTIVITY_DOMAIN_KFD_API: { + case ACTIVITY_DOMAIN_KFD_API: return roctracer::kfd_support::GetApiName(op); - break; - } -#endif default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } @@ -593,13 +608,11 @@ PUBLIC_API roctracer_status_t roctracer_op_code( if (kind != NULL) *kind = 0; break; } -#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { *op = roctracer::kfd_support::GetApiCode(str); if (kind != NULL) *kind = 0; break; } -#endif default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "limited domain ID(" << domain << ")"); } @@ -608,13 +621,11 @@ PUBLIC_API roctracer_status_t roctracer_op_code( static inline uint32_t get_op_num(const uint32_t& domain) { switch (domain) { - case ACTIVITY_DOMAIN_HSA_OPS: return 1; + case ACTIVITY_DOMAIN_HSA_OPS: return HSA_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER; case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER; case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER; -#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: return KFD_API_ID_NUMBER; -#endif case ACTIVITY_DOMAIN_EXT_API: return 0; case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER; default: @@ -631,28 +642,35 @@ static roctracer_status_t roctracer_enable_callback_fun( void* user_data) { switch (domain) { -#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); - if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error"); + if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error(" << op << ") failed"); break; } -#endif case ACTIVITY_DOMAIN_HSA_OPS: break; case ACTIVITY_DOMAIN_HSA_API: { +#if 0 + if (op == HSA_API_ID_DISPATCH) { + const bool succ = roctracer::RocpLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error(" << op << ") failed"); + break; + } +#endif roctracer::hsa_support::cb_table.set(op, callback, user_data); break; } case ACTIVITY_DOMAIN_HCC_OPS: break; case ACTIVITY_DOMAIN_HIP_API: { + if (roctracer::HipLoader::Instance().Enabled() == false) break; + hipError_t hip_err = roctracer::HipLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterApiCallback(" << op << ") error(" << hip_err << ")"); + if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RegisterApiCallback(" << op << ") error(" << hip_err << ")"); break; } case ACTIVITY_DOMAIN_ROCTX: { if (roctracer::RocTxLoader::Instance().Enabled()) { const bool suc = roctracer::RocTxLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); - if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "roctxRegisterApiCallback(" << op << ") failed"); + if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RegisterApiCallback(" << op << ") failed"); } break; } @@ -668,8 +686,8 @@ static void roctracer_enable_callback_impl( roctracer_rtapi_callback_t callback, void* user_data) { - roctracer::cb_journal->registr({domain, op, {callback, user_data}}); - roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data); + roctracer::cb_journal->registr({domain, op, {callback, user_data}}); + roctracer_enable_callback_fun((roctracer_domain_t)domain, op, callback, user_data); } PUBLIC_API roctracer_status_t roctracer_enable_op_callback( @@ -712,25 +730,35 @@ static roctracer_status_t roctracer_disable_callback_fun( uint32_t op) { switch (domain) { -#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { const bool succ = roctracer::KfdLoader::Instance().RemoveApiCallback(op); if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RemoveApiCallback error"); break; } -#endif case ACTIVITY_DOMAIN_HSA_OPS: break; - case ACTIVITY_DOMAIN_HSA_API: break; + case ACTIVITY_DOMAIN_HSA_API: { +#if 0 + if (op == HSA_API_ID_DISPATCH) { + const bool succ = roctracer::RocpLoader::Instance().RemoveApiCallback(op); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RemoveActivityCallback error(" << op << ") failed"); + break; + } +#endif + roctracer::hsa_support::cb_table.set(op, NULL, NULL); + break; + } case ACTIVITY_DOMAIN_HCC_OPS: break; case ACTIVITY_DOMAIN_HIP_API: { + if (roctracer::HipLoader::Instance().Enabled() == false) break; + hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveApiCallback error(" << hip_err << ")"); + if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveApiCallback(" << op << "), error(" << hip_err << ")"); break; } case ACTIVITY_DOMAIN_ROCTX: { if (roctracer::RocTxLoader::Instance().Enabled()) { const bool suc = roctracer::RocTxLoader::Instance().RemoveApiCallback(op); - if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "roctxRemoveApiCallback(" << op << ") failed"); + if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RemoveApiCallback(" << op << ") failed"); } break; } @@ -777,7 +805,7 @@ PUBLIC_API roctracer_status_t roctracer_disable_callback() } // Return default pool and set new one if parameter pool is not NULL. -PUBLIC_API roctracer_pool_t* roctracer_default_pool(roctracer_pool_t* pool) { +PUBLIC_API roctracer_pool_t* roctracer_default_pool_expl(roctracer_pool_t* pool) { std::lock_guard lock(roctracer::memory_pool_mutex); roctracer_pool_t* p = reinterpret_cast(roctracer::memory_pool); if (pool != NULL) roctracer::memory_pool = reinterpret_cast(pool); @@ -785,7 +813,7 @@ PUBLIC_API roctracer_pool_t* roctracer_default_pool(roctracer_pool_t* pool) { } // Open memory pool -PUBLIC_API roctracer_status_t roctracer_open_pool( +PUBLIC_API roctracer_status_t roctracer_open_pool_expl( const roctracer_properties_t* properties, roctracer_pool_t** pool) { @@ -802,7 +830,7 @@ PUBLIC_API roctracer_status_t roctracer_open_pool( } // Close memory pool -PUBLIC_API roctracer_status_t roctracer_close_pool(roctracer_pool_t* pool) { +PUBLIC_API roctracer_status_t roctracer_close_pool_expl(roctracer_pool_t* pool) { API_METHOD_PREFIX std::lock_guard lock(roctracer::memory_pool_mutex); roctracer_pool_t* ptr = (pool == NULL) ? roctracer_default_pool() : pool; @@ -821,14 +849,34 @@ static roctracer_status_t roctracer_enable_activity_fun( if (pool == NULL) pool = roctracer_default_pool(); switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { - roctracer::hsa_support::async_copy_callback_enabled = true; - rocprofiler::InterceptQueue::Enable(true); + if (op == HSA_OP_ID_COPY) { + roctracer::hsa_support::async_copy_callback_enabled = true; + } else { + const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL); + if (init_phase == true) { + roctracer::RocpLoader::Instance().InitActivityCallback((void*)roctracer::HSA_AsyncActivityCallback, + (void*)pool); + } + const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, true); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error"); + } break; } case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_KFD_API: break; case ACTIVITY_DOMAIN_HCC_OPS: { - if (roctracer::HccLoader::GetRef() == NULL) { + const bool init_phase = (roctracer::HccLoader::GetRef() == NULL); + if (roctracer::HccLoader::Instance().Enabled() == false) break; + + if (init_phase == true) { + if (getenv("ROCP_HCC_CORRID_WAIT") != NULL) { + roctracer::correlation_id_wait = true; + fprintf(stdout, "roctracer: HCC correlation ID wait enabled\n"); fflush(stdout); + } + if (getenv("ROCP_HCC_CORRID_NOWAIT") != NULL) { + roctracer::correlation_id_wait = false; + fprintf(stdout, "roctracer: HCC correlation ID wait disabled\n"); fflush(stdout); + } roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback, (void*)roctracer::HCC_AsyncActivityCallback, (void*)pool); @@ -838,6 +886,8 @@ static roctracer_status_t roctracer_enable_activity_fun( break; } case ACTIVITY_DOMAIN_HIP_API: { + if (roctracer::HipLoader::Instance().Enabled() == false) break; + const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(op, (void*)roctracer::HIP_SyncActivityCallback, (void*)pool); if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterActivityCallback error(" << hip_err << ")"); break; @@ -858,7 +908,7 @@ static void roctracer_enable_activity_impl( roctracer_enable_activity_fun((roctracer_domain_t)domain, op, pool); } -PUBLIC_API roctracer_status_t roctracer_enable_op_activity( +PUBLIC_API roctracer_status_t roctracer_enable_op_activity_expl( roctracer_domain_t domain, uint32_t op, roctracer_pool_t* pool) @@ -868,7 +918,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_op_activity( API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_enable_domain_activity( +PUBLIC_API roctracer_status_t roctracer_enable_domain_activity_expl( roctracer_domain_t domain, roctracer_pool_t* pool) { @@ -878,7 +928,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_domain_activity( API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_enable_activity( +PUBLIC_API roctracer_status_t roctracer_enable_activity_expl( roctracer_pool_t* pool) { API_METHOD_PREFIX @@ -896,20 +946,29 @@ static roctracer_status_t roctracer_disable_activity_fun( { switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { - roctracer::hsa_support::async_copy_callback_enabled = false; - rocprofiler::InterceptQueue::Enable(false); + if (op == HSA_OP_ID_COPY) { + roctracer::hsa_support::async_copy_callback_enabled = true; + } else { + if (roctracer::RocpLoader::GetRef() == NULL) break; + const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback(false) error, op(" << op << ")"); + } break; } case ACTIVITY_DOMAIN_HSA_API: break; case ACTIVITY_DOMAIN_KFD_API: break; case ACTIVITY_DOMAIN_HCC_OPS: { + if (roctracer::HccLoader::Instance().Enabled() == false) break; + const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false); - if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error domain(" << domain << ") op(" << op << ")"); + if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error, op(" << op << ")"); break; } case ACTIVITY_DOMAIN_HIP_API: { + if (roctracer::HipLoader::Instance().Enabled() == false) break; + const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveActivityCallback error(" << hip_err << ")"); + if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")"); break; } case ACTIVITY_DOMAIN_ROCTX: break; @@ -923,8 +982,8 @@ static void roctracer_disable_activity_impl( uint32_t domain, uint32_t op) { - roctracer::act_journal->remove({domain, op, {}}); - roctracer_disable_activity_fun((roctracer_domain_t)domain, op); + roctracer::act_journal->remove({domain, op, {}}); + roctracer_disable_activity_fun((roctracer_domain_t)domain, op); } PUBLIC_API roctracer_status_t roctracer_disable_op_activity( @@ -956,11 +1015,12 @@ PUBLIC_API roctracer_status_t roctracer_disable_activity() } // Flush available activity records -PUBLIC_API roctracer_status_t roctracer_flush_activity(roctracer_pool_t* pool) { +PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* pool) { API_METHOD_PREFIX if (pool == NULL) pool = roctracer_default_pool(); roctracer::MemoryPool* memory_pool = reinterpret_cast(pool); memory_pool->Flush(); + roctracer::TraceBufferBase::FlushAll(); API_METHOD_SUFFIX } @@ -1001,16 +1061,26 @@ PUBLIC_API void roctracer_mark(const char* str) { // Start API PUBLIC_API void roctracer_start() { - if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb(); - roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun)); - roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun)); + if (roctracer::set_stopped(0)) { + if (roctracer::ext_support::roctracer_start_cb) roctracer::ext_support::roctracer_start_cb(); + roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun)); + roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun)); + } } // Stop API PUBLIC_API void roctracer_stop() { - roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun)); - roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun)); - if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb(); + if (roctracer::set_stopped(1)) { + roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun)); + roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun)); + if (roctracer::ext_support::roctracer_stop_cb) roctracer::ext_support::roctracer_stop_cb(); + } +} + +PUBLIC_API roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp) { + API_METHOD_PREFIX + *timestamp = util::HsaRsrcFactory::Instance().TimestampNs(); + API_METHOD_SUFFIX } // Set properties @@ -1021,6 +1091,8 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( API_METHOD_PREFIX switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { + roctracer::trace_buffer.StartWorkerThread(); + // HSA OPS properties roctracer::hsa_ops_properties_t* ops_properties = reinterpret_cast(properties); HsaApiTable* table = reinterpret_cast(ops_properties->table); @@ -1046,12 +1118,10 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( break; } -#ifdef KFD_WRAPPER case ACTIVITY_DOMAIN_KFD_API: { roctracer::kfd_support::intercept_KFDApiTable(); break; } -#endif case ACTIVITY_DOMAIN_HSA_API: { // HSA API properties HsaApiTable* table = reinterpret_cast(properties); @@ -1062,7 +1132,7 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( } case ACTIVITY_DOMAIN_HCC_OPS: case ACTIVITY_DOMAIN_HIP_API: { -#ifdef HIP_VDI +#if HIP_VDI const char* hip_lib_name = "libamdhip64.so"; roctracer::HccLoader::SetLibName(hip_lib_name); roctracer::HipLoader::SetLibName(hip_lib_name); @@ -1082,58 +1152,54 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( API_METHOD_SUFFIX } -// HSA-runtime tool on-load method -PUBLIC_API bool roctracer_load(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) { - if (onload_debug) { printf("LIB roctracer_load\n"); fflush(stdout); } - static bool is_loaded = false; - if (is_loaded) return true; +static bool is_loaded = false; + +PUBLIC_API bool roctracer_load() { + ONLOAD_TRACE("begin, loaded(" << is_loaded << ")"); + + if (is_loaded == true) return true; is_loaded = true; - if (onload_debug) { printf("LIB roctracer_load end\n"); fflush(stdout); } + if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal; + if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal; + + ONLOAD_TRACE_END(); return true; } -PUBLIC_API void roctracer_unload(bool destruct) { - static bool is_unloaded = false; +PUBLIC_API void roctracer_unload() { + ONLOAD_TRACE("begin, loaded(" << is_loaded << ")"); - if (onload_debug) { printf("LIB roctracer_unload (%d, %d)\n", (int)destruct, (int)is_unloaded); fflush(stdout); } - if (destruct == false) return; - if (is_unloaded == true) return; - is_unloaded = true; + if (is_loaded == false) return; + is_loaded = false; + + if (roctracer::cb_journal != NULL) { + delete roctracer::cb_journal; + roctracer::cb_journal = NULL; + } + if (roctracer::act_journal != NULL) { + delete roctracer::act_journal; + roctracer::act_journal = NULL; + } roctracer::trace_buffer.Flush(); roctracer::close_output_file(roctracer::kernel_file_handle); - if (onload_debug) { printf("LIB roctracer_unload end\n"); fflush(stdout); } -} - -PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) { - if (onload_debug) { printf("LIB OnLoad\n"); fflush(stdout); } - const bool ret = roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); - if (onload_debug) { printf("LIB OnLoad end\n"); fflush(stdout); } - return ret; -} -PUBLIC_API void OnUnload() { - if (onload_debug) { printf("LIB OnUnload\n"); fflush(stdout); } - roctracer_unload(false); - if (onload_debug) { printf("LIB OnUnload end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); } CONSTRUCTOR_API void constructor() { - if (onload_debug) { printf("LIB constructor\n"); fflush(stdout); } + ONLOAD_TRACE_BEG(); roctracer::util::Logger::Create(); - if (roctracer::cb_journal == NULL) roctracer::cb_journal = new roctracer::CbJournal; - if (roctracer::act_journal == NULL) roctracer::act_journal = new roctracer::ActJournal; - if (onload_debug) { printf("LIB constructor end\n"); fflush(stdout); } + roctracer_load(); + ONLOAD_TRACE_END(); } DESTRUCTOR_API void destructor() { - if (onload_debug) { printf("LIB destructor\n"); fflush(stdout); } - roctracer_unload(true); + ONLOAD_TRACE_BEG(); + roctracer_unload(); util::HsaRsrcFactory::Destroy(); roctracer::util::Logger::Destroy(); - if (onload_debug) { printf("LIB destructor end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); } } // extern "C" diff --git a/src/core/trace_buffer.h b/src/core/trace_buffer.h index 0cbcb5bdda..fc778bb95e 100644 --- a/src/core/trace_buffer.h +++ b/src/core/trace_buffer.h @@ -2,12 +2,23 @@ #define SRC_CORE_TRACE_BUFFER_H_ #include +#include #include #include +#include + #include #include #include +#define FATAL(stream) \ + do { \ + std::ostringstream oss; \ + oss << __FUNCTION__ << "(), " << stream; \ + std::cout << oss.str() << std::endl; \ + abort(); \ + } while (0) + #define PTHREAD_CALL(call) \ do { \ int err = call; \ @@ -53,8 +64,55 @@ struct trace_entry_t { }; }; +template +struct push_element_fun { + T* const elem_; + void fun(T* node) { if (node->next_elem_ == NULL) node->next_elem_ = elem_; } + push_element_fun(T* elem) : elem_(elem) {} +}; + +template +struct call_element_fun { + void (T::*fptr_)(); + void fun(T* node) { (node->*fptr_)(); } + call_element_fun(void (T::*f)()) : fptr_(f) {} +}; + +struct TraceBufferBase { + typedef std::mutex mutex_t; + + virtual void StartWorkerThread() = 0; + virtual void Flush() = 0; + + static void StartWorkerThreadAll() { foreach(call_element_fun(&TraceBufferBase::StartWorkerThread)); } + static void FlushAll() { foreach(call_element_fun(&TraceBufferBase::Flush)); } + + static void Push(TraceBufferBase* elem) { + if (head_elem_ == NULL) head_elem_ = elem; + else foreach(push_element_fun(elem)); + } + + TraceBufferBase() : next_elem_(NULL) {} + + template + static void foreach(const F& f_in) { + std::lock_guard lck(mutex_); + F f = f_in; + TraceBufferBase* p = head_elem_; + while (p != NULL) { + TraceBufferBase* next = p->next_elem_; + f.fun(p); + p = next; + } + } + + TraceBufferBase* next_elem_; + static TraceBufferBase* head_elem_; + static mutex_t mutex_; +}; + template -class TraceBuffer { +class TraceBuffer : protected TraceBufferBase { public: typedef void (*callback_t)(Entry*); typedef TraceBuffer Obj; @@ -67,7 +125,8 @@ class TraceBuffer { }; TraceBuffer(const char* name, uint32_t size, flush_prm_t* flush_prm_arr, uint32_t flush_prm_count) : - is_flushed_(false) + is_flushed_(false), + work_thread_started_(false) { name_ = strdup(name); size_ = size; @@ -80,31 +139,43 @@ class TraceBuffer { flush_prm_arr_ = flush_prm_arr; flush_prm_count_ = flush_prm_count; - PTHREAD_CALL(pthread_mutex_init(&work_mutex_, NULL)); - PTHREAD_CALL(pthread_cond_init(&work_cond_, NULL)); - PTHREAD_CALL(pthread_create(&work_thread_, NULL, allocate_worker, this)); + TraceBufferBase::Push(this); } ~TraceBuffer() { - PTHREAD_CALL(pthread_cancel(work_thread_)); - void *res; - PTHREAD_CALL(pthread_join(work_thread_, &res)); - if (res != PTHREAD_CANCELED) abort_run("~TraceBuffer: consumer thread wasn't stopped correctly"); - + StopWorkerThread(); Flush(); } + void StartWorkerThread() { + std::lock_guard lck(mutex_); + if (work_thread_started_ == false) { + PTHREAD_CALL(pthread_mutex_init(&work_mutex_, NULL)); + PTHREAD_CALL(pthread_cond_init(&work_cond_, NULL)); + PTHREAD_CALL(pthread_create(&work_thread_, NULL, allocate_worker, this)); + work_thread_started_ = true; + } + } + + void StopWorkerThread() { + std::lock_guard lck(mutex_); + if (work_thread_started_ == true) { + PTHREAD_CALL(pthread_cancel(work_thread_)); + void *res; + PTHREAD_CALL(pthread_join(work_thread_, &res)); + if (res != PTHREAD_CANCELED) FATAL("consumer thread wasn't stopped correctly"); + work_thread_started_ = false; + } + } Entry* GetEntry() { const pointer_t pointer = read_pointer_.fetch_add(1); if (pointer >= end_pointer_) wrap_buffer(pointer); - if (pointer >= end_pointer_) abort_run("pointer >= end_pointer_ after buffer wrap"); + if (pointer >= end_pointer_) FATAL("pointer >= end_pointer_ after buffer wrap"); return data_ + (pointer + size_ - end_pointer_); } - void Flush() { - flush_buf(); - } + void Flush() { flush_buf(); } private: void flush_buf() { @@ -113,8 +184,12 @@ class TraceBuffer { if (is_flushed == false) { for (flush_prm_t* prm = flush_prm_arr_; prm < flush_prm_arr_ + flush_prm_count_; prm++) { + // Flushed entries type uint32_t type = prm->type; + // Flushing function callback_t fun = prm->fun; + if (fun == NULL) FATAL("flush function is not set"); + pointer_t pointer = 0; for (Entry* ptr : buf_list_) { Entry* end = ptr + size_; @@ -134,7 +209,7 @@ class TraceBuffer { inline Entry* allocate_fun() { Entry* ptr = (Entry*) malloc(size_ * sizeof(Entry)); - if (ptr == NULL) abort_run("TraceBuffer::allocate_fun: calloc failed"); + if (ptr == NULL) FATAL("malloc failed"); //memset(ptr, 0, size_ * sizeof(Entry)); return ptr; } @@ -156,24 +231,20 @@ class TraceBuffer { void wrap_buffer(const pointer_t pointer) { std::lock_guard lck(mutex_); + if (work_thread_started_ == false) FATAL("worker thread is not started"); + PTHREAD_CALL(pthread_mutex_lock(&work_mutex_)); if (pointer >= end_pointer_) { data_ = next_; next_ = NULL; PTHREAD_CALL(pthread_cond_signal(&work_cond_)); end_pointer_ += size_; - if (end_pointer_ == 0) abort_run("TraceBuffer::wrap_buffer: pointer overflow"); + if (end_pointer_ == 0) FATAL("pointer overflow"); buf_list_.push_back(data_); } PTHREAD_CALL(pthread_mutex_unlock(&work_mutex_)); } - void abort_run(const char* str) { - fprintf(stderr, "%s\n", str); - fflush(stderr); - abort(); - } - const char* name_; uint32_t size_; Entry* data_; @@ -189,9 +260,14 @@ class TraceBuffer { pthread_t work_thread_; pthread_mutex_t work_mutex_; pthread_cond_t work_cond_; + bool work_thread_started_; mutex_t mutex_; }; } // namespace roctracer +#define TRACE_BUFFER_INSTANTIATE() \ + roctracer::TraceBufferBase* roctracer::TraceBufferBase::head_elem_ = NULL; \ + roctracer::TraceBufferBase::mutex_t roctracer::TraceBufferBase::mutex_; + #endif // SRC_CORE_TRACE_BUFFER_H_ diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp index ccb1cd9de3..e1ef92683e 100644 --- a/src/util/hsa_rsrc_factory.cpp +++ b/src/util/hsa_rsrc_factory.cpp @@ -44,9 +44,6 @@ POSSIBILITY OF SUCH DAMAGE. #include #include -#include "util/exception.h" -#include "util/logger.h" - namespace util { // Callback function to get available in the system agents @@ -149,6 +146,11 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize CHECK_STATUS("HSA timer allocation failed", (timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); + // Time correlation + const uint32_t corr_iters = 1000; + CorrelateTime(HsaTimer::TIME_ID_CLOCK_REALTIME, corr_iters); + CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters); + // System timeout timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_); } @@ -192,6 +194,8 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn; hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn; hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn; + hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn; + hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn; hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn; hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn; @@ -230,6 +234,8 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_executable_load_agent_code_object = hsa_executable_load_agent_code_object; hsa_api_.hsa_executable_freeze = hsa_executable_freeze; hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol; + hsa_api_.hsa_executable_symbol_get_info = hsa_executable_symbol_get_info; + hsa_api_.hsa_executable_iterate_symbols = hsa_executable_iterate_symbols; hsa_api_.hsa_system_get_info = hsa_system_get_info; hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table; @@ -336,6 +342,11 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status); + // GFX8 and GFX9 SGPR/VGPR block sizes + agent_info->sgpr_block_dflt = (strcmp(agent_info->gfxip, "gfx8") == 0) ? 1 : 2; + agent_info->sgpr_block_size = 8; + agent_info->vgpr_block_size = 4; + // Set GPU index agent_info->dev_index = gpu_list_.size(); gpu_list_.push_back(agent_info); @@ -508,22 +519,25 @@ uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t s } // Wait signal -void HsaRsrcFactory::SignalWait(const hsa_signal_t& signal) const { +hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { + const hsa_signal_value_t exp_value = signal_value - 1; + hsa_signal_value_t ret_value = signal_value; while (1) { - const hsa_signal_value_t signal_value = - hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, 1, timeout_, HSA_WAIT_STATE_BLOCKED); - if (signal_value == 0) { - break; - } else { - if (signal_value == 1) WARN_LOGGING("signal waiting..."); - else EXC_RAISING(HSA_STATUS_ERROR, "hsa_signal_wait_scacquire (" << signal_value << ")"); + ret_value = + hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, timeout_, HSA_WAIT_STATE_BLOCKED); + if (ret_value == exp_value) break; + if (ret_value != signal_value) { + std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value + << "), ret_value(" << ret_value << ")" << std::endl << std::flush; + abort(); } } + return ret_value; } // Wait signal with signal value restore void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { - SignalWait(signal); + SignalWait(signal, signal_value); hsa_api_.hsa_signal_store_relaxed(const_cast(signal), signal_value); } @@ -536,7 +550,7 @@ bool HsaRsrcFactory::Memcpy(const hsa_agent_t& agent, void* dst, const void* src CHECK_STATUS("hsa_signal_create()", status); status = hsa_api_.hsa_amd_memory_async_copy(dst, cpu_agents_[0], src, agent, size, 0, NULL, s); CHECK_STATUS("hsa_amd_memory_async_copy()", status); - SignalWait(s); + SignalWait(s, 1); status = hsa_api_.hsa_signal_destroy(s); CHECK_STATUS("hsa_signal_destroy()", status); } @@ -680,9 +694,59 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t s return write_idx; } +const char* HsaRsrcFactory::GetKernelName(uint64_t addr) { + std::lock_guard lck(mutex_); + const auto it = symbols_map_->find(addr); + if (it == symbols_map_->end()) { + fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr); + abort(); + } + return strdup(it->second); +} + +void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) { + std::lock_guard lck(mutex_); + executable_tracking_on_ = true; + table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; +} + +hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) { + hsa_symbol_kind_t value = (hsa_symbol_kind_t)0; + hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value); + CHECK_STATUS("Error in getting symbol info", status); + if (value == HSA_SYMBOL_KIND_KERNEL) { + uint64_t addr = 0; + uint32_t len = 0; + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr); + CHECK_STATUS("Error in getting kernel object", status); + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len); + CHECK_STATUS("Error in getting name len", status); + char *name = new char[len + 1]; + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name); + CHECK_STATUS("Error in getting kernel name", status); + name[len] = 0; + auto ret = symbols_map_->insert({addr, name}); + if (ret.second == false) { + delete[] ret.first->second; + ret.first->second = name; + } + } + return HSA_STATUS_SUCCESS; +} + +hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) { + std::lock_guard lck(mutex_); + if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t; + hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL); + CHECK_STATUS("Error in iterating executable symbols", status); + return hsa_api_.hsa_executable_freeze(executable, options);; +} + std::atomic HsaRsrcFactory::instance_{}; HsaRsrcFactory::mutex_t HsaRsrcFactory::mutex_; HsaRsrcFactory::timestamp_t HsaRsrcFactory::timeout_ns_ = HsaTimer::TIMESTAMP_MAX; hsa_pfn_t HsaRsrcFactory::hsa_api_{}; +bool HsaRsrcFactory::executable_tracking_on_ = false; +HsaRsrcFactory::symbols_map_t* HsaRsrcFactory::symbols_map_ = NULL; } // namespace util diff --git a/src/util/hsa_rsrc_factory.h b/src/util/hsa_rsrc_factory.h index 51824a5212..466ccf1f95 100644 --- a/src/util/hsa_rsrc_factory.h +++ b/src/util/hsa_rsrc_factory.h @@ -35,6 +35,7 @@ POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include @@ -94,6 +95,8 @@ struct hsa_pfn_t { decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object; decltype(hsa_executable_freeze)* hsa_executable_freeze; decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol; + decltype(hsa_executable_symbol_get_info)* hsa_executable_symbol_get_info; + decltype(hsa_executable_iterate_symbols)* hsa_executable_iterate_symbols; decltype(hsa_system_get_info)* hsa_system_get_info; decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table; @@ -159,6 +162,11 @@ struct AgentInfo { // Number of Shader Arrays Per Shader Engines in Gpu uint32_t shader_arrays_per_se; + + // SGPR/VGPR block sizes + uint32_t sgpr_block_dflt; + uint32_t sgpr_block_size; + uint32_t vgpr_block_size; }; // HSA timer class @@ -169,6 +177,12 @@ class HsaTimer { static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; typedef long double freq_t; + enum time_id_t { + TIME_ID_CLOCK_REALTIME = 0, + TIME_ID_CLOCK_MONOTONIC = 1, + TIME_ID_NUMBER + }; + HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) { timestamp_t sysclock_hz = 0; hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); @@ -184,6 +198,11 @@ class HsaTimer { return timestamp_t((freq_t)time / sysclock_factor_); } + // Method for timespec/ns conversion + static timestamp_t timespec_to_ns(const timespec& time) { + return ((timestamp_t)time.tv_sec * 1000000000) + time.tv_nsec; + } + // Return timestamp in 'ns' timestamp_t timestamp_ns() const { timestamp_t sysclock; @@ -192,6 +211,54 @@ class HsaTimer { return sysclock_to_ns(sysclock); } + // Return time in 'ns' + static timestamp_t clocktime_ns(clockid_t clock_id) { + timespec time; + clock_gettime(clock_id, &time); + return timespec_to_ns(time); + } + + // Return pair of correlated values of profiling timestamp and time with + // correlation error for a given time ID and number of iterations + void correlated_pair_ns(time_id_t time_id, uint32_t iters, + timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) const { + clockid_t clock_id = 0; + switch (clock_id) { + case TIME_ID_CLOCK_REALTIME: + clock_id = CLOCK_REALTIME; + break; + case TIME_ID_CLOCK_MONOTONIC: + clock_id = CLOCK_MONOTONIC; + break; + default: + CHECK_STATUS("internal error: invalid time_id", HSA_STATUS_ERROR); + } + + std::vector ts_vec(iters); + std::vector tm_vec(iters); + const uint32_t steps = iters - 1; + + for (uint32_t i = 0; i < iters; ++i) { + hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &ts_vec[i]); + clock_gettime(clock_id, &tm_vec[i]); + } + + const timestamp_t ts_base = sysclock_to_ns(ts_vec.front()); + const timestamp_t tm_base = timespec_to_ns(tm_vec.front()); + const timestamp_t error = (ts_vec.back() - ts_vec.front()) / (2 * steps); + + timestamp_t ts_accum = 0; + timestamp_t tm_accum = 0; + for (uint32_t i = 0; i < iters; ++i) { + ts_accum += (ts_vec[i] - ts_base); + tm_accum += (timespec_to_ns(tm_vec[i]) - tm_base); + } + + *timestamp_v = (ts_accum / iters) + ts_base + error; + *time_v = (tm_accum / iters) + tm_base; + *error_v = error; + } + private: // Timestamp frequency factor freq_t sysclock_factor_; @@ -293,7 +360,7 @@ class HsaRsrcFactory { uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size); // Wait signal - void SignalWait(const hsa_signal_t& signal) const; + hsa_signal_value_t SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; // Wait signal with signal value restore void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; @@ -322,6 +389,11 @@ class HsaRsrcFactory { static uint64_t Submit(hsa_queue_t* queue, const void* packet); static uint64_t Submit(hsa_queue_t* queue, const void* packet, size_t size_bytes); + // Enable executables loading tracking + static bool IsExecutableTracking() { return executable_tracking_on_; } + static void EnableExecutableTracking(HsaApiTable* table); + static const char* GetKernelName(uint64_t addr); + // Initialize HSA API table void static InitHsaApiTable(HsaApiTable* table); static const hsa_pfn_t* HsaApi() { return &hsa_api_; } @@ -346,6 +418,27 @@ class HsaRsrcFactory { if (instance_ != NULL) Instance().timeout_ = Instance().timer_->ns_to_sysclock(time); } + void CorrelateTime(HsaTimer::time_id_t time_id, uint32_t iters) { + timestamp_t timestamp_v = 0; + timestamp_t time_v = 0; + timestamp_t error_v = 0; + timer_->correlated_pair_ns(time_id, iters, ×tamp_v, &time_v, &error_v); + time_shift_[time_id] = time_v - timestamp_v; + time_error_[time_id] = error_v; + } + + hsa_status_t GetTime(uint32_t time_id, timestamp_t value, uint64_t* time) { + if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR; + *time = value + time_shift_[time_id]; + return HSA_STATUS_SUCCESS; + } + + hsa_status_t GetTimestamp(uint32_t time_id, uint64_t value, timestamp_t* timestamp) { + if (time_id >= HsaTimer::TIME_ID_NUMBER) return HSA_STATUS_ERROR; + *timestamp = value - time_shift_[time_id]; + return HSA_STATUS_SUCCESS; + } + private: // System agents iterating callback static hsa_status_t GetHsaAgentsCallback(hsa_agent_t agent, void* data); @@ -386,6 +479,13 @@ class HsaRsrcFactory { // System agents map std::map agent_map_; + // Executables loading tracking + typedef std::map symbols_map_t; + static symbols_map_t* symbols_map_; + static bool executable_tracking_on_; + static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options); + static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data); + // HSA runtime API table static hsa_pfn_t hsa_api_; @@ -403,6 +503,10 @@ class HsaRsrcFactory { // HSA timer HsaTimer* timer_; + // Time shift array to support time conversion + timestamp_t time_shift_[HsaTimer::TIME_ID_NUMBER]; + timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER]; + // CPU/kern-arg memory pools hsa_amd_memory_pool_t *cpu_pool_; hsa_amd_memory_pool_t *kern_arg_pool_; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ba23540c3f..2082e2ef05 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -58,9 +58,9 @@ target_link_libraries ( ${TEST_LIB} ${ROCTRACER_TARGET} ${HSA_RUNTIME_LIB} c std ## Build HSA test execute_process ( COMMAND sh -xc "if [ ! -e ${TEST_DIR}/hsa ] ; then git clone https://github.com/ROCmSoftwarePlatform/hsa-class.git ${TEST_DIR}/hsa; fi" ) -execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout a3aabb5; fi" ) +execute_process ( COMMAND sh -xc "if [ -e ${TEST_DIR}/hsa ] ; then cd ${TEST_DIR}/hsa && git fetch origin && git checkout 777c308; fi" ) set ( TEST_DIR ${HSA_TEST_DIR} ) add_subdirectory ( ${TEST_DIR} ${PROJECT_BINARY_DIR}/test/hsa ) ## copying run script -execute_process ( COMMAND sh -xc "cp ${RUN_SCRIPT} ${PROJECT_BINARY_DIR}" ) +execute_process ( COMMAND sh -xc "cp ${RUN_SCRIPT} ${PROJECT_BINARY_DIR}" ) \ No newline at end of file diff --git a/test/MatrixTranspose/Makefile b/test/MatrixTranspose/Makefile index daa48b2561..63fae09943 100644 --- a/test/MatrixTranspose/Makefile +++ b/test/MatrixTranspose/Makefile @@ -1,7 +1,6 @@ ROOT_PATH = ../.. LIB_PATH = $(ROOT_PATH)/build -ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -export LD_LIBRARY_PATH=$(LIB_PATH) +ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so $(LIB_PATH)/libroctx64.so HIP_PATH?= $(wildcard /opt/rocm/hip) ifeq (,$(HIP_PATH)) diff --git a/test/MatrixTranspose/MatrixTranspose.cpp b/test/MatrixTranspose/MatrixTranspose.cpp index d2ecfb8484..264cf2d93b 100644 --- a/test/MatrixTranspose/MatrixTranspose.cpp +++ b/test/MatrixTranspose/MatrixTranspose.cpp @@ -23,8 +23,10 @@ THE SOFTWARE. #include // hip header file -#include "hip/hip_runtime.h" +#include #include "roctracer_ext.h" +// roctx header file +#include #define WIDTH 1024 @@ -94,15 +96,23 @@ int main() { hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice); roctracer_mark("before HIP LaunchKernel"); + roctxMark("before hipLaunchKernel"); + roctxRangePush("hipLaunchKernel"); // Lauching kernel from host hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); roctracer_mark("after HIP LaunchKernel"); + roctxMark("after hipLaunchKernel"); // Memory transfer from device to host + roctxRangePush("hipMemcpy"); + hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); + roctxRangePop(); // for "hipMemcpy" + roctxRangePop(); // for "hipLaunchKernel" + // CPU MatrixTranspose computation matrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH); diff --git a/test/MatrixTranspose_test/Makefile b/test/MatrixTranspose_test/Makefile index 202980b804..571725fd1d 100644 --- a/test/MatrixTranspose_test/Makefile +++ b/test/MatrixTranspose_test/Makefile @@ -1,8 +1,7 @@ ROOT_PATH = ../.. LIB_PATH = $(ROOT_PATH)/build -ROC_LIBS = -L$(LIB_PATH) -lroctracer64 -lroctx64 -HSA_KMT_INC_PATH ?= /opt/rocm/include -export LD_LIBRARY_PATH=$(LIB_PATH) +ROC_LIBS = -Wl,--rpath,${LIB_PATH} $(LIB_PATH)/libroctracer64.so $(LIB_PATH)/libroctx64.so +ROCM_INC_PATH ?= /opt/rocm/include HIP_VDI ?= 0 ITERATIONS ?= 100 @@ -12,27 +11,37 @@ ifeq (,$(HIP_PATH)) endif HIPCC=$(HIP_PATH)/bin/hipcc - TARGET=hcc -SOURCES = MatrixTranspose.cpp -OBJECTS = $(SOURCES:.cpp=.o) - EXECUTABLE=./MatrixTranspose +OBJECTS = MatrixTranspose.o +FLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${ROCM_INC_PATH}/hsa -I${ROCM_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) -DAMD_INTERNAL_BUILD=1 + +ifeq ($(C_TEST), 1) + COMP=gcc + SOURCES = MatrixTranspose.c + FLAGS += -DHIP_TEST=0 -D__HIP_PLATFORM_HCC__=1 -I${ROCM_INC_PATH}/hcc +else + COMP=$(HIPCC) + SOURCES = MatrixTranspose.cpp + FLAGS += -DHIP_TEST=1 +endif +ifeq ($(MGPU_TEST), 1) + FLAGS += -DMGPU_TEST=1 +endif .PHONY: test - all: clean $(EXECUTABLE) -CXXFLAGS =-g -I$(ROOT_PATH) -I$(ROOT_PATH)/inc -I${HSA_KMT_INC_PATH} -DLOCAL_BUILD=1 -DHIP_VDI=${HIP_VDI} -DITERATIONS=$(ITERATIONS) -CXX=$(HIPCC) +$(OBJECTS): $(SOURCES) + $(COMP) $(FLAGS) -c -o $@ $< $(EXECUTABLE): $(OBJECTS) $(HIPCC) $(OBJECTS) -o $@ $(ROC_LIBS) test: $(EXECUTABLE) - $(EXECUTABLE) + LD_PRELOAD="$(LIB_PATH)/libkfdwrapper64.so librocprofiler64.so" $(EXECUTABLE) clean: rm -f $(EXECUTABLE) diff --git a/test/MatrixTranspose_test/MatrixTranspose.c b/test/MatrixTranspose_test/MatrixTranspose.c new file mode 120000 index 0000000000..14d96acbc8 --- /dev/null +++ b/test/MatrixTranspose_test/MatrixTranspose.c @@ -0,0 +1 @@ +MatrixTranspose.cpp \ No newline at end of file diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index 76ce261fda..eadeb8c3c5 100644 --- a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -238,20 +238,22 @@ void api_callback( fprintf(stdout, "\n", data->args.message); return; } - if (domain == ACTIVITY_DOMAIN_KFD_API) { const kfd_api_data_t* data = (const kfd_api_data_t*)(callback_data); - fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s pid(%u) tid(%u)> \n", + fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> \n", roctracer_op_string(ACTIVITY_DOMAIN_KFD_API, cid, 0), cid, data->correlation_id, (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit", - GetPid(), - GetTid() ); return; } const hip_api_data_t* data = (const hip_api_data_t*)(callback_data); + fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s> ", + roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), + cid, + data->correlation_id, + (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit"); if (data->phase == ACTIVITY_API_PHASE_ENTER) { switch (cid) { case HIP_API_ID_hipMemcpy: @@ -326,7 +328,7 @@ void activity_callback(const char* begin, const char* end, void* arg) { record->end_ns ); if ((record->domain == ACTIVITY_DOMAIN_HIP_API) || (record->domain == ACTIVITY_DOMAIN_KFD_API)) { - fprintf(stdout, " process_id(%u) thread_id(%u)\n", + fprintf(stdout, " process_id(%u) thread_id(%u)", record->process_id, record->thread_id ); @@ -335,7 +337,13 @@ void activity_callback(const char* begin, const char* end, void* arg) { record->device_id, record->queue_id ); - if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)\n", record->bytes); + if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes); + } else if (record->domain == ACTIVITY_DOMAIN_HSA_OPS) { + fprintf(stdout, " se(%u) cycle(%lu) pc(%lx)", + record->pc_sample.se, + record->pc_sample.cycle, + record->pc_sample.pc + ); } else if (record->domain == ACTIVITY_DOMAIN_EXT_API) { fprintf(stdout, " external_id(%lu)\n", record->external_id @@ -365,9 +373,10 @@ void init_tracing() { // Enable HIP activity tracing ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); + // Enable PC sampling + ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_PCSAMPLE)); // Enable KFD API tracing ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, api_callback, NULL)); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_KFD_API)); // Enable rocTX ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, api_callback, NULL)); } @@ -385,7 +394,9 @@ void stop_tracing() { ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); - ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_KFD_API)); + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API)); + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); ROCTRACER_CALL(roctracer_flush_activity()); printf("# STOP #############################\n"); } diff --git a/test/run.sh b/test/run.sh index cade66cf07..48dfda752d 100755 --- a/test/run.sh +++ b/test/run.sh @@ -90,6 +90,7 @@ eval_test "tool SYS/HSA test" ./test/MatrixTranspose "test/MatrixTranspose_sys_h # Tracing control export ROCTRACER_DOMAIN="hip" eval_test "tool period test" "ROCP_CTRL_RATE=10:100000:1000000 ./test/MatrixTranspose" "test/MatrixTranspose_hip_trace" +eval_test "tool flushing test" "ROCP_FLUSH_RATE=100000 ./test/MatrixTranspose" "test/MatrixTranspose_hip_trace_flush" # HSA test export ROCTRACER_DOMAIN="hsa" diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 71da132baa..b1114ecd75 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -37,14 +37,14 @@ THE SOFTWARE. #include #include #include -#ifdef KFD_WRAPPER #include -#endif #include #include #include #include +#include "util/hsa_rsrc_factory.h" + #define PUBLIC_API __attribute__((visibility("default"))) #define CONSTRUCTOR_API __attribute__((constructor)) #define DESTRUCTOR_API __attribute__((destructor)) @@ -59,9 +59,12 @@ THE SOFTWARE. } \ } while (0) -#ifndef onload_debug -#define onload_debug false -#endif +#define ONLOAD_TRACE(str) \ + if (getenv("ROCP_ONLOAD_TRACE")) do { \ + std::cout << "PID(" << GetPid() << "): TRACER_TOOL::" << __FUNCTION__ << " " << str << std::endl << std::flush; \ + } while(0); +#define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin") +#define ONLOAD_TRACE_END() ONLOAD_TRACE("end") typedef hsa_rt_utils::Timer::timestamp_t timestamp_t; hsa_rt_utils::Timer* timer = NULL; @@ -74,8 +77,13 @@ bool trace_hsa_activity = false; bool trace_hip_api = false; bool trace_hip_activity = false; bool trace_kfd = false; +bool trace_pcs = false; +// API trace vector +std::vector hsa_api_vec; +std::vector kfd_api_vec; LOADER_INSTANTIATE(); +TRACE_BUFFER_INSTANTIATE(); // Global output file handle FILE* roctx_file_handle = NULL; @@ -84,18 +92,25 @@ FILE* hsa_async_copy_file_handle = NULL; FILE* hip_api_file_handle = NULL; FILE* hcc_activity_file_handle = NULL; FILE* kfd_api_file_handle = NULL; +FILE* pc_sample_file_handle = NULL; + +void close_output_file(FILE* file_handle); +void close_file_handles() { + if (roctx_file_handle) close_output_file(roctx_file_handle); + if (hsa_api_file_handle) close_output_file(hsa_api_file_handle); + if (hsa_async_copy_file_handle) close_output_file(hsa_async_copy_file_handle); + if (hip_api_file_handle) close_output_file(hip_api_file_handle); + if (hcc_activity_file_handle) close_output_file(hcc_activity_file_handle); + if (kfd_api_file_handle) close_output_file(kfd_api_file_handle); + if (pc_sample_file_handle) close_output_file(pc_sample_file_handle); +} static inline uint32_t GetPid() { return syscall(__NR_getpid); } static inline uint32_t GetTid() { return syscall(__NR_gettid); } // Error handler void fatal(const std::string msg) { - fflush(roctx_file_handle); - fflush(hsa_api_file_handle); - fflush(hsa_async_copy_file_handle); - fflush(hip_api_file_handle); - fflush(hcc_activity_file_handle); - fflush(kfd_api_file_handle); + close_file_handles(); fflush(stdout); fprintf(stderr, "%s\n\n", msg.c_str()); fflush(stderr); @@ -121,19 +136,19 @@ void* control_thr_fun(void*) { const uint32_t len_us = control_len_us % 1000000; const uint32_t dist_sec = control_dist_us / 1000000; const uint32_t dist_us = control_dist_us % 1000000; - bool start = true; + bool to_start = true; sleep(delay_sec); usleep(delay_us); while (1) { - if (start) { - start = false; + if (to_start) { + to_start = false; roctracer_start(); sleep(len_sec); usleep(len_us); } else { - start = true; + to_start = true; roctracer_stop(); sleep(dist_sec); usleep(dist_us); @@ -141,6 +156,20 @@ void* control_thr_fun(void*) { } } +// Flushing control thread +uint32_t control_flush_us = 0; +void* flush_thr_fun(void*) { + const uint32_t dist_sec = control_flush_us / 1000000; + const uint32_t dist_us = control_flush_us % 1000000; + + while (1) { + sleep(dist_sec); + usleep(dist_us); + roctracer_flush_activity(); + roctracer::TraceBufferBase::FlushAll(); + } +} + /////////////////////////////////////////////////////////////////////////////////////////////////////// // rocTX annotation tracing @@ -148,7 +177,7 @@ struct roctx_trace_entry_t { uint32_t valid; uint32_t type; uint32_t cid; - timestamp_t timestamp; + timestamp_t time; uint32_t pid; uint32_t tid; const char* message; @@ -165,12 +194,16 @@ static inline void roctx_callback_fun( uint32_t tid, const char* message) { - const timestamp_t timestamp = timer->timestamp_fn_ns(); +#if ROCTX_CLOCK_TIME + const timestamp_t time = HsaTimer::clocktime_ns(HsaTimer::TIME_ID_CLOCK_MONOTONIC); +#else + const timestamp_t time = timer->timestamp_fn_ns(); +#endif roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry(); entry->valid = roctracer::TRACE_ENTRY_COMPL; entry->type = 0; entry->cid = cid; - entry->timestamp = timestamp; + entry->time = time; entry->pid = GetPid(); entry->tid = tid; entry->message = (message != NULL) ? strdup(message) : NULL; @@ -187,25 +220,26 @@ void roctx_api_callback( roctx_callback_fun(domain, cid, GetTid(), data->args.message); } -// Start/Stop callbacks -void roctx_range_stack_callback(const roctx_range_data_t* data, void* arg) { - const bool* is_stop_ptr = (bool*)arg; - const uint32_t cid = (*is_stop_ptr == true) ? ROCTX_API_ID_roctxRangePop : ROCTX_API_ID_roctxRangePushA; - const char* message = (*is_stop_ptr == true) ? NULL : data->message; - roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, cid, data->tid, message); +// rocTX Start/Stop callbacks +void roctx_range_start_callback(const roctx_range_data_t* data, void* arg) { + roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, data->tid, data->message); } -void stop_callback() { - bool is_stop = true; - roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop); -} -void start_callback() { - bool is_stop = false; - roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, (void*)&is_stop); +void roctx_range_stop_callback(const roctx_range_data_t* data, void* arg) { + roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, data->tid, NULL); } +void start_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_start_callback, NULL); } +void stop_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stop_callback, NULL); } +// rocTX buffer flush function void roctx_flush_cb(roctx_trace_entry_t* entry) { +#if ROCTX_CLOCK_TIME + timestamp_t timestamp = 0; + HsaRsrcFactory::Instance().GetTimestamp(HsaTimer::TIME_ID_CLOCK_MONOTONIC, entry->time, ×tamp); +#else + const timestamp_t timestamp = entry->time; +#endif std::ostringstream os; - os << entry->timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid; + os << timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid; if (entry->message != NULL) os << ":\"" << entry->message << "\""; else os << ":\"\""; fprintf(roctx_file_handle, "%s\n", os.str().c_str()); fflush(roctx_file_handle); @@ -270,6 +304,9 @@ void hsa_activity_callback( index++; } +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// HIP API tracing + struct hip_api_trace_entry_t { uint32_t valid; uint32_t type; @@ -284,9 +321,6 @@ struct hip_api_trace_entry_t { void* ptr; }; -/////////////////////////////////////////////////////////////////////////////////////////////////////// -// HIP API tracing - void hip_api_flush_cb(hip_api_trace_entry_t* entry); roctracer::TraceBuffer::flush_prm_t hip_flush_prm[1] = {{0, hip_api_flush_cb}}; roctracer::TraceBuffer hip_api_trace_buffer("HIP", 0x200000, hip_flush_prm, 1); @@ -413,21 +447,27 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) { // Activity tracing callback // hipMalloc id(3) correlation_id(1): begin_ns(1525888652762640464) end_ns(1525888652762877067) -void hcc_activity_callback(const char* begin, const char* end, void* arg) { +void pool_activity_callback(const char* begin, const char* end, void* arg) { const roctracer_record_t* record = reinterpret_cast(begin); const roctracer_record_t* end_record = reinterpret_cast(end); while (record < end_record) { const char * name = roctracer_op_string(record->domain, record->op, record->kind); - if (record->domain == ACTIVITY_DOMAIN_HCC_OPS) { - fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu\n", - record->begin_ns, record->end_ns, record->device_id, record->queue_id, name, record->correlation_id); - fflush(hcc_activity_file_handle); - } else { -#if 0 - fprintf(hip_api_file_handle, "%lu:%lu %u:%u %s()\n", - record->begin_ns, record->end_ns, record->process_id, record->thread_id, name); -#endif + switch(record->domain) { + case ACTIVITY_DOMAIN_HCC_OPS: + fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu\n", + record->begin_ns, record->end_ns, + record->device_id, record->queue_id, + name, record->correlation_id); + fflush(hcc_activity_file_handle); + break; + case ACTIVITY_DOMAIN_HSA_OPS: + if (record->op == HSA_OP_ID_PCSAMPLE) { + fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n", + record->pc_sample.se, record->pc_sample.cycle, record->pc_sample.pc, name); + fflush(pc_sample_file_handle); + } + break; } ROCTRACER_CALL(roctracer_next_record(record, &record)); } @@ -437,7 +477,7 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) { // KFD API tracing // KFD API callback function -#ifdef KFD_WRAPPER +static thread_local bool in_kfd_api_callback = false; void kfd_api_callback( uint32_t domain, uint32_t cid, @@ -445,6 +485,8 @@ void kfd_api_callback( void* arg) { (void)arg; + if (in_kfd_api_callback) return; + in_kfd_api_callback = true; const kfd_api_data_t* data = reinterpret_cast(callback_data); if (data->phase == ACTIVITY_API_PHASE_ENTER) { kfd_begin_timestamp = timer->timestamp_fn_ns(); @@ -454,8 +496,8 @@ void kfd_api_callback( os << kfd_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << kfd_api_data_pair_t(cid, *data); fprintf(kfd_api_file_handle, "%s\n", os.str().c_str()); } + in_kfd_api_callback = false; } -#endif /////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -523,14 +565,73 @@ FILE* open_output_file(const char* prefix, const char* name) { } void close_output_file(FILE* file_handle) { - if ((file_handle != NULL) && (file_handle != stdout)) fclose(file_handle); + if (file_handle != NULL) { + fflush(file_handle); + if (file_handle != stdout) fclose(file_handle); + } } -// HSA-runtime tool on-load method -extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) { - if (onload_debug) { printf("TOOL OnLoad\n"); fflush(stdout); } - timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn); +// Allocating tracing pool +void open_tracing_pool() { + if (roctracer_default_pool() == NULL) { + roctracer_properties_t properties{}; + properties.buffer_size = 0x80000; + properties.buffer_callback_fun = pool_activity_callback; + ROCTRACER_CALL(roctracer_open_pool(&properties)); + } +} + +// Flush tracing pool +void close_tracing_pool() { + if (roctracer_default_pool() != NULL) { + ROCTRACER_CALL(roctracer_flush_activity()); + } +} + +// tool library is loaded +static bool is_loaded = false; + +// tool unload method +void tool_unload() { + ONLOAD_TRACE("begin, loaded(" << is_loaded << ")"); + + if (is_loaded == false) return; + is_loaded = false; + + if (trace_roctx) { + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); + } + if (trace_hsa_api) { + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); + } + if (trace_hsa_activity || trace_pcs) { + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); + } + if (trace_hip_api || trace_hip_activity) { + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); + ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); + } + if (trace_kfd) { + ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API)); + } + + // Flush tracing pool + close_tracing_pool(); + roctracer::TraceBufferBase::FlushAll(); + close_file_handles(); + + ONLOAD_TRACE_END(); +} + +// tool load method +void tool_load() { + ONLOAD_TRACE("begin, loaded(" << is_loaded << ")"); + + if (is_loaded == true) return; + is_loaded = true; + + roctracer::TraceBufferBase::StartWorkerThreadAll(); // Output file const char* output_prefix = getenv("ROCP_OUTPUT_DIR"); @@ -571,11 +672,12 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, if (std::string(trace_domain).find("kfd") != std::string::npos) { trace_kfd = true; } - } - // API trace vector - std::vector hsa_api_vec; - std::vector kfd_api_vec; + // PC sampling enabling + if (std::string(trace_domain).find("pcs") != std::string::npos) { + trace_pcs = true; + } + } printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout); @@ -648,6 +750,84 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL)); } + const char* ctrl_str = getenv("ROCP_CTRL_RATE"); + if (ctrl_str != NULL) { + uint32_t ctrl_delay = 0; + uint32_t ctrl_len = 0; + uint32_t ctrl_rate = 0; + + sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate); + + if (ctrl_len > ctrl_rate) { + fprintf(stderr, "ROCTracer: control length value (%u) > rate value (%u)\n", ctrl_len, ctrl_rate); + abort(); + } + control_dist_us = ctrl_rate - ctrl_len; + control_len_us = ctrl_len; + control_delay_us = ctrl_delay; + + roctracer_stop(); + + if (ctrl_delay != UINT32_MAX) { + fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout); + pthread_t thread; + pthread_attr_t attr; + int err = pthread_attr_init(&attr); + if (err) { errno = err; perror("pthread_attr_init"); abort(); } + err = pthread_create(&thread, &attr, control_thr_fun, NULL); + } else { + fprintf(stdout, "ROCTracer: trace start disabled\n"); fflush(stdout); + } + } + + const char* flush_str = getenv("ROCP_FLUSH_RATE"); + if (flush_str != NULL) { + sscanf(flush_str, "%d", &control_flush_us); + if (control_flush_us == 0) { + fprintf(stderr, "ROCTracer: control flush rate bad value\n"); + abort(); + } + + fprintf(stdout, "ROCTracer: trace control flush rate(%uus)\n", control_flush_us); fflush(stdout); + pthread_t thread; + pthread_attr_t attr; + int err = pthread_attr_init(&attr); + if (err) { errno = err; perror("pthread_attr_init"); abort(); } + err = pthread_create(&thread, &attr, flush_thr_fun, NULL); + } + + // Enable KFD API callbacks/activity + if (trace_kfd) { + kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt"); + // initialize KFD tracing + roctracer_set_properties(ACTIVITY_DOMAIN_KFD_API, NULL); + + printf(" KFD-trace("); + if (kfd_api_vec.size() != 0) { + for (unsigned i = 0; i < kfd_api_vec.size(); ++i) { + uint32_t cid = KFD_API_ID_NUMBER; + const char* api = kfd_api_vec[i].c_str(); + ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid, NULL)); + ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL)); + printf(" %s", api); + } + } else { + ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, kfd_api_callback, NULL)); + } + printf(")\n"); + } + + ONLOAD_TRACE_END(); +} + +// HSA-runtime tool on-load method +extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, + const char* const* failed_tool_names) { + ONLOAD_TRACE_BEG(); + timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn); + + const char* output_prefix = getenv("ROCP_OUTPUT_DIR"); + // Enable HSA API callbacks/activity if (trace_hsa_api) { hsa_api_file_handle = open_output_file(output_prefix, "hsa_api_trace.txt"); @@ -660,7 +840,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, for (unsigned i = 0; i < hsa_api_vec.size(); ++i) { uint32_t cid = HSA_API_ID_NUMBER; const char* api = hsa_api_vec[i].c_str(); - ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid)); + ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, NULL)); ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, NULL)); printf(" %s", api); } @@ -684,145 +864,52 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout); - ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); + ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY)); } // Enable HIP API callbacks/activity if (trace_hip_api || trace_hip_activity) { - hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt"); - hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt"); - fprintf(stdout, " HIP-trace()\n"); fflush(stdout); // roctracer properties roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback); // Allocating tracing pool - roctracer_properties_t properties{}; - properties.buffer_size = 0x80000; - properties.buffer_callback_fun = hcc_activity_callback; - ROCTRACER_CALL(roctracer_open_pool(&properties)); + open_tracing_pool(); + // Enable tracing if (trace_hip_api) { + hip_api_file_handle = open_output_file(output_prefix, "hip_api_trace.txt"); ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); } if (trace_hip_activity) { + hcc_activity_file_handle = open_output_file(output_prefix, "hcc_ops_trace.txt"); ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); } } - const char* ctrl_str = getenv("ROCP_CTRL_RATE"); - if (ctrl_str != NULL) { - uint32_t ctrl_delay = 0; - uint32_t ctrl_len = 0; - uint32_t ctrl_rate = 0; - int ret = sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate); - if (ret != 3) { - fprintf(stderr, "ROCTracer: control rate value invalid 'delay:length:rate': '%s'\n", ctrl_str); - abort(); - } - if (ctrl_len > ctrl_rate) { - fprintf(stderr, "ROCTracer: control length value (%u) > rate value (%u)\n", ctrl_len, ctrl_rate); - abort(); - } - control_dist_us = ctrl_rate - ctrl_len; - control_len_us = ctrl_len; - control_delay_us = ctrl_delay; - - fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout); - - roctracer_stop(); - - pthread_t thread; - pthread_attr_t attr; - int err = pthread_attr_init(&attr); - if (err) { errno = err; perror("pthread_attr_init"); abort(); } - err = pthread_create(&thread, &attr, control_thr_fun, NULL); + // Enable PC sampling + if (trace_pcs) { + fprintf(stdout, " PCS-trace()\n"); fflush(stdout); + open_tracing_pool(); + pc_sample_file_handle = open_output_file(output_prefix, "pc_sample_trace.txt"); + ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_PCSAMPLE)); } -#ifdef KFD_WRAPPER - // Enable KFD API callbacks/activity - if (trace_kfd) { - kfd_api_file_handle = open_output_file(output_prefix, "kfd_api_trace.txt"); - // initialize KFD tracing - roctracer_set_properties(ACTIVITY_DOMAIN_KFD_API, NULL); - - printf(" KFD-trace("); - if (kfd_api_vec.size() != 0) { - for (unsigned i = 0; i < kfd_api_vec.size(); ++i) { - uint32_t cid = KFD_API_ID_NUMBER; - const char* api = kfd_api_vec[i].c_str(); - ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_KFD_API, api, &cid)); - ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_KFD_API, cid, kfd_api_callback, NULL)); - printf(" %s", api); - } - } else { - ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, kfd_api_callback, NULL)); - } - printf(")\n"); - } -#endif - - if (onload_debug) { printf("TOOL OnLoad end\n"); fflush(stdout); } - return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names); -} - -// tool unload method -void tool_unload(bool destruct) { - static bool is_unloaded = false; - - if (onload_debug) { printf("TOOL tool_unload (%d, %d)\n", (int)destruct, (int)is_unloaded); fflush(stdout); } - if (destruct == false) return; - if (is_unloaded == true) return; - is_unloaded = true; - roctracer_unload(destruct); - - if (trace_roctx) { - ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_ROCTX)); - - roctx_trace_buffer.Flush(); - close_output_file(roctx_file_handle); - } - if (trace_hsa_api) { - ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API)); - - hsa_api_trace_buffer.Flush(); - close_output_file(hsa_api_file_handle); - } - if (trace_hsa_activity) { - ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS)); - - close_output_file(hsa_async_copy_file_handle); - } - if (trace_hip_api || trace_hip_activity) { - ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API)); - ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API)); - ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS)); - ROCTRACER_CALL(roctracer_flush_activity()); - ROCTRACER_CALL(roctracer_close_pool()); - - hip_api_trace_buffer.Flush(); - close_output_file(hip_api_file_handle); - close_output_file(hcc_activity_file_handle); - } - - if (trace_kfd) { - ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_KFD_API)); - fclose(kfd_api_file_handle); - } - if (onload_debug) { printf("TOOL tool_unload end\n"); fflush(stdout); } + ONLOAD_TRACE_END(); + return true; } // HSA-runtime on-unload method extern "C" PUBLIC_API void OnUnload() { - if (onload_debug) { printf("TOOL OnUnload\n"); fflush(stdout); } - tool_unload(false); - if (onload_debug) { printf("TOOL OnUnload end\n"); fflush(stdout); } + ONLOAD_TRACE(""); } extern "C" CONSTRUCTOR_API void constructor() { - if (onload_debug) { printf("TOOL constructor ...end\n"); fflush(stdout); } + ONLOAD_TRACE_BEG(); + tool_load(); + ONLOAD_TRACE_END(); } extern "C" DESTRUCTOR_API void destructor() { - if (onload_debug) { printf("TOOL destructor\n"); fflush(stdout); } - tool_unload(true); - if (onload_debug) { printf("TOOL destructor end\n"); fflush(stdout); } + ONLOAD_TRACE_BEG(); + tool_unload(); + ONLOAD_TRACE_END(); }