Merge "Consolidate all sources of timestamps" into amd-staging

[ROCm/roctracer commit: 7c4f7625b1]
This commit is contained in:
Laurent Morichetti
2022-05-10 14:53:36 -04:00
zatwierdzone przez Gerrit Code Review
11 zmienionych plików z 84 dodań i 363 usunięć
-1
Wyświetl plik
@@ -93,7 +93,6 @@ set ( PUBLIC_HEADERS
roctracer_hsa.h
roctracer_roctx.h
ext/prof_protocol.h
ext/hsa_rt_utils.hpp
)
set ( GEN_HEADERS
hip_ostream_ops.h
@@ -1,82 +0,0 @@
################################################################################
## Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
##
## Permission is hereby granted, free of charge, to any person obtaining a copy
## of this software and associated documentation files (the "Software"), to
## deal in the Software without restriction, including without limitation the
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
## sell copies of the Software, and to permit persons to whom the Software is
## furnished to do so, subject to the following conditions:
##
## The above copyright notice and this permission notice shall be included in
## all copies or substantial portions of the Software.
##
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
## IN THE SOFTWARE.
################################################################################
## Build/packaging script for the "roctracer-proto" package: it installs the
## two public protocol headers and produces DEB/RPM/TGZ packages via CPack.
cmake_minimum_required ( VERSION 3.5.0 )

## Verbose output.
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )

## Set module name and project name.
set ( ROCTRACER_NAME "roctracer" )
project ( ${ROCTRACER_NAME} )
## Repository root, two levels above this script's directory.
set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../" )

## Adding default path cmake modules
list ( APPEND CMAKE_MODULE_PATH "${ROOT_DIR}/cmake_modules" )
## Include common cmake modules
include ( utils )
## Set build environment
include ( env )

## Setup the package version.
## get_version comes from the included modules; the code below relies on it
## setting VERSION_MAJOR/MINOR/PATCH (and optionally VERSION_BUILD).
get_version ( "1.0.0" )
message ( "-- LIB-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" )

set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
set ( BUILD_VERSION_MINOR ${VERSION_MINOR} )
set ( BUILD_VERSION_PATCH ${VERSION_PATCH} )
set ( LIB_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
## Append the build id to the patch component when one is provided.
## The expansion is quoted so that an empty, list-valued or variable-named
## VERSION_BUILD is always compared as a plain string.
if ( DEFINED VERSION_BUILD AND NOT "${VERSION_BUILD}" STREQUAL "" )
  message ( "VERSION BUILD DEFINED ${VERSION_BUILD}" )
  set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" )
endif ()
set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )

## Install and packaging
## Everything is installed below <prefix>/roctracer.
set ( CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}/${ROCTRACER_NAME} )
message ( "---------Install-Dir: ${CMAKE_INSTALL_PREFIX}" )
## Manual target that runs the RPM post script with shell tracing enabled.
add_custom_target( pkgconfig_install COMMAND sh -x "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )

## Install information
install ( FILES ${ROOT_DIR}/inc/ext/prof_protocol.h DESTINATION include/ext )
install ( FILES ${ROOT_DIR}/inc/ext/hsa_rt_utils.hpp DESTINATION include/ext )

## Packaging directives
set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" )
set ( CPACK_PACKAGE_NAME "${ROCTRACER_NAME}-proto" )
set ( CPACK_PACKAGE_VENDOR "AMD" )
set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} )
set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} )
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD ROCTRACER library" )
set ( CPACK_RESOURCE_FILE_LICENSE "${ROOT_DIR}/LICENSE" )

## Debian package specific variables
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" )

## RPM package specific variables
## NOTE(review): rpm_post is registered as the *pre*-install script; confirm
## whether a post-install hook was intended.
set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" )

include ( CPack )
@@ -1,28 +0,0 @@
#!/bin/bash
# Package maintainer script (presumably the Debian postinst -- confirm against
# the packaging layout). It dispatches on the action passed as the first
# argument and writes the roctracer-proto pkg-config descriptor on 'configure'.
set -e

# Create /usr/lib/pkgconfig if it is missing and (re)write roctracer-proto.pc.
# The '\$' escapes keep ${prefix} and ${includedir} literal in the generated
# file so that pkg-config, not the shell, expands them.
do_pkgconfig() {
  if [ ! -d /usr/lib/pkgconfig ] ; then mkdir -p /usr/lib/pkgconfig; fi
  cat > /usr/lib/pkgconfig/roctracer-proto.pc << EOF
prefix=/opt/rocm/roctracer
includedir=\${prefix}/include/ext
Name: roctracer-proto
Version: 1.0.0
Description: roctracer protocol
Cflags: -I\${includedir}
EOF
}

case "$1" in
  configure)
    do_pkgconfig
    ;;
  abort-upgrade|abort-remove|abort-deconfigure)
    # Nothing to undo; just report which abort action was requested.
    echo "$1"
    ;;
  *)
    # Unknown or missing action: succeed without doing anything.
    exit 0
    ;;
esac
@@ -1,18 +0,0 @@
#!/bin/bash
# Package maintainer script: deletes the roctracer-proto pkg-config descriptor
# when the action given as the first argument is 'remove' or 'upgrade'.
# Every other action (including 'purge') is a successful no-op.
set -e

# Delete the descriptor; 'rm -f' does not fail when the file is already gone.
drop_pc_file() {
  rm -f /usr/lib/pkgconfig/roctracer-proto.pc
}

action="$1"
if [ "$action" = "remove" ] || [ "$action" = "upgrade" ]; then
  drop_pc_file
fi
exit 0
@@ -1,10 +0,0 @@
# Writes the roctracer-proto pkg-config descriptor (presumably the RPM
# install hook -- confirm against the packaging layout).
# Make sure the target directory exists before writing into it.
[ -d /usr/lib/pkgconfig ] || mkdir -p /usr/lib/pkgconfig
# '\$' keeps ${prefix}/${includedir} literal so pkg-config expands them later.
cat << EOF > /usr/lib/pkgconfig/roctracer-proto.pc
prefix=/opt/rocm/roctracer
includedir=\${prefix}/include/ext
Name: roctracer-proto
Version: 1.0.0
Description: roctracer protocol
Cflags: -I\${includedir}
EOF
@@ -1 +0,0 @@
# Remove the roctracer-proto pkg-config descriptor; '-f' makes this succeed
# even when the file does not exist.
rm -f /usr/lib/pkgconfig/roctracer-proto.pc
@@ -1,117 +0,0 @@
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#ifndef INC_ROCTRACER_HSA_RT_UTILS_HPP_
#define INC_ROCTRACER_HSA_RT_UTILS_HPP_
#include <hsa/hsa.h>

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <mutex>
// Evaluates an HSA runtime call exactly once and aborts the process if it
// does not return HSA_STATUS_SUCCESS, after printing the call text and the
// status code (in hex) to stderr.
// The result is held in a macro-private name so that an argument expression
// mentioning a caller variable called 'status' is not shadowed, and the
// argument is parenthesized so any expression can be passed safely.
#define HSART_CALL(call)                                                                           \
  do {                                                                                             \
    const hsa_status_t hsart_call_status_ = (call);                                                \
    if (hsart_call_status_ != HSA_STATUS_SUCCESS) {                                                \
      std::cerr << "1HSA-rt call '" << #call << "' error(" << std::hex << hsart_call_status_       \
                << ")" << std::dec << std::endl;                                                   \
      std::abort();                                                                                \
    }                                                                                              \
  } while (0)
namespace hsa_rt_utils {
// HSA runtime timer implementation
class Timer {
public:
typedef uint64_t timestamp_t;
typedef long double freq_t;
typedef decltype(hsa_system_get_info)* hsa_system_get_info_fn_t;
// Initialization
inline void init(const hsa_system_get_info_fn_t& get_info_fn) {
hsa_system_get_info_fn = get_info_fn;
timestamp_t timestamp_hz = 0;
if (get_info_fn == NULL) {
timestamp_rate_ = 0;
} else {
HSART_CALL(get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &timestamp_hz));
timestamp_rate_ = (freq_t)1000000000 / (freq_t)timestamp_hz;
}
}
// Returns HSA runtime timestamp rate
freq_t timestamp_rate() const { return timestamp_rate_; }
// Convert a given timestamp to ns
timestamp_t timestamp_to_ns(const timestamp_t &timestamp) const {
return timestamp_t((freq_t)timestamp * timestamp_rate_);
}
// Return timestamp in 'ns'
timestamp_t timestamp_ns() const {
timestamp_t timestamp;
HSART_CALL(hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &timestamp));
return timestamp_to_ns(timestamp);
}
timestamp_t timestamp_fn_ns() const {
timestamp_t timestamp;
HSART_CALL(hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &timestamp));
return timestamp_to_ns(timestamp);
}
Timer(hsa_system_get_info_fn_t f = NULL) {
if (f != NULL) init(f);
else init(hsa_system_get_info);
}
private:
// hsa_system_get_info function
hsa_system_get_info_fn_t hsa_system_get_info_fn;
// Timestamp rate
freq_t timestamp_rate_;
};
// Process-wide holder of a single lazily created Timer.
// The static members are only declared here; no definition is visible in
// this header, so one must be provided by a translation unit that uses it.
class TimerFactory {
 public:
  typedef std::mutex mutex_t;

  // Returns the shared Timer, creating it from 'f' on the first call; 'f' is
  // ignored once the instance exists. The pointer is read and written only
  // while holding the mutex: the previous unlocked pre-check on a plain
  // (non-atomic) pointer was a data race.
  static Timer* Create(Timer::hsa_system_get_info_fn_t f = NULL) {
    std::lock_guard<mutex_t> lck(mutex_);
    if (instance_ == NULL) instance_ = new Timer(f);
    return instance_;
  }

  // Returns the shared Timer. If Create() has not been called yet the
  // instance is created with the default arguments instead of dereferencing
  // a null pointer.
  static Timer& Instance() {
    return *Create();
  }

 private:
  static Timer* instance_;
  static mutex_t mutex_;
};
} // namespace hsa_rt_utils
#endif // INC_ROCTRACER_HSA_RT_UTILS_HPP_
+45 -76
Wyświetl plik
@@ -41,9 +41,7 @@
#include "core/loader.h"
#include "core/memory_pool.h"
#include "core/tracker.h"
#include "ext/hsa_rt_utils.hpp"
#include "util/exception.h"
#include "util/hsa_rsrc_factory.h"
#include "util/logger.h"
#define PUBLIC_API __attribute__((visibility("default")))
@@ -98,78 +96,50 @@ mark_api_callback_t* mark_api_callback_ptr = NULL;
///////////////////////////////////////////////////////////////////////////////////////////////////
// Internal library methods
//
namespace rocprofiler {
decltype(hsa_queue_create)* hsa_queue_create_fn;
decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
decltype(hsa_signal_store_relaxed)* hsa_signal_store_screlease_fn;
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn;
decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn;
decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn;
decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
namespace roctracer {
decltype(hsa_system_get_info)* hsa_system_get_info_fn = hsa_system_get_info;
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn = hsa_amd_memory_async_copy;
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn =
hsa_amd_memory_async_copy_rect;
::HsaApiTable* kHsaApiTable;
void SaveHsaApi(::HsaApiTable* table) {
util::HsaRsrcFactory::InitHsaApiTable(table);
kHsaApiTable = table;
hsa_queue_create_fn = table->core_->hsa_queue_create_fn;
hsa_queue_destroy_fn = table->core_->hsa_queue_destroy_fn;
hsa_signal_store_relaxed_fn = table->core_->hsa_signal_store_relaxed_fn;
hsa_signal_store_screlease_fn = table->core_->hsa_signal_store_screlease_fn;
hsa_queue_load_write_index_relaxed_fn = table->core_->hsa_queue_load_write_index_relaxed_fn;
hsa_queue_store_write_index_relaxed_fn = table->core_->hsa_queue_store_write_index_relaxed_fn;
hsa_queue_load_read_index_relaxed_fn = table->core_->hsa_queue_load_read_index_relaxed_fn;
hsa_queue_load_write_index_scacquire_fn = table->core_->hsa_queue_load_write_index_scacquire_fn;
hsa_queue_store_write_index_screlease_fn = table->core_->hsa_queue_store_write_index_screlease_fn;
hsa_queue_load_read_index_scacquire_fn = table->core_->hsa_queue_load_read_index_scacquire_fn;
hsa_amd_queue_intercept_create_fn = table->amd_ext_->hsa_amd_queue_intercept_create_fn;
hsa_amd_queue_intercept_register_fn = table->amd_ext_->hsa_amd_queue_intercept_register_fn;
hsa_system_get_info_fn = table->core_->hsa_system_get_info_fn;
hsa_amd_memory_async_copy_fn = table->amd_ext_->hsa_amd_memory_async_copy_fn;
hsa_amd_memory_async_copy_rect_fn = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn;
}
void RestoreHsaApi() {
::HsaApiTable* table = kHsaApiTable;
table->core_->hsa_queue_create_fn = hsa_queue_create_fn;
table->core_->hsa_queue_destroy_fn = hsa_queue_destroy_fn;
table->core_->hsa_signal_store_relaxed_fn = hsa_signal_store_relaxed_fn;
table->core_->hsa_signal_store_screlease_fn = hsa_signal_store_screlease_fn;
table->core_->hsa_queue_load_write_index_relaxed_fn = hsa_queue_load_write_index_relaxed_fn;
table->core_->hsa_queue_store_write_index_relaxed_fn = hsa_queue_store_write_index_relaxed_fn;
table->core_->hsa_queue_load_read_index_relaxed_fn = hsa_queue_load_read_index_relaxed_fn;
table->core_->hsa_queue_load_write_index_scacquire_fn = hsa_queue_load_write_index_scacquire_fn;
table->core_->hsa_queue_store_write_index_screlease_fn = hsa_queue_store_write_index_screlease_fn;
table->core_->hsa_queue_load_read_index_scacquire_fn = hsa_queue_load_read_index_scacquire_fn;
table->amd_ext_->hsa_amd_queue_intercept_create_fn = hsa_amd_queue_intercept_create_fn;
table->amd_ext_->hsa_amd_queue_intercept_register_fn = hsa_amd_queue_intercept_register_fn;
}
table->core_->hsa_system_get_info_fn = hsa_system_get_info_fn;
table->amd_ext_->hsa_amd_memory_async_copy_fn = hsa_amd_memory_async_copy_fn;
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = hsa_amd_memory_async_copy_rect_fn;
}
namespace roctracer {
// timestamp definition
typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
namespace util {
// Returns the current HSA system timestamp converted to nanoseconds, or 0 when
// the HSA runtime reports that it is not initialized.
// The ns-per-tick factor is computed once, on the first successful call, by
// integer division of 1e9 by the reported clock frequency.
// NOTE(review): that division truncates, so a frequency that does not divide
// 1e9 evenly yields a rounded-down period -- confirm this is acceptable.
uint64_t timestamp_ns() {
  uint64_t ticks;
  hsa_status_t status = roctracer::hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &ticks);
  if (status == HSA_STATUS_ERROR_NOT_INITIALIZED) {
    return 0;
  }
  CHECK_STATUS("hsa_system_get_info()", status);

  // Queried lazily so the frequency is only read once the clock is available.
  static uint64_t ns_per_tick = []() {
    uint64_t ticks_per_second = 0;
    hsa_status_t status = roctracer::hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY,
                                                            &ticks_per_second);
    CHECK_STATUS("hsa_system_get_info()", status);
    const uint64_t ns_per_second = 1000000000;
    return ns_per_second / ticks_per_second;
  }();

  return ticks * ns_per_tick;
}
} // namespace util
typedef decltype(roctracer_enable_op_callback)* roctracer_enable_op_callback_t;
typedef decltype(roctracer_disable_op_callback)* roctracer_disable_op_callback_t;
@@ -347,7 +317,6 @@ inline uint32_t HipActActivityDisableCheck(uint32_t op) {
void* HIP_SyncApiDataCallback(uint32_t op_id, roctracer_record_t* record, const void* callback_data,
void* arg) {
static hsa_rt_utils::Timer timer;
if (record_pair_stack == NULL) record_pair_stack = new record_pair_stack_t;
void* ret = NULL;
@@ -399,15 +368,14 @@ void* HIP_SyncApiDataCallback(uint32_t op_id, roctracer_record_t* record, const
"correlation_id(%lu) time_ns(%lu)\n",
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0), phase, op_id, record, data, pool,
(int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0,
timer.timestamp_ns());
util::timestamp_ns());
return ret;
}
void* HIP_SyncActivityCallback(uint32_t op_id, roctracer_record_t* record,
const void* callback_data, void* arg) {
static hsa_rt_utils::Timer timer;
const timestamp_t timestamp_ns = timer.timestamp_ns();
const uint64_t timestamp_ns = util::timestamp_ns();
if (record_pair_stack == NULL) record_pair_stack = new record_pair_stack_t;
void* ret = NULL;
@@ -1205,7 +1173,7 @@ PUBLIC_API void roctracer_stop() {
PUBLIC_API roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp) {
API_METHOD_PREFIX
*timestamp = util::HsaRsrcFactory::Instance().TimestampNs();
*timestamp = roctracer::util::timestamp_ns();
API_METHOD_SUFFIX
}
@@ -1220,14 +1188,6 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(roctracer_domain_t domain
reinterpret_cast<roctracer::hsa_ops_properties_t*>(properties);
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(ops_properties->table);
#if 0
// HSA dispatches intercepting
rocprofiler::SaveHsaApi(table);
rocprofiler::ProxyQueue::InitFactory();
rocprofiler::ProxyQueue::HsaIntercept(table);
rocprofiler::InterceptQueue::HsaIntercept(table);
#endif
// HSA async-copy tracing
[[maybe_unused]] hsa_status_t status = hsa_amd_profiling_async_copy_enable(true);
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
@@ -1320,9 +1280,18 @@ CONSTRUCTOR_API void constructor() {
DESTRUCTOR_API void destructor() {
ONLOAD_TRACE_BEG();
roctracer_unload();
util::HsaRsrcFactory::Destroy();
roctracer::util::Logger::Destroy();
ONLOAD_TRACE_END();
}
// Tool entry point called when the library is loaded as an HSA runtime tool.
// Hands the runtime's API table to roctracer::SaveHsaApi and always reports
// success; the remaining arguments are not used.
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, [[maybe_unused]] uint64_t runtime_version,
                                  [[maybe_unused]] uint64_t failed_tool_count,
                                  [[maybe_unused]] const char* const* failed_tool_names) {
  roctracer::SaveHsaApi(table);
  const bool loaded = true;
  return loaded;
}
// Counterpart of OnLoad; performs no work.
extern "C" PUBLIC_API void OnUnload() {}
} // extern "C"
+15 -16
Wyświetl plik
@@ -35,8 +35,6 @@
namespace roctracer {
class Tracker {
public:
typedef ::util::HsaRsrcFactory::timestamp_t timestamp_t;
enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 };
enum entry_type_t {
@@ -50,10 +48,8 @@ class Tracker {
struct entry_t {
std::atomic<uint32_t> valid;
entry_type_t type;
uint64_t dispatch;
uint64_t begin; // kernel begin timestamp, ns
uint64_t end; // kernel end timestamp, ns
uint64_t complete;
uint64_t begin; // begin timestamp, ns
uint64_t end; // end timestamp, ns
hsa_agent_t agent;
uint32_t dev_index;
hsa_signal_t orig;
@@ -75,14 +71,12 @@ class Tracker {
inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal,
entry_t* entry) {
hsa_status_t status = HSA_STATUS_ERROR;
::util::HsaRsrcFactory* hsa_rsrc = &(::util::HsaRsrcFactory::Instance());
// Creating a new tracker entry
entry->type = type;
entry->agent = agent;
entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index;
entry->orig = signal;
entry->dispatch = hsa_rsrc->TimestampNs();
entry->valid.store(ENTRY_INIT, std::memory_order_release);
// Creating a proxy signal
@@ -104,27 +98,32 @@ class Tracker {
private:
// Entry completion
inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
// Query begin/end and complete timestamps
::util::HsaRsrcFactory* hsa_rsrc = &(::util::HsaRsrcFactory::Instance());
static uint64_t sysclock_period = []() {
uint64_t sysclock_hz = 0;
hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
if (status != HSA_STATUS_SUCCESS)
EXC_RAISING(ROCTRACER_STATUS_ERROR, "hsa_system_get_info failed");
return (uint64_t)1000000000 / sysclock_hz;
}();
if (entry->type == COPY_ENTRY_TYPE) {
hsa_amd_profiling_async_copy_time_t async_copy_time{};
hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time);
if (status != HSA_STATUS_SUCCESS)
EXC_RAISING(ROCTRACER_STATUS_ERROR, "hsa_amd_profiling_get_async_copy_time failed");
entry->begin = hsa_rsrc->SysclockToNs(async_copy_time.start);
entry->end = hsa_rsrc->SysclockToNs(async_copy_time.end);
entry->begin = async_copy_time.start * sysclock_period;
entry->end = async_copy_time.end * sysclock_period;
} else {
hsa_amd_profiling_dispatch_time_t dispatch_time{};
hsa_status_t status =
hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time);
if (status != HSA_STATUS_SUCCESS)
EXC_RAISING(ROCTRACER_STATUS_ERROR, "hsa_amd_profiling_get_dispatch_time failed");
entry->begin = hsa_rsrc->SysclockToNs(dispatch_time.start);
entry->end = hsa_rsrc->SysclockToNs(dispatch_time.end);
entry->dev_index = (hsa_rsrc->GetAgentInfo(entry->agent))->dev_index;
entry->begin = dispatch_time.start * sysclock_period;
entry->end = dispatch_time.end * sysclock_period;
entry->dev_index = ::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index;
}
entry->complete = hsa_rsrc->TimestampNs();
hsa_signal_t orig = entry->orig;
hsa_signal_t signal = entry->signal;
+6 -3
Wyświetl plik
@@ -42,6 +42,9 @@ fi
if [ -n "$ROCTRACER_LIB_PATH" ] ; then
export LD_LIBRARY_PATH=$ROCTRACER_LIB_PATH
fi
if [ -z "$ROCTRACER_LIB_PATH" ] ; then
ROCTRACER_LIB_PATH="."
fi
if [ -z "$ROCTRACER_TOOL_PATH" ] ; then
ROCTRACER_TOOL_PATH="./test"
fi
@@ -119,7 +122,7 @@ eval_test "standalone HIP MGPU test" "./test/MatrixTranspose_mgpu" MatrixTranspo
# Tool test
# rocTracer/tool is loaded by HSA runtime
export HSA_TOOLS_LIB="$ROCTRACER_TOOL_PATH/libtracer_tool.so"
export HSA_TOOLS_LIB="$ROCTRACER_LIB_PATH/libroctracer64.so $ROCTRACER_TOOL_PATH/libroctracer_tool.so"
# SYS test
export ROCTRACER_DOMAIN="sys:roctx"
@@ -157,11 +160,11 @@ echo "<trace name=\"HSA\"><parameters api=\"hsa_agent_get_info, hsa_amd_memory_p
export ROCP_INPUT=input.xml
eval_test "tool HSA test input" ./test/hsa/ctrl ctrl_hsa_input_trace
export HSA_TOOLS_LIB=./test/libhsaco_test.so
export HSA_TOOLS_LIB="$ROCTRACER_LIB_PATH/libroctracer64.so ./test/libhsaco_test.so"
eval_test "tool HSA codeobj" ./test/MatrixTranspose hsa_co_trace
export ROCP_TOOL_LIB=./test/libcodeobj_test.so
export HSA_TOOLS_LIB=librocprofiler64.so
export HSA_TOOLS_LIB="$ROCTRACER_LIB_PATH/libroctracer64.so librocprofiler64.so"
eval_test "tool tracer codeobj" ./test/MatrixTranspose code_obj_trace
#valgrind --leak-check=full $tbin
@@ -37,7 +37,6 @@
#include <roctracer_roctx.h>
#include <roctracer_hsa.h>
#include <roctracer_hip.h>
#include <ext/hsa_rt_utils.hpp>
#include "src/core/loader.h"
#include "test/tool/trace_buffer.h"
@@ -97,10 +96,20 @@ inline static void DEBUG_TRACE(const char* fmt, ...) {
#define DEBUG_TRACE(...)
#endif
typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
hsa_rt_utils::Timer* timer = NULL;
typedef uint64_t timestamp_t;
thread_local timestamp_t hsa_begin_timestamp = 0;
thread_local timestamp_t hip_begin_timestamp = 0;
namespace util {
// Returns the current time in nanoseconds as reported by
// roctracer_get_timestamp(). The local is zero-initialized so the result is
// never indeterminate, even if the call fails without storing a value and
// ROCTRACER_CALL returns control here.
inline timestamp_t timestamp_ns() {
  timestamp_t timestamp = 0;
  ROCTRACER_CALL(roctracer_get_timestamp(&timestamp));
  return timestamp;
}
} // namespace util
bool trace_roctx = false;
bool trace_hsa_api = false;
bool trace_hsa_activity = false;
@@ -251,7 +260,7 @@ static inline void roctx_callback_fun(uint32_t domain, uint32_t cid, uint32_t ti
roctx_range_id_t rid, const char* message) {
roctx_trace_entry_t* entry = roctx_trace_buffer->GetEntry();
entry->cid = cid;
entry->time = timer->timestamp_fn_ns();
entry->time = util::timestamp_ns();
entry->pid = GetPid();
entry->tid = tid;
entry->rid = rid;
@@ -314,10 +323,10 @@ void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
(void)arg;
const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
hsa_begin_timestamp = timer->timestamp_fn_ns();
hsa_begin_timestamp = util::timestamp_ns();
} else {
const timestamp_t end_timestamp =
(cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : timer->timestamp_fn_ns();
(cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : util::timestamp_ns();
hsa_api_trace_entry_t* entry = hsa_api_trace_buffer->GetEntry();
entry->cid = cid;
entry->begin = hsa_begin_timestamp;
@@ -368,7 +377,7 @@ static inline bool is_hip_kernel_launch_api(const uint32_t& cid) {
void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
(void)arg;
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
const timestamp_t timestamp = timer->timestamp_fn_ns();
const timestamp_t timestamp = util::timestamp_ns();
hip_api_trace_entry_t* entry = NULL;
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
@@ -445,7 +454,7 @@ void mark_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
(void)arg;
const char* name = reinterpret_cast<const char*>(callback_data);
const timestamp_t timestamp = timer->timestamp_fn_ns();
const timestamp_t timestamp = util::timestamp_ns();
hip_api_trace_entry_t* entry = hip_api_trace_buffer->GetEntry();
entry->cid = 0;
entry->domain = domain;
@@ -930,8 +939,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
const char* const* failed_tool_names) {
ONLOAD_TRACE_BEG();
timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn);
const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
// Dumping HSA handles for agents
@@ -959,7 +966,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
// App begin timestamp begin_ts_file.txt
begin_ts_file_handle = open_output_file(output_prefix, "begin_ts_file.txt");
const timestamp_t app_start_time = timer->timestamp_fn_ns();
const timestamp_t app_start_time = util::timestamp_ns();
fprintf(begin_ts_file_handle, "%lu\n", app_start_time);
// Enable HSA API callbacks/activity