Merge "Consolidate all sources of timestamps" into amd-staging
[ROCm/roctracer commit: 7c4f7625b1]
This commit is contained in:
zatwierdzone przez
Gerrit Code Review
commit
4e9c35c929
@@ -93,7 +93,6 @@ set ( PUBLIC_HEADERS
|
||||
roctracer_hsa.h
|
||||
roctracer_roctx.h
|
||||
ext/prof_protocol.h
|
||||
ext/hsa_rt_utils.hpp
|
||||
)
|
||||
set ( GEN_HEADERS
|
||||
hip_ostream_ops.h
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
################################################################################
|
||||
## Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
##
|
||||
## Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
## of this software and associated documentation files (the "Software"), to
|
||||
## deal in the Software without restriction, including without limitation the
|
||||
## rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
## sell copies of the Software, and to permit persons to whom the Software is
|
||||
## furnished to do so, subject to the following conditions:
|
||||
##
|
||||
## The above copyright notice and this permission notice shall be included in
|
||||
## all copies or substantial portions of the Software.
|
||||
##
|
||||
## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
## IN THE SOFTWARE.
|
||||
################################################################################
|
||||
|
||||
## Build/packaging script for the "roctracer-proto" package: it installs the
## two ext/ protocol headers and produces DEB/RPM/TGZ packages through CPack.

cmake_minimum_required ( VERSION 3.5.0 )

## Verbose output.
set ( CMAKE_VERBOSE_MAKEFILE TRUE CACHE BOOL "Verbose Output" FORCE )

## Set module name and project name.
set ( ROCTRACER_NAME "roctracer" )
project ( ${ROCTRACER_NAME} )

## Repository root, two levels above this directory.
set ( ROOT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../" )
## Adding default path cmake modules
list ( APPEND CMAKE_MODULE_PATH "${ROOT_DIR}/cmake_modules" )
## Include common cmake modules
include ( utils )
## Set build environment
include ( env )

## Setup the package version.
## get_version is not defined in this file (presumably provided by the
## 'utils' module -- confirm); the VERSION_* variables read below are
## expected to be set by it.
get_version ( "1.0.0" )
message ( "-- LIB-VERSION: ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}" )

set ( BUILD_VERSION_MAJOR ${VERSION_MAJOR} )
set ( BUILD_VERSION_MINOR ${VERSION_MINOR} )
set ( BUILD_VERSION_PATCH ${VERSION_PATCH} )
set ( LIB_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )
## The expansion is quoted on purpose: an unquoted, empty VERSION_BUILD would
## vanish from the argument list and turn this emptiness test into a malformed
## condition instead of evaluating to false.
if ( DEFINED VERSION_BUILD AND NOT "${VERSION_BUILD}" STREQUAL "" )
  message ( "VERSION BUILD DEFINED ${VERSION_BUILD}" )
  set ( BUILD_VERSION_PATCH "${BUILD_VERSION_PATCH}-${VERSION_BUILD}" )
endif ()
set ( BUILD_VERSION_STRING "${BUILD_VERSION_MAJOR}.${BUILD_VERSION_MINOR}.${BUILD_VERSION_PATCH}" )

## Install and packaging
## Everything is installed below <prefix>/roctracer.
set ( CMAKE_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}/${ROCTRACER_NAME} )
message ( "---------Install-Dir: ${CMAKE_INSTALL_PREFIX}" )

## Manual target that runs the RPM post-install script to write the
## pkg-config file; VERBATIM keeps argument escaping platform independent.
add_custom_target( pkgconfig_install
                   COMMAND sh -x "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post"
                   VERBATIM )

## Install information
install ( FILES "${ROOT_DIR}/inc/ext/prof_protocol.h" DESTINATION include/ext )
install ( FILES "${ROOT_DIR}/inc/ext/hsa_rt_utils.hpp" DESTINATION include/ext )

## Packaging directives
set ( CPACK_GENERATOR "DEB" "RPM" "TGZ" )
set ( CPACK_PACKAGE_NAME "${ROCTRACER_NAME}-proto" )
set ( CPACK_PACKAGE_VENDOR "AMD" )
set ( CPACK_PACKAGE_VERSION_MAJOR ${BUILD_VERSION_MAJOR} )
set ( CPACK_PACKAGE_VERSION_MINOR ${BUILD_VERSION_MINOR} )
set ( CPACK_PACKAGE_VERSION_PATCH ${BUILD_VERSION_PATCH} )
set ( CPACK_PACKAGE_CONTACT "Advanced Micro Devices Inc." )
set ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "AMD ROCTRACER library" )
set ( CPACK_RESOURCE_FILE_LICENSE "${ROOT_DIR}/LICENSE" )

## Debian package specific variables
set ( CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/postinst;${CMAKE_CURRENT_SOURCE_DIR}/DEBIAN/prerm" )

## RPM package specific variables
## NOTE(review): the script is named rpm_post but is registered as the
## PRE-install scriptlet; confirm whether CPACK_RPM_POST_INSTALL_SCRIPT_FILE
## was intended.
set ( CPACK_RPM_PRE_INSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_post" )
set ( CPACK_RPM_POST_UNINSTALL_SCRIPT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/RPM/rpm_postun" )

include ( CPack )
|
||||
@@ -1,28 +0,0 @@
|
||||
#!/bin/bash
# Debian post-install maintainer script for the roctracer-proto package.
# The first positional argument is the maintainer-script action name.

set -e

# Write the pkg-config descriptor for the roctracer protocol headers.
# The '$' signs are escaped so that ${prefix} and ${includedir} are written
# literally into the .pc file instead of being expanded by the shell.
do_pkgconfig() {
  mkdir -p /usr/lib/pkgconfig
  cat > /usr/lib/pkgconfig/roctracer-proto.pc << EOF
prefix=/opt/rocm/roctracer
includedir=\${prefix}/include/ext

Name: roctracer-proto
Version: 1.0.0
Description: roctracer protocol
Cflags: -I\${includedir}
EOF
}

case "$1" in
  configure)
    do_pkgconfig
    ;;
  abort-upgrade|abort-remove|abort-deconfigure)
    # Nothing to undo; just report which abort action was requested.
    echo "$1"
    ;;
  *)
    exit 0
    ;;
esac
|
||||
@@ -1,18 +0,0 @@
|
||||
#!/bin/bash
# Debian pre-removal maintainer script for the roctracer-proto package.
# On "remove" or "upgrade" the generated pkg-config descriptor is deleted;
# every other action (including "purge") is a successful no-op.

set -e

action="$1"

if [ "$action" = "remove" ] || [ "$action" = "upgrade" ]; then
  rm -f /usr/lib/pkgconfig/roctracer-proto.pc
fi
|
||||
@@ -1,10 +0,0 @@
|
||||
# RPM install scriptlet: make sure the pkg-config directory exists, then
# (re)write the roctracer-proto descriptor. '\$' keeps ${prefix} and
# ${includedir} literal in the generated file.
[ -d /usr/lib/pkgconfig ] || mkdir -p /usr/lib/pkgconfig
cat > /usr/lib/pkgconfig/roctracer-proto.pc << EOF
prefix=/opt/rocm/roctracer
includedir=\${prefix}/include/ext

Name: roctracer-proto
Version: 1.0.0
Description: roctracer protocol
Cflags: -I\${includedir}
EOF
|
||||
@@ -1 +0,0 @@
|
||||
# RPM post-uninstall scriptlet: delete the pkg-config descriptor for
# roctracer-proto; -f keeps this silent when the file is already gone.
rm -f /usr/lib/pkgconfig/roctracer-proto.pc
|
||||
@@ -1,117 +0,0 @@
|
||||
/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#ifndef INC_ROCTRACER_HSA_RT_UTILS_HPP_
|
||||
#define INC_ROCTRACER_HSA_RT_UTILS_HPP_
|
||||
|
||||
#include <hsa/hsa.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstddef>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
|
||||
// HSART_CALL(call): evaluates `call`, stores the result in a local
// hsa_status_t and, when it differs from HSA_STATUS_SUCCESS, prints the
// stringified call together with the status value in hex to std::cerr and
// then calls abort(). The do { ... } while (0) wrapper makes the macro usable
// as a single statement.
// The local is named `status`, so a `call` expression that itself refers to a
// variable called `status` would see this local instead of the caller's.
// NOTE(review): abort() is used but <cstdlib> is not among the includes shown
// in this header -- confirm it is reachable through another include.
#define HSART_CALL(call)                                                                 \
  do {                                                                                   \
    hsa_status_t status = call;                                                          \
    if (status != HSA_STATUS_SUCCESS) {                                                  \
      std::cerr << "1HSA-rt call '" << #call << "' error(" << std::hex << status << ")"  \
                << std::dec << std::endl << std::flush;                                  \
      abort();                                                                           \
    }                                                                                    \
  } while (0)
|
||||
|
||||
namespace hsa_rt_utils {
|
||||
|
||||
// HSA runtime timer implementation
|
||||
class Timer {
|
||||
public:
|
||||
typedef uint64_t timestamp_t;
|
||||
typedef long double freq_t;
|
||||
typedef decltype(hsa_system_get_info)* hsa_system_get_info_fn_t;
|
||||
|
||||
// Initialization
|
||||
inline void init(const hsa_system_get_info_fn_t& get_info_fn) {
|
||||
hsa_system_get_info_fn = get_info_fn;
|
||||
timestamp_t timestamp_hz = 0;
|
||||
if (get_info_fn == NULL) {
|
||||
timestamp_rate_ = 0;
|
||||
} else {
|
||||
HSART_CALL(get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, ×tamp_hz));
|
||||
timestamp_rate_ = (freq_t)1000000000 / (freq_t)timestamp_hz;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns HSA runtime timestamp rate
|
||||
freq_t timestamp_rate() const { return timestamp_rate_; }
|
||||
|
||||
// Convert a given timestamp to ns
|
||||
timestamp_t timestamp_to_ns(const timestamp_t ×tamp) const {
|
||||
return timestamp_t((freq_t)timestamp * timestamp_rate_);
|
||||
}
|
||||
|
||||
// Return timestamp in 'ns'
|
||||
timestamp_t timestamp_ns() const {
|
||||
timestamp_t timestamp;
|
||||
HSART_CALL(hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, ×tamp));
|
||||
return timestamp_to_ns(timestamp);
|
||||
}
|
||||
timestamp_t timestamp_fn_ns() const {
|
||||
timestamp_t timestamp;
|
||||
HSART_CALL(hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, ×tamp));
|
||||
return timestamp_to_ns(timestamp);
|
||||
}
|
||||
|
||||
Timer(hsa_system_get_info_fn_t f = NULL) {
|
||||
if (f != NULL) init(f);
|
||||
else init(hsa_system_get_info);
|
||||
}
|
||||
|
||||
private:
|
||||
// hsa_system_get_info function
|
||||
hsa_system_get_info_fn_t hsa_system_get_info_fn;
|
||||
// Timestamp rate
|
||||
freq_t timestamp_rate_;
|
||||
};
|
||||
|
||||
// Holder for a single, lazily created Timer shared through static members.
// instance_ and mutex_ are only declared here; no definition is visible in
// this header, so one must be provided elsewhere for code using this class
// to link.
class TimerFactory {
 public:
  typedef std::mutex mutex_t;

  // Creates the shared Timer on first use (forwarding `f` to its
  // constructor) and returns it; later calls return the existing object and
  // ignore `f`.
  // NOTE(review): this is double-checked locking on a plain (non-atomic)
  // pointer -- the first instance_ read happens without holding mutex_.
  // Confirm callers cannot race here.
  static Timer* Create(Timer::hsa_system_get_info_fn_t f = NULL) {
    if (instance_ == NULL) {
      std::lock_guard<mutex_t> lck(mutex_);
      if (instance_ == NULL) instance_ = new Timer(f);
    }
    return instance_;
  }

  // Returns the shared Timer. The pointer is dereferenced without a null
  // check, so Create() must have been called beforehand.
  static Timer& Instance() {
    return *instance_;
  }

 private:
  // Shared timer object; never deleted by this class.
  static Timer* instance_;
  // Guards creation of instance_ in Create().
  static mutex_t mutex_;
};
|
||||
|
||||
} // namespace hsa_rt_utils
|
||||
|
||||
#endif // INC_ROCTRACER_HSA_RT_UTILS_HPP_
|
||||
@@ -41,9 +41,7 @@
|
||||
#include "core/loader.h"
|
||||
#include "core/memory_pool.h"
|
||||
#include "core/tracker.h"
|
||||
#include "ext/hsa_rt_utils.hpp"
|
||||
#include "util/exception.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
@@ -98,78 +96,50 @@ mark_api_callback_t* mark_api_callback_ptr = NULL;
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Internal library methods
|
||||
//
|
||||
namespace rocprofiler {
|
||||
decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
|
||||
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
|
||||
decltype(hsa_signal_store_relaxed)* hsa_signal_store_screlease_fn;
|
||||
|
||||
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
|
||||
|
||||
decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn;
|
||||
decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn;
|
||||
decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn;
|
||||
|
||||
decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
|
||||
decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
|
||||
|
||||
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
|
||||
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
|
||||
namespace roctracer {
|
||||
decltype(hsa_system_get_info)* hsa_system_get_info_fn = hsa_system_get_info;
|
||||
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn = hsa_amd_memory_async_copy;
|
||||
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn =
|
||||
hsa_amd_memory_async_copy_rect;
|
||||
|
||||
::HsaApiTable* kHsaApiTable;
|
||||
|
||||
void SaveHsaApi(::HsaApiTable* table) {
|
||||
util::HsaRsrcFactory::InitHsaApiTable(table);
|
||||
|
||||
kHsaApiTable = table;
|
||||
hsa_queue_create_fn = table->core_->hsa_queue_create_fn;
|
||||
hsa_queue_destroy_fn = table->core_->hsa_queue_destroy_fn;
|
||||
|
||||
hsa_signal_store_relaxed_fn = table->core_->hsa_signal_store_relaxed_fn;
|
||||
hsa_signal_store_screlease_fn = table->core_->hsa_signal_store_screlease_fn;
|
||||
|
||||
hsa_queue_load_write_index_relaxed_fn = table->core_->hsa_queue_load_write_index_relaxed_fn;
|
||||
hsa_queue_store_write_index_relaxed_fn = table->core_->hsa_queue_store_write_index_relaxed_fn;
|
||||
hsa_queue_load_read_index_relaxed_fn = table->core_->hsa_queue_load_read_index_relaxed_fn;
|
||||
|
||||
hsa_queue_load_write_index_scacquire_fn = table->core_->hsa_queue_load_write_index_scacquire_fn;
|
||||
hsa_queue_store_write_index_screlease_fn = table->core_->hsa_queue_store_write_index_screlease_fn;
|
||||
hsa_queue_load_read_index_scacquire_fn = table->core_->hsa_queue_load_read_index_scacquire_fn;
|
||||
|
||||
hsa_amd_queue_intercept_create_fn = table->amd_ext_->hsa_amd_queue_intercept_create_fn;
|
||||
hsa_amd_queue_intercept_register_fn = table->amd_ext_->hsa_amd_queue_intercept_register_fn;
|
||||
hsa_system_get_info_fn = table->core_->hsa_system_get_info_fn;
|
||||
hsa_amd_memory_async_copy_fn = table->amd_ext_->hsa_amd_memory_async_copy_fn;
|
||||
hsa_amd_memory_async_copy_rect_fn = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn;
|
||||
}
|
||||
|
||||
void RestoreHsaApi() {
|
||||
::HsaApiTable* table = kHsaApiTable;
|
||||
table->core_->hsa_queue_create_fn = hsa_queue_create_fn;
|
||||
table->core_->hsa_queue_destroy_fn = hsa_queue_destroy_fn;
|
||||
|
||||
table->core_->hsa_signal_store_relaxed_fn = hsa_signal_store_relaxed_fn;
|
||||
table->core_->hsa_signal_store_screlease_fn = hsa_signal_store_screlease_fn;
|
||||
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn = hsa_queue_load_write_index_relaxed_fn;
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn = hsa_queue_store_write_index_relaxed_fn;
|
||||
table->core_->hsa_queue_load_read_index_relaxed_fn = hsa_queue_load_read_index_relaxed_fn;
|
||||
|
||||
table->core_->hsa_queue_load_write_index_scacquire_fn = hsa_queue_load_write_index_scacquire_fn;
|
||||
table->core_->hsa_queue_store_write_index_screlease_fn = hsa_queue_store_write_index_screlease_fn;
|
||||
table->core_->hsa_queue_load_read_index_scacquire_fn = hsa_queue_load_read_index_scacquire_fn;
|
||||
|
||||
table->amd_ext_->hsa_amd_queue_intercept_create_fn = hsa_amd_queue_intercept_create_fn;
|
||||
table->amd_ext_->hsa_amd_queue_intercept_register_fn = hsa_amd_queue_intercept_register_fn;
|
||||
}
|
||||
table->core_->hsa_system_get_info_fn = hsa_system_get_info_fn;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_fn = hsa_amd_memory_async_copy_fn;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = hsa_amd_memory_async_copy_rect_fn;
|
||||
}
|
||||
|
||||
namespace roctracer {
|
||||
// timestamp definition
|
||||
typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
|
||||
|
||||
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
|
||||
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
|
||||
namespace util {
|
||||
|
||||
// Returns the current HSA system timestamp converted to nanoseconds.
//
// The raw counter is read with HSA_SYSTEM_INFO_TIMESTAMP through
// roctracer::hsa_system_get_info_fn. When that call reports
// HSA_STATUS_ERROR_NOT_INITIALIZED the function returns 0. Any other status
// is passed to CHECK_STATUS (defined elsewhere; presumably it raises on
// failure -- confirm).
uint64_t timestamp_ns() {
  uint64_t sysclock;

  hsa_status_t status = roctracer::hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
  // Early-out before the static below, so the period is only computed once a
  // timestamp query has gotten past the NOT_INITIALIZED case.
  if (status == HSA_STATUS_ERROR_NOT_INITIALIZED) return 0;
  CHECK_STATUS("hsa_system_get_info()", status);

  // Nanoseconds per counter tick, computed once on the first call that
  // reaches this point.
  // NOTE(review): this is an integer division -- a frequency that does not
  // divide 1e9 evenly is truncated, a frequency above 1e9 yields 0, and a
  // reported frequency of 0 divides by zero. Confirm the expected range.
  static uint64_t sysclock_period = []() {
    uint64_t sysclock_hz = 0;
    hsa_status_t status =
        roctracer::hsa_system_get_info_fn(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
    CHECK_STATUS("hsa_system_get_info()", status);
    return (uint64_t)1000000000 / sysclock_hz;
  }();

  return sysclock * sysclock_period;
}
|
||||
|
||||
} // namespace util
|
||||
|
||||
typedef decltype(roctracer_enable_op_callback)* roctracer_enable_op_callback_t;
|
||||
typedef decltype(roctracer_disable_op_callback)* roctracer_disable_op_callback_t;
|
||||
@@ -347,7 +317,6 @@ inline uint32_t HipActActivityDisableCheck(uint32_t op) {
|
||||
|
||||
void* HIP_SyncApiDataCallback(uint32_t op_id, roctracer_record_t* record, const void* callback_data,
|
||||
void* arg) {
|
||||
static hsa_rt_utils::Timer timer;
|
||||
if (record_pair_stack == NULL) record_pair_stack = new record_pair_stack_t;
|
||||
|
||||
void* ret = NULL;
|
||||
@@ -399,15 +368,14 @@ void* HIP_SyncApiDataCallback(uint32_t op_id, roctracer_record_t* record, const
|
||||
"correlation_id(%lu) time_ns(%lu)\n",
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0), phase, op_id, record, data, pool,
|
||||
(int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0,
|
||||
timer.timestamp_ns());
|
||||
util::timestamp_ns());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void* HIP_SyncActivityCallback(uint32_t op_id, roctracer_record_t* record,
|
||||
const void* callback_data, void* arg) {
|
||||
static hsa_rt_utils::Timer timer;
|
||||
const timestamp_t timestamp_ns = timer.timestamp_ns();
|
||||
const uint64_t timestamp_ns = util::timestamp_ns();
|
||||
if (record_pair_stack == NULL) record_pair_stack = new record_pair_stack_t;
|
||||
|
||||
void* ret = NULL;
|
||||
@@ -1205,7 +1173,7 @@ PUBLIC_API void roctracer_stop() {
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp) {
|
||||
API_METHOD_PREFIX
|
||||
*timestamp = util::HsaRsrcFactory::Instance().TimestampNs();
|
||||
*timestamp = roctracer::util::timestamp_ns();
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -1220,14 +1188,6 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(roctracer_domain_t domain
|
||||
reinterpret_cast<roctracer::hsa_ops_properties_t*>(properties);
|
||||
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(ops_properties->table);
|
||||
|
||||
#if 0
|
||||
// HSA dispatches intercepting
|
||||
rocprofiler::SaveHsaApi(table);
|
||||
rocprofiler::ProxyQueue::InitFactory();
|
||||
rocprofiler::ProxyQueue::HsaIntercept(table);
|
||||
rocprofiler::InterceptQueue::HsaIntercept(table);
|
||||
#endif
|
||||
|
||||
// HSA async-copy tracing
|
||||
[[maybe_unused]] hsa_status_t status = hsa_amd_profiling_async_copy_enable(true);
|
||||
assert(status == HSA_STATUS_SUCCESS && "hsa_amd_profiling_async_copy_enable failed");
|
||||
@@ -1320,9 +1280,18 @@ CONSTRUCTOR_API void constructor() {
|
||||
DESTRUCTOR_API void destructor() {
|
||||
ONLOAD_TRACE_BEG();
|
||||
roctracer_unload();
|
||||
util::HsaRsrcFactory::Destroy();
|
||||
roctracer::util::Logger::Destroy();
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
// HSA-runtime tool on-load entry point.
// Hands the API table to roctracer::SaveHsaApi and always returns true.
// runtime_version, failed_tool_count and failed_tool_names are not used.
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
                                  uint64_t failed_tool_count,
                                  const char* const* failed_tool_names) {
  roctracer::SaveHsaApi(table);
  return true;
}

// HSA-runtime tool on-unload entry point; intentionally does nothing.
extern "C" PUBLIC_API void OnUnload() {}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -35,8 +35,6 @@
|
||||
namespace roctracer {
|
||||
class Tracker {
|
||||
public:
|
||||
typedef ::util::HsaRsrcFactory::timestamp_t timestamp_t;
|
||||
|
||||
enum { ENTRY_INV = 0, ENTRY_INIT = 1, ENTRY_COMPL = 2 };
|
||||
|
||||
enum entry_type_t {
|
||||
@@ -50,10 +48,8 @@ class Tracker {
|
||||
struct entry_t {
|
||||
std::atomic<uint32_t> valid;
|
||||
entry_type_t type;
|
||||
uint64_t dispatch;
|
||||
uint64_t begin; // kernel begin timestamp, ns
|
||||
uint64_t end; // kernel end timestamp, ns
|
||||
uint64_t complete;
|
||||
uint64_t begin; // begin timestamp, ns
|
||||
uint64_t end; // end timestamp, ns
|
||||
hsa_agent_t agent;
|
||||
uint32_t dev_index;
|
||||
hsa_signal_t orig;
|
||||
@@ -75,14 +71,12 @@ class Tracker {
|
||||
inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal,
|
||||
entry_t* entry) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
::util::HsaRsrcFactory* hsa_rsrc = &(::util::HsaRsrcFactory::Instance());
|
||||
|
||||
// Creating a new tracker entry
|
||||
entry->type = type;
|
||||
entry->agent = agent;
|
||||
entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index;
|
||||
entry->orig = signal;
|
||||
entry->dispatch = hsa_rsrc->TimestampNs();
|
||||
entry->valid.store(ENTRY_INIT, std::memory_order_release);
|
||||
|
||||
// Creating a proxy signal
|
||||
@@ -104,27 +98,32 @@ class Tracker {
|
||||
private:
|
||||
// Entry completion
|
||||
inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
|
||||
// Query begin/end and complete timestamps
|
||||
::util::HsaRsrcFactory* hsa_rsrc = &(::util::HsaRsrcFactory::Instance());
|
||||
static uint64_t sysclock_period = []() {
|
||||
uint64_t sysclock_hz = 0;
|
||||
hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR, "hsa_system_get_info failed");
|
||||
return (uint64_t)1000000000 / sysclock_hz;
|
||||
}();
|
||||
|
||||
if (entry->type == COPY_ENTRY_TYPE) {
|
||||
hsa_amd_profiling_async_copy_time_t async_copy_time{};
|
||||
hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR, "hsa_amd_profiling_get_async_copy_time failed");
|
||||
entry->begin = hsa_rsrc->SysclockToNs(async_copy_time.start);
|
||||
entry->end = hsa_rsrc->SysclockToNs(async_copy_time.end);
|
||||
entry->begin = async_copy_time.start * sysclock_period;
|
||||
entry->end = async_copy_time.end * sysclock_period;
|
||||
} else {
|
||||
hsa_amd_profiling_dispatch_time_t dispatch_time{};
|
||||
hsa_status_t status =
|
||||
hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time);
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR, "hsa_amd_profiling_get_dispatch_time failed");
|
||||
entry->begin = hsa_rsrc->SysclockToNs(dispatch_time.start);
|
||||
entry->end = hsa_rsrc->SysclockToNs(dispatch_time.end);
|
||||
entry->dev_index = (hsa_rsrc->GetAgentInfo(entry->agent))->dev_index;
|
||||
entry->begin = dispatch_time.start * sysclock_period;
|
||||
entry->end = dispatch_time.end * sysclock_period;
|
||||
entry->dev_index = ::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index;
|
||||
}
|
||||
|
||||
entry->complete = hsa_rsrc->TimestampNs();
|
||||
hsa_signal_t orig = entry->orig;
|
||||
hsa_signal_t signal = entry->signal;
|
||||
|
||||
|
||||
@@ -42,6 +42,9 @@ fi
|
||||
if [ -n "$ROCTRACER_LIB_PATH" ] ; then
|
||||
export LD_LIBRARY_PATH=$ROCTRACER_LIB_PATH
|
||||
fi
|
||||
if [ -z "$ROCTRACER_LIB_PATH" ] ; then
|
||||
ROCTRACER_LIB_PATH="."
|
||||
fi
|
||||
if [ -z "$ROCTRACER_TOOL_PATH" ] ; then
|
||||
ROCTRACER_TOOL_PATH="./test"
|
||||
fi
|
||||
@@ -119,7 +122,7 @@ eval_test "standalone HIP MGPU test" "./test/MatrixTranspose_mgpu" MatrixTranspo
|
||||
|
||||
# Tool test
|
||||
# rocTracer/tool is loaded by HSA runtime
|
||||
export HSA_TOOLS_LIB="$ROCTRACER_TOOL_PATH/libtracer_tool.so"
|
||||
export HSA_TOOLS_LIB="$ROCTRACER_LIB_PATH/libroctracer64.so $ROCTRACER_TOOL_PATH/libroctracer_tool.so"
|
||||
|
||||
# SYS test
|
||||
export ROCTRACER_DOMAIN="sys:roctx"
|
||||
@@ -157,11 +160,11 @@ echo "<trace name=\"HSA\"><parameters api=\"hsa_agent_get_info, hsa_amd_memory_p
|
||||
export ROCP_INPUT=input.xml
|
||||
eval_test "tool HSA test input" ./test/hsa/ctrl ctrl_hsa_input_trace
|
||||
|
||||
export HSA_TOOLS_LIB=./test/libhsaco_test.so
|
||||
export HSA_TOOLS_LIB="$ROCTRACER_LIB_PATH/libroctracer64.so ./test/libhsaco_test.so"
|
||||
eval_test "tool HSA codeobj" ./test/MatrixTranspose hsa_co_trace
|
||||
|
||||
export ROCP_TOOL_LIB=./test/libcodeobj_test.so
|
||||
export HSA_TOOLS_LIB=librocprofiler64.so
|
||||
export HSA_TOOLS_LIB="$ROCTRACER_LIB_PATH/libroctracer64.so librocprofiler64.so"
|
||||
eval_test "tool tracer codeobj" ./test/MatrixTranspose code_obj_trace
|
||||
|
||||
#valgrind --leak-check=full $tbin
|
||||
|
||||
@@ -37,7 +37,6 @@
|
||||
#include <roctracer_roctx.h>
|
||||
#include <roctracer_hsa.h>
|
||||
#include <roctracer_hip.h>
|
||||
#include <ext/hsa_rt_utils.hpp>
|
||||
|
||||
#include "src/core/loader.h"
|
||||
#include "test/tool/trace_buffer.h"
|
||||
@@ -97,10 +96,20 @@ inline static void DEBUG_TRACE(const char* fmt, ...) {
|
||||
#define DEBUG_TRACE(...)
|
||||
#endif
|
||||
|
||||
typedef hsa_rt_utils::Timer::timestamp_t timestamp_t;
|
||||
hsa_rt_utils::Timer* timer = NULL;
|
||||
typedef uint64_t timestamp_t;
|
||||
thread_local timestamp_t hsa_begin_timestamp = 0;
|
||||
thread_local timestamp_t hip_begin_timestamp = 0;
|
||||
|
||||
namespace util {
|
||||
|
||||
// Returns the current timestamp obtained from roctracer_get_timestamp().
// The call is wrapped in ROCTRACER_CALL (defined elsewhere in this file;
// presumably it reports a failed status -- confirm). The local is
// zero-initialised so a defined value is returned even if the call leaves it
// untouched.
inline timestamp_t timestamp_ns() {
  timestamp_t timestamp = 0;
  ROCTRACER_CALL(roctracer_get_timestamp(&timestamp));
  return timestamp;
}
|
||||
|
||||
} // namespace util
|
||||
|
||||
bool trace_roctx = false;
|
||||
bool trace_hsa_api = false;
|
||||
bool trace_hsa_activity = false;
|
||||
@@ -251,7 +260,7 @@ static inline void roctx_callback_fun(uint32_t domain, uint32_t cid, uint32_t ti
|
||||
roctx_range_id_t rid, const char* message) {
|
||||
roctx_trace_entry_t* entry = roctx_trace_buffer->GetEntry();
|
||||
entry->cid = cid;
|
||||
entry->time = timer->timestamp_fn_ns();
|
||||
entry->time = util::timestamp_ns();
|
||||
entry->pid = GetPid();
|
||||
entry->tid = tid;
|
||||
entry->rid = rid;
|
||||
@@ -314,10 +323,10 @@ void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
|
||||
(void)arg;
|
||||
const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
hsa_begin_timestamp = timer->timestamp_fn_ns();
|
||||
hsa_begin_timestamp = util::timestamp_ns();
|
||||
} else {
|
||||
const timestamp_t end_timestamp =
|
||||
(cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : timer->timestamp_fn_ns();
|
||||
(cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : util::timestamp_ns();
|
||||
hsa_api_trace_entry_t* entry = hsa_api_trace_buffer->GetEntry();
|
||||
entry->cid = cid;
|
||||
entry->begin = hsa_begin_timestamp;
|
||||
@@ -368,7 +377,7 @@ static inline bool is_hip_kernel_launch_api(const uint32_t& cid) {
|
||||
void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
|
||||
(void)arg;
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
|
||||
const timestamp_t timestamp = timer->timestamp_fn_ns();
|
||||
const timestamp_t timestamp = util::timestamp_ns();
|
||||
hip_api_trace_entry_t* entry = NULL;
|
||||
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
@@ -445,7 +454,7 @@ void mark_api_callback(uint32_t domain, uint32_t cid, const void* callback_data,
|
||||
(void)arg;
|
||||
const char* name = reinterpret_cast<const char*>(callback_data);
|
||||
|
||||
const timestamp_t timestamp = timer->timestamp_fn_ns();
|
||||
const timestamp_t timestamp = util::timestamp_ns();
|
||||
hip_api_trace_entry_t* entry = hip_api_trace_buffer->GetEntry();
|
||||
entry->cid = 0;
|
||||
entry->domain = domain;
|
||||
@@ -930,8 +939,6 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
const char* const* failed_tool_names) {
|
||||
ONLOAD_TRACE_BEG();
|
||||
|
||||
timer = new hsa_rt_utils::Timer(table->core_->hsa_system_get_info_fn);
|
||||
|
||||
const char* output_prefix = getenv("ROCP_OUTPUT_DIR");
|
||||
|
||||
// Dumping HSA handles for agents
|
||||
@@ -959,7 +966,7 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
|
||||
// App begin timestamp begin_ts_file.txt
|
||||
begin_ts_file_handle = open_output_file(output_prefix, "begin_ts_file.txt");
|
||||
const timestamp_t app_start_time = timer->timestamp_fn_ns();
|
||||
const timestamp_t app_start_time = util::timestamp_ns();
|
||||
fprintf(begin_ts_file_handle, "%lu\n", app_start_time);
|
||||
|
||||
// Enable HSA API callbacks/activity
|
||||
|
||||
Reference in New Issue
Block a user