SWDEV-351980 - Don't allocate hip_api_data and record
The HIP runtime is now allocating the hip_api_data and record on its
stack so we don't need the thread local record_data_pair stack anymore.
Refactor the API callback function to handle both the case where
synchronous user callbacks are requested and the case where asynchronous
records are requested (enable_callback & enable_activity respectively).
If the callback argument (memory pool) is not null, then activity
records are requested.
Remove CorrelationIdRegister and CorrelationIdLookup. These were used
by the HIP runtime to associate a HIP record id to a ROCtracer
correlation id. Instead, the HIP runtime is now using the correlation
ID returned in the hip_api_data_t.
Added a test to check enabling/disabling concurrent callbacks and
activities.
Change-Id: I5850cfead9861eb3602a3e8fcb7b22580d5fc979
[ROCm/roctracer commit: 88c6e0a700]
这个提交包含在:
@@ -96,10 +96,9 @@ typedef struct activity_record_s {
|
||||
} activity_record_t;
|
||||
|
||||
/* Activity sync callback type */
|
||||
typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data,
|
||||
void* arg);
|
||||
typedef void (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data,
|
||||
void* arg);
|
||||
/* Activity async callback type */
|
||||
typedef void (*activity_id_callback_t)(activity_correlation_id_t id);
|
||||
typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg);
|
||||
|
||||
#endif /* EXT_PROF_PROTOCOL_H_ */
|
||||
|
||||
@@ -50,7 +50,7 @@ extern "C" {
|
||||
// Traced calls ID enumeration
|
||||
typedef enum hip_api_id_t roctracer_hip_api_cid_t;
|
||||
|
||||
typedef void(hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg);
|
||||
typedef void(hipInitAsyncActivityCallback_t)(void* op_callback, void* arg);
|
||||
typedef bool(hipEnableAsyncActivityCallback_t)(unsigned op, bool enable);
|
||||
typedef const char*(hipGetOpName_t)(unsigned op);
|
||||
|
||||
|
||||
@@ -91,7 +91,14 @@
|
||||
#define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin")
|
||||
#define ONLOAD_TRACE_END() ONLOAD_TRACE("end")
|
||||
|
||||
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
static inline uint32_t GetPid() {
|
||||
static auto pid = syscall(__NR_getpid);
|
||||
return pid;
|
||||
}
|
||||
static inline uint32_t GetTid() {
|
||||
static thread_local auto tid = syscall(__NR_gettid);
|
||||
return tid;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Internal library methods
|
||||
@@ -159,47 +166,14 @@ static auto NextCorrelationId() {
|
||||
return counter.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
// Records storage
|
||||
struct RecordDataPair {
|
||||
roctracer_record_t record;
|
||||
union {
|
||||
hip_api_data_t data;
|
||||
};
|
||||
RecordDataPair() {}
|
||||
};
|
||||
static thread_local std::stack<RecordDataPair> record_data_pair_stack;
|
||||
|
||||
// Correlation id storage
|
||||
static thread_local activity_correlation_id_t correlation_id_tls = 0;
|
||||
static std::map<activity_correlation_id_t, activity_correlation_id_t> correlation_id_map{};
|
||||
std::mutex correlation_id_mutex;
|
||||
|
||||
static thread_local std::stack<activity_correlation_id_t> external_id_stack;
|
||||
|
||||
static inline void CorrelationIdRegister(activity_correlation_id_t correlation_id) {
|
||||
std::lock_guard lock(correlation_id_mutex);
|
||||
[[maybe_unused]] const auto ret = correlation_id_map.insert({correlation_id, correlation_id_tls});
|
||||
assert(ret.second && "HIP activity id is not unique");
|
||||
|
||||
DEBUG_TRACE("CorrelationIdRegister id(%lu) id_tls(%lu)\n", correlation_id, correlation_id_tls);
|
||||
}
|
||||
|
||||
static inline activity_correlation_id_t CorrelationIdLookup(
|
||||
activity_correlation_id_t correlation_id) {
|
||||
std::lock_guard lock(correlation_id_mutex);
|
||||
auto it = correlation_id_map.find(correlation_id);
|
||||
assert(it != correlation_id_map.end() && "HIP activity id lookup failed");
|
||||
const activity_correlation_id_t ret_val = it->second;
|
||||
correlation_id_map.erase(it);
|
||||
|
||||
DEBUG_TRACE("CorrelationIdLookup id(%lu) ret(%lu)\n", correlation_id, ret_val);
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
std::mutex hip_activity_mutex;
|
||||
|
||||
enum { API_CB_MASK = 0x1, ACT_CB_MASK = 0x2 };
|
||||
enum { API_CB_MASK = 0x1, API_ACT_MASK = 0x2 };
|
||||
|
||||
class HIPActivityCallbackTracker {
|
||||
public:
|
||||
@@ -212,181 +186,82 @@ class HIPActivityCallbackTracker {
|
||||
|
||||
static HIPActivityCallbackTracker hip_act_cb_tracker;
|
||||
|
||||
inline uint32_t HipApiActivityEnableCheck(uint32_t op) {
|
||||
inline uint32_t HipApiCallbackEnableCheck(uint32_t op) {
|
||||
const uint32_t mask = hip_act_cb_tracker.enable_check(op, API_CB_MASK);
|
||||
const uint32_t ret = (mask & ACT_CB_MASK);
|
||||
const uint32_t ret = (mask & API_ACT_MASK);
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline uint32_t HipApiActivityDisableCheck(uint32_t op) {
|
||||
inline uint32_t HipApiCallbackDisableCheck(uint32_t op) {
|
||||
const uint32_t mask = hip_act_cb_tracker.disable_check(op, API_CB_MASK);
|
||||
const uint32_t ret = (mask & ACT_CB_MASK);
|
||||
const uint32_t ret = (mask & API_ACT_MASK);
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline uint32_t HipActActivityEnableCheck(uint32_t op) {
|
||||
hip_act_cb_tracker.enable_check(op, ACT_CB_MASK);
|
||||
inline uint32_t HipApiActivityEnableCheck(uint32_t op) {
|
||||
hip_act_cb_tracker.enable_check(op, API_ACT_MASK);
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline uint32_t HipActActivityDisableCheck(uint32_t op) {
|
||||
const uint32_t mask = hip_act_cb_tracker.disable_check(op, ACT_CB_MASK);
|
||||
inline uint32_t HipApiActivityDisableCheck(uint32_t op) {
|
||||
const uint32_t mask = hip_act_cb_tracker.disable_check(op, API_ACT_MASK);
|
||||
const uint32_t ret = (mask & API_CB_MASK);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void* HIP_SyncApiDataCallback(uint32_t op_id, roctracer_record_t* record, const void* callback_data,
|
||||
void* arg) {
|
||||
void* ret = nullptr;
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
|
||||
hip_api_data_t* data_ptr = const_cast<hip_api_data_t*>(data);
|
||||
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
|
||||
void HIP_ApiCallback(uint32_t op_id, roctracer_record_t* record, void* callback_data, void* arg) {
|
||||
hip_api_data_t* data = static_cast<hip_api_data_t*>(callback_data);
|
||||
MemoryPool* pool = static_cast<MemoryPool*>(arg);
|
||||
|
||||
int phase = ACTIVITY_API_PHASE_ENTER;
|
||||
if (record != nullptr) {
|
||||
assert(data != nullptr && "ActivityCallback: data is NULL");
|
||||
phase = data->phase;
|
||||
} else if (pool != nullptr) {
|
||||
phase = ACTIVITY_API_PHASE_EXIT;
|
||||
}
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
// Generate a new correlation ID.
|
||||
uint64_t correlation_id = NextCorrelationId();
|
||||
data->correlation_id = correlation_id;
|
||||
|
||||
if (phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
// Allocating a record if nullptr passed
|
||||
if (record == nullptr) {
|
||||
assert(data == nullptr && "ActivityCallback enter: record is NULL");
|
||||
data = &record_data_pair_stack.emplace().data;
|
||||
data_ptr = const_cast<hip_api_data_t*>(data);
|
||||
data_ptr->phase = phase;
|
||||
data_ptr->correlation_id = 0;
|
||||
}
|
||||
|
||||
// Correlation ID generating
|
||||
uint64_t correlation_id = data->correlation_id;
|
||||
if (correlation_id == 0) {
|
||||
correlation_id = NextCorrelationId();
|
||||
data_ptr->correlation_id = correlation_id;
|
||||
}
|
||||
|
||||
// Passing correlation ID
|
||||
// Record the correlation ID in a TLS variable so that it can be passed
|
||||
// to an asynchronous activity started before the API function returns.
|
||||
correlation_id_tls = correlation_id;
|
||||
|
||||
ret = data_ptr;
|
||||
if (pool != nullptr) {
|
||||
// Filing record info
|
||||
record->domain = ACTIVITY_DOMAIN_HIP_API;
|
||||
record->kind = 0;
|
||||
record->op = op_id;
|
||||
record->process_id = GetPid();
|
||||
record->thread_id = GetTid();
|
||||
record->begin_ns = util::timestamp_ns();
|
||||
record->correlation_id = correlation_id;
|
||||
}
|
||||
} else {
|
||||
// popping the record entry
|
||||
assert(!record_data_pair_stack.empty() &&
|
||||
"HIP_SyncApiDataCallback exit: record stack is empty");
|
||||
record_data_pair_stack.pop();
|
||||
if (pool != nullptr) {
|
||||
if (!external_id_stack.empty()) {
|
||||
roctracer_record_t ext_record{};
|
||||
ext_record.domain = ACTIVITY_DOMAIN_EXT_API;
|
||||
ext_record.op = ACTIVITY_EXT_OP_EXTERN_ID;
|
||||
ext_record.correlation_id = record->correlation_id;
|
||||
ext_record.external_id = external_id_stack.top();
|
||||
pool->Write(ext_record);
|
||||
}
|
||||
|
||||
// Clearing correlation ID
|
||||
// Write record to the buffer
|
||||
record->end_ns = util::timestamp_ns();
|
||||
pool->Write(*record);
|
||||
}
|
||||
// Clear correlation ID
|
||||
correlation_id_tls = 0;
|
||||
}
|
||||
|
||||
DEBUG_TRACE(
|
||||
"HIP_SyncApiDataCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) "
|
||||
"correlation_id(%lu) time_ns(%lu)\n",
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0), phase, op_id, record, data, pool,
|
||||
(int)(record_data_pair_stack.size()), (data_ptr) ? data_ptr->correlation_id : 0,
|
||||
util::timestamp_ns());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void* HIP_SyncActivityCallback(uint32_t op_id, roctracer_record_t* record,
|
||||
const void* callback_data, void* arg) {
|
||||
const roctracer_timestamp_t timestamp_ns = util::timestamp_ns();
|
||||
void* ret = nullptr;
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
|
||||
hip_api_data_t* data_ptr = const_cast<hip_api_data_t*>(data);
|
||||
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
|
||||
|
||||
int phase = ACTIVITY_API_PHASE_ENTER;
|
||||
if (record != nullptr) {
|
||||
assert(data != nullptr && "ActivityCallback: data is NULL");
|
||||
phase = data->phase;
|
||||
} else if (pool != nullptr) {
|
||||
phase = ACTIVITY_API_PHASE_EXIT;
|
||||
}
|
||||
|
||||
if (phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
// Allocating a record if nullptr passed
|
||||
if (record == nullptr) {
|
||||
assert(data == nullptr && "ActivityCallback enter: record is NULL");
|
||||
auto& top = record_data_pair_stack.emplace();
|
||||
record = &(top.record);
|
||||
data = &(top.data);
|
||||
data_ptr = const_cast<hip_api_data_t*>(data);
|
||||
data_ptr->phase = phase;
|
||||
data_ptr->correlation_id = 0;
|
||||
}
|
||||
|
||||
// Filing record info
|
||||
record->domain = ACTIVITY_DOMAIN_HIP_API;
|
||||
record->op = op_id;
|
||||
record->begin_ns = timestamp_ns;
|
||||
|
||||
// Correlation ID generating
|
||||
uint64_t correlation_id = data->correlation_id;
|
||||
if (correlation_id == 0) {
|
||||
correlation_id = NextCorrelationId();
|
||||
data_ptr->correlation_id = correlation_id;
|
||||
}
|
||||
record->correlation_id = correlation_id;
|
||||
|
||||
// Passing correlation ID
|
||||
correlation_id_tls = correlation_id;
|
||||
|
||||
ret = data_ptr;
|
||||
} else {
|
||||
assert(pool != nullptr && "ActivityCallback exit: pool is NULL");
|
||||
assert(!record_data_pair_stack.empty() && "ActivityCallback exit: record stack is empty");
|
||||
|
||||
// Getting record of stacked
|
||||
if (record == nullptr) record = &record_data_pair_stack.top().record;
|
||||
|
||||
// Filing record info
|
||||
record->end_ns = timestamp_ns;
|
||||
record->process_id = syscall(__NR_getpid);
|
||||
record->thread_id = syscall(__NR_gettid);
|
||||
|
||||
if (!external_id_stack.empty()) {
|
||||
roctracer_record_t ext_record{};
|
||||
ext_record.domain = ACTIVITY_DOMAIN_EXT_API;
|
||||
ext_record.op = ACTIVITY_EXT_OP_EXTERN_ID;
|
||||
ext_record.correlation_id = record->correlation_id;
|
||||
ext_record.external_id = external_id_stack.top();
|
||||
pool->Write(ext_record);
|
||||
}
|
||||
|
||||
// Writing record to the buffer
|
||||
pool->Write(*record);
|
||||
|
||||
// popping the record entry
|
||||
record_data_pair_stack.pop();
|
||||
|
||||
// Clearing correlation ID
|
||||
correlation_id_tls = 0;
|
||||
}
|
||||
|
||||
DEBUG_TRACE(
|
||||
"HIP_SyncActivityCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) "
|
||||
"HIP_ApiCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) "
|
||||
"correlation_id(%lu) beg_ns(%lu) end_ns(%lu)\n",
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0), phase, op_id, record, data, pool,
|
||||
(int)(record_data_pair_stack.size()), (data_ptr) ? data_ptr->correlation_id : 0,
|
||||
timestamp_ns);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void HIP_ActivityIdCallback(activity_correlation_id_t correlation_id) {
|
||||
CorrelationIdRegister(correlation_id);
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0), data->phase, op_id, record, data,
|
||||
pool, data->correlation_id, timestamp_ns);
|
||||
}
|
||||
|
||||
void HIP_AsyncActivityCallback(uint32_t op_id, void* record_ptr, void* arg) {
|
||||
MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
|
||||
roctracer_record_t record = *reinterpret_cast<roctracer_record_t*>(record_ptr);
|
||||
roctracer_record_t& record = *reinterpret_cast<roctracer_record_t*>(record_ptr);
|
||||
record.domain = ACTIVITY_DOMAIN_HIP_OPS;
|
||||
record.correlation_id = CorrelationIdLookup(record.correlation_id);
|
||||
if (record.correlation_id == 0) return;
|
||||
|
||||
// If the record is for a kernel dispatch, write the kernel name in the pool's data,
|
||||
// and make the record point to it. Older HIP runtimes do not provide a kernel
|
||||
@@ -652,9 +527,9 @@ static void roctracer_enable_callback_fun(roctracer_domain_t domain, uint32_t op
|
||||
if (hip_err != hipSuccess)
|
||||
FATAL_LOGGING("HIP::RegisterApiCallback(" << op << ") error(" << hip_err << ")");
|
||||
|
||||
if (HipApiActivityEnableCheck(op) == 0) {
|
||||
hip_err = HipLoader::Instance().RegisterActivityCallback(op, (void*)HIP_SyncApiDataCallback,
|
||||
(void*)1);
|
||||
if (HipApiCallbackEnableCheck(op) == 0) {
|
||||
hip_err =
|
||||
HipLoader::Instance().RegisterActivityCallback(op, (void*)HIP_ApiCallback, nullptr);
|
||||
if (hip_err != hipSuccess)
|
||||
FATAL_LOGGING("HIPAPI: HIP::RegisterActivityCallback(" << op << ") error(" << hip_err
|
||||
<< ")");
|
||||
@@ -723,7 +598,7 @@ static void roctracer_disable_callback_fun(roctracer_domain_t domain, uint32_t o
|
||||
if (hip_err != hipSuccess)
|
||||
FATAL_LOGGING("HIP::RemoveApiCallback(" << op << "), error(" << hip_err << ")");
|
||||
|
||||
if (HipApiActivityDisableCheck(op) == 0) {
|
||||
if (HipApiCallbackDisableCheck(op) == 0) {
|
||||
const hipError_t hip_err = HipLoader::Instance().RemoveActivityCallback(op);
|
||||
if (hip_err != hipSuccess)
|
||||
FATAL_LOGGING("HIPAPI: HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err
|
||||
@@ -853,8 +728,7 @@ static void roctracer_enable_activity_fun(roctracer_domain_t domain, uint32_t op
|
||||
std::lock_guard lock(hip_activity_mutex);
|
||||
|
||||
if (!HipLoader::Instance().InitActivityDone()) {
|
||||
HipLoader::Instance().InitActivityCallback((void*)HIP_ActivityIdCallback,
|
||||
(void*)HIP_AsyncActivityCallback, (void*)pool);
|
||||
HipLoader::Instance().InitActivityCallback((void*)HIP_AsyncActivityCallback, pool);
|
||||
HipLoader::Instance().InitActivityDone() = true;
|
||||
}
|
||||
if (!HipLoader::Instance().EnableActivityCallback(op, true))
|
||||
@@ -865,9 +739,9 @@ static void roctracer_enable_activity_fun(roctracer_domain_t domain, uint32_t op
|
||||
if (!HipLoader::Instance().Enabled()) break;
|
||||
std::lock_guard lock(hip_activity_mutex);
|
||||
|
||||
if (HipActActivityEnableCheck(op) == 0) {
|
||||
const hipError_t hip_err = HipLoader::Instance().RegisterActivityCallback(
|
||||
op, (void*)HIP_SyncActivityCallback, (void*)pool);
|
||||
if (HipApiActivityEnableCheck(op) == 0) {
|
||||
const hipError_t hip_err =
|
||||
HipLoader::Instance().RegisterActivityCallback(op, (void*)HIP_ApiCallback, pool);
|
||||
if (hip_err != hipSuccess)
|
||||
FATAL_LOGGING("HIP::RegisterActivityCallback(" << op << " error(" << hip_err << ")");
|
||||
}
|
||||
@@ -961,13 +835,13 @@ static void roctracer_disable_activity_fun(roctracer_domain_t domain, uint32_t o
|
||||
if (!HipLoader::Instance().Enabled()) break;
|
||||
std::lock_guard lock(hip_activity_mutex);
|
||||
|
||||
if (HipActActivityDisableCheck(op) == 0) {
|
||||
if (HipApiActivityDisableCheck(op) == 0) {
|
||||
const hipError_t hip_err = HipLoader::Instance().RemoveActivityCallback(op);
|
||||
if (hip_err != hipSuccess)
|
||||
FATAL_LOGGING("HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")");
|
||||
} else {
|
||||
const hipError_t hip_err = HipLoader::Instance().RegisterActivityCallback(
|
||||
op, (void*)HIP_SyncApiDataCallback, (void*)1);
|
||||
const hipError_t hip_err =
|
||||
HipLoader::Instance().RegisterActivityCallback(op, (void*)HIP_ApiCallback, nullptr);
|
||||
if (hip_err != hipSuccess)
|
||||
FATAL_LOGGING("HIPACT: HIP::RegisterActivityCallback(" << op << ") error(" << hip_err
|
||||
<< ")");
|
||||
|
||||
@@ -133,6 +133,12 @@ target_include_directories(memory_pool PRIVATE ${PROJECT_SOURCE_DIR}/src/roctrac
|
||||
target_link_libraries(memory_pool Threads::Threads atomic)
|
||||
add_dependencies(mytest memory_pool)
|
||||
|
||||
## Build the activity_and_callback test
|
||||
set_source_files_properties(directed/activity_and_callback.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
|
||||
hip_add_executable(activity_and_callback directed/activity_and_callback.cpp)
|
||||
target_link_libraries(activity_and_callback roctracer)
|
||||
add_dependencies(mytest activity_and_callback)
|
||||
|
||||
## Copy the golden traces and test scripts
|
||||
configure_file(run.sh ${PROJECT_BINARY_DIR} COPYONLY)
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink run.sh ${PROJECT_BINARY_DIR}/run_ci.sh)
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
/* Copyright (c) 2022 Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <roctracer.h>
|
||||
#define HIP_PROF_HIP_API_STRING 1
|
||||
#include <roctracer_hip.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
__global__ void kernel() {}
|
||||
|
||||
template <typename T> inline void CHECK(T status);
|
||||
|
||||
template <> inline void CHECK(hipError_t err) {
|
||||
if (err != hipSuccess) {
|
||||
std::cerr << hipGetErrorString(err) << std::endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
template <> inline void CHECK(roctracer_status_t status) {
|
||||
if (status != ROCTRACER_STATUS_SUCCESS) {
|
||||
std::cerr << roctracer_error_string() << std::endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
uint32_t GetPid() {
|
||||
static auto pid = syscall(__NR_getpid);
|
||||
return pid;
|
||||
}
|
||||
uint32_t GetTid() {
|
||||
static thread_local auto tid = syscall(__NR_gettid);
|
||||
return tid;
|
||||
}
|
||||
|
||||
void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
|
||||
const hip_api_data_t* data = static_cast<const hip_api_data_t*>(callback_data);
|
||||
fprintf(stdout, "<%s id(%u)\tcorrelation_id(%lu) %s pid(%d) tid(%d)>\n",
|
||||
roctracer_op_string(domain, cid, 0), cid, data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit", GetPid(), GetTid());
|
||||
}
|
||||
|
||||
void buffer_callback(const char* begin, const char* end, void* arg) {
|
||||
for (const roctracer_record_t* record = (const roctracer_record_t*)begin;
|
||||
record < (const roctracer_record_t*)end; CHECK(roctracer_next_record(record, &record))) {
|
||||
fprintf(stdout, "\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)\n",
|
||||
roctracer_op_string(record->domain, record->op, record->kind), record->correlation_id,
|
||||
record->begin_ns, record->end_ns);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int main() {
|
||||
CHECK(hipSetDevice(0));
|
||||
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_callback_fun = buffer_callback;
|
||||
properties.buffer_callback_arg = nullptr;
|
||||
properties.buffer_size = 1024;
|
||||
CHECK(roctracer_open_pool(&properties));
|
||||
|
||||
// 1: callbacks only
|
||||
CHECK(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 2: callbacks and activities
|
||||
CHECK(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 3: activities only
|
||||
CHECK(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 4: callbacks only
|
||||
CHECK(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, nullptr));
|
||||
CHECK(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 5: callbacks and activities
|
||||
CHECK(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 6: callbacks only
|
||||
CHECK(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
// 7: none
|
||||
CHECK(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
CHECK(hipSetDevice(0));
|
||||
kernel<<<1, 1>>>();
|
||||
CHECK(hipDeviceSynchronize());
|
||||
CHECK(roctracer_flush_activity());
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,65 @@
|
||||
<hipSetDevice id(186) correlation_id(1) on-enter pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(1) on-exit pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(2) on-enter pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(2) on-exit pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(3) on-enter pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(3) on-exit pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(4) on-enter pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(4) on-exit pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(5) on-enter pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(5) on-exit pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(6) on-enter pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(6) on-exit pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(7) on-enter pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(7) on-exit pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(8) on-enter pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(8) on-exit pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(9) on-enter pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(9) on-exit pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(10) on-enter pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(10) on-exit pid(877336) tid(877336)>
|
||||
hipSetDevice correlation_id(6) time_ns(861794298279896:861794298283613)
|
||||
__hipPushCallConfiguration correlation_id(7) time_ns(861794298290125:861794298293211)
|
||||
__hipPopCallConfiguration correlation_id(8) time_ns(861794298293903:861794298295325)
|
||||
hipLaunchKernel correlation_id(9) time_ns(861794298296377:861794298313029)
|
||||
hipDeviceSynchronize correlation_id(10) time_ns(861794298313470:861794298331113)
|
||||
hipSetDevice correlation_id(11) time_ns(861794298565986:861794298566277)
|
||||
__hipPushCallConfiguration correlation_id(12) time_ns(861794298566738:861794298567148)
|
||||
__hipPopCallConfiguration correlation_id(13) time_ns(861794298567569:861794298568010)
|
||||
hipLaunchKernel correlation_id(14) time_ns(861794298568391:861794298577638)
|
||||
hipDeviceSynchronize correlation_id(15) time_ns(861794298578069:861794298594841)
|
||||
<hipSetDevice id(186) correlation_id(16) on-enter pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(16) on-exit pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(17) on-enter pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(17) on-exit pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(18) on-enter pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(18) on-exit pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(19) on-enter pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(19) on-exit pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(20) on-enter pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(20) on-exit pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(21) on-enter pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(21) on-exit pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(22) on-enter pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(22) on-exit pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(23) on-enter pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(23) on-exit pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(24) on-enter pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(24) on-exit pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(25) on-enter pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(25) on-exit pid(877336) tid(877336)>
|
||||
hipSetDevice correlation_id(21) time_ns(861794299364583:861794299365585)
|
||||
__hipPushCallConfiguration correlation_id(22) time_ns(861794299366106:861794299367329)
|
||||
__hipPopCallConfiguration correlation_id(23) time_ns(861794299367830:861794299369082)
|
||||
hipLaunchKernel correlation_id(24) time_ns(861794299369523:861794299377227)
|
||||
hipDeviceSynchronize correlation_id(25) time_ns(861794299377748:861794299394730)
|
||||
<hipSetDevice id(186) correlation_id(26) on-enter pid(877336) tid(877336)>
|
||||
<hipSetDevice id(186) correlation_id(26) on-exit pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(27) on-enter pid(877336) tid(877336)>
|
||||
<__hipPushCallConfiguration id(2) correlation_id(27) on-exit pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(28) on-enter pid(877336) tid(877336)>
|
||||
<__hipPopCallConfiguration id(1) correlation_id(28) on-exit pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(29) on-enter pid(877336) tid(877336)>
|
||||
<hipLaunchKernel id(107) correlation_id(29) on-exit pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(30) on-enter pid(877336) tid(877336)>
|
||||
<hipDeviceSynchronize id(48) correlation_id(30) on-exit pid(877336) tid(877336)>
|
||||
@@ -18,5 +18,6 @@ hsa_co_trace --check-none
|
||||
code_obj_trace --check-none
|
||||
trace_buffer --check-none
|
||||
memory_pool --check-none
|
||||
activity_and_callback_trace --check-order .*
|
||||
roctx_test_trace --check-count .*
|
||||
backward_compat_test_trace --check-none
|
||||
backward_compat_test_trace --check-none
|
||||
|
||||
@@ -176,14 +176,16 @@ export ROCP_TOOL_LIB=./test/libcodeobj_test.so
|
||||
export HSA_TOOLS_LIB="librocprofiler64.so"
|
||||
eval_test "tool tracer codeobj" ./test/MatrixTranspose code_obj_trace
|
||||
|
||||
unset LD_PRELOAD
|
||||
#valgrind --leak-check=full $tbin
|
||||
#valgrind --tool=massif $tbin
|
||||
#ms_print massif.out.<N>
|
||||
|
||||
eval_test "directed TraceBuffer test" ./test/trace_buffer trace_buffer
|
||||
eval_test "directed MemoryPool test" ./test/memory_pool memory_pool
|
||||
eval_test "enable/disable callbacks and activities test" ./test/activity_and_callback activity_and_callback_trace
|
||||
|
||||
eval_test "backward compatibilty tests" ./test/backward_compat_test backward_compat_test_trace
|
||||
eval_test "backward compatibility tests" ./test/backward_compat_test backward_compat_test_trace
|
||||
|
||||
echo "$test_number tests total / $test_runnum tests run / $test_status tests failed"
|
||||
if [ $test_status != 0 ] ; then
|
||||
|
||||
在新工单中引用
屏蔽一个用户