@@ -31,6 +31,7 @@ typedef enum {
|
||||
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
|
||||
ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain
|
||||
ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HCC_OPS, // HIP VDI domain
|
||||
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
|
||||
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
|
||||
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
|
||||
|
||||
@@ -34,6 +34,13 @@ THE SOFTWARE.
|
||||
|
||||
#include "roctracer.h"
|
||||
|
||||
// Hook called by roctracer_start() before the journaled callbacks and
// activities are re-enabled.
typedef void (*roctracer_start_cb_t)();

// Hook called by roctracer_stop() after the journaled callbacks and
// activities have been disabled.
typedef void (*roctracer_stop_cb_t)();

// Properties accepted by roctracer_set_properties() for ACTIVITY_DOMAIN_EXT_API.
// Member order is part of the interface: clients aggregate-initialize this
// structure as { start, stop }.
typedef struct {
  roctracer_start_cb_t start_cb;  // start hook
  roctracer_stop_cb_t stop_cb;    // stop hook
} roctracer_ext_properties_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif // __cplusplus
|
||||
|
||||
@@ -77,6 +77,14 @@ bool RegisterApiCallback(uint32_t op, void* callback, void* arg);
|
||||
// Remove ROCTX callback for the given operation id
|
||||
bool RemoveApiCallback(uint32_t op);
|
||||
|
||||
// Range stack iteration, used to support tracing start/stop

// One range reported to the iteration callback
typedef struct {
  const char* message;  // message of the range
  uint32_t tid;         // id of the thread the range belongs to
} roctx_range_data_t;

// Callback invoked for each reported range; 'arg' is the pointer that was
// handed to RangeStackIterate()
typedef void (*roctx_range_iterate_cb_t)(const roctx_range_data_t* data, void* arg);

// Invoke 'callback' with 'arg' for the ranges kept on the range stacks
void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
@@ -141,20 +141,24 @@ class KfdApi {
|
||||
};
|
||||
|
||||
// rocTX runtime library loader class
|
||||
#include "inc/roctracer_roctx.h"
|
||||
class RocTxApi {
|
||||
public:
|
||||
typedef BaseLoader<RocTxApi> Loader;
|
||||
|
||||
typedef bool (RegisterApiCallback_t)(uint32_t op, void* callback, void* arg);
|
||||
typedef bool (RemoveApiCallback_t)(uint32_t op);
|
||||
typedef decltype(RegisterApiCallback) RegisterApiCallback_t;
|
||||
typedef decltype(RemoveApiCallback) RemoveApiCallback_t;
|
||||
typedef decltype(RangeStackIterate) RangeStackIterate_t;
|
||||
|
||||
RegisterApiCallback_t* RegisterApiCallback;
|
||||
RemoveApiCallback_t* RemoveApiCallback;
|
||||
RangeStackIterate_t* RangeStackIterate;
|
||||
|
||||
protected:
|
||||
void init(Loader* loader) {
|
||||
RegisterApiCallback = loader->GetFun<RegisterApiCallback_t>("RegisterApiCallback");
|
||||
RemoveApiCallback = loader->GetFun<RemoveApiCallback_t>("RemoveApiCallback");
|
||||
RangeStackIterate = loader->GetFun<RangeStackIterate_t>("RangeStackIterate");
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -0,0 +1,210 @@
|
||||
/*
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef MEMORY_POOL_H_
|
||||
#define MEMORY_POOL_H_
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
|
||||
#include "util/exception.h"
|
||||
|
||||
// Runs a pthread call that reports failure through its return value. A non-zero
// result is stored in errno, the failing expression is printed with perror(),
// and the process is aborted.
#define PTHREAD_CALL(call) \
|
||||
do { \
|
||||
int err = call; \
|
||||
if (err != 0) { \
|
||||
errno = err; \
|
||||
perror(#call); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
class MemoryPool {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
static void allocator_default(char** ptr, size_t size, void* arg) {
|
||||
(void)arg;
|
||||
if (*ptr == NULL) {
|
||||
*ptr = reinterpret_cast<char*>(malloc(size));
|
||||
} else if (size != 0) {
|
||||
*ptr = reinterpret_cast<char*>(realloc(ptr, size));
|
||||
} else {
|
||||
free(*ptr);
|
||||
*ptr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
MemoryPool(const roctracer_properties_t& properties) {
|
||||
// Assigning pool allocator
|
||||
alloc_fun_ = allocator_default;
|
||||
alloc_arg_ = NULL;
|
||||
if (properties.alloc_fun != NULL) {
|
||||
alloc_fun_ = properties.alloc_fun;
|
||||
alloc_arg_ = properties.alloc_arg;
|
||||
}
|
||||
|
||||
// Pool definition
|
||||
buffer_size_ = properties.buffer_size;
|
||||
const size_t pool_size = 2 * buffer_size_;
|
||||
pool_begin_ = NULL;
|
||||
alloc_fun_(&pool_begin_, pool_size, alloc_arg_);
|
||||
if (pool_begin_ == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "pool allocator failed");
|
||||
pool_end_ = pool_begin_ + pool_size;
|
||||
buffer_begin_ = pool_begin_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
|
||||
// Consuming read thread
|
||||
read_callback_fun_ = properties.buffer_callback_fun;
|
||||
read_callback_arg_ = properties.buffer_callback_arg;
|
||||
consumer_arg_.set(this, NULL, NULL, true);
|
||||
PTHREAD_CALL(pthread_mutex_init(&read_mutex_, NULL));
|
||||
PTHREAD_CALL(pthread_cond_init(&read_cond_, NULL));
|
||||
PTHREAD_CALL(pthread_create(&consumer_thread_, NULL, reader_fun, &consumer_arg_));
|
||||
}
|
||||
|
||||
~MemoryPool() {
|
||||
Flush();
|
||||
PTHREAD_CALL(pthread_cancel(consumer_thread_));
|
||||
void *res;
|
||||
PTHREAD_CALL(pthread_join(consumer_thread_, &res));
|
||||
if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly");
|
||||
allocator_default(&pool_begin_, 0, alloc_arg_);
|
||||
}
|
||||
|
||||
template <typename Record>
|
||||
void Write(const Record& record) {
|
||||
std::lock_guard<mutex_t> lock(write_mutex_);
|
||||
getRecord<Record>(record);
|
||||
}
|
||||
|
||||
void Flush() {
|
||||
std::lock_guard<mutex_t> lock(write_mutex_);
|
||||
if (write_ptr_ > buffer_begin_) {
|
||||
spawn_reader(buffer_begin_, write_ptr_);
|
||||
sync_reader(&consumer_arg_);
|
||||
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
struct consumer_arg_t {
|
||||
MemoryPool* obj;
|
||||
const char* begin;
|
||||
const char* end;
|
||||
volatile std::atomic<bool> valid;
|
||||
void set(MemoryPool* obj_p, const char* begin_p, const char* end_p, bool valid_p) {
|
||||
obj = obj_p;
|
||||
begin = begin_p;
|
||||
end = end_p;
|
||||
valid.store(valid_p);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Record>
|
||||
Record* getRecord(const Record& init) {
|
||||
char* next = write_ptr_ + sizeof(Record);
|
||||
if (next > buffer_end_) {
|
||||
if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")");
|
||||
spawn_reader(buffer_begin_, write_ptr_);
|
||||
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
next = write_ptr_ + sizeof(Record);
|
||||
}
|
||||
|
||||
Record* ptr = reinterpret_cast<Record*>(write_ptr_);
|
||||
write_ptr_ = next;
|
||||
|
||||
*ptr = init;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void reset_reader(consumer_arg_t* arg) {
|
||||
arg->valid.store(false);
|
||||
}
|
||||
|
||||
static void sync_reader(const consumer_arg_t* arg) {
|
||||
while(arg->valid.load() == true) PTHREAD_CALL(pthread_yield());
|
||||
}
|
||||
|
||||
static void* reader_fun(void* consumer_arg) {
|
||||
consumer_arg_t* arg = reinterpret_cast<consumer_arg_t*>(consumer_arg);
|
||||
roctracer::MemoryPool* obj = arg->obj;
|
||||
|
||||
reset_reader(arg);
|
||||
|
||||
while (1) {
|
||||
PTHREAD_CALL(pthread_mutex_lock(&(obj->read_mutex_)));
|
||||
while (arg->valid.load() == false) {
|
||||
PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_)));
|
||||
}
|
||||
|
||||
obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_);
|
||||
reset_reader(arg);
|
||||
PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_)));
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void spawn_reader(const char* data_begin, const char* data_end) {
|
||||
sync_reader(&consumer_arg_);
|
||||
PTHREAD_CALL(pthread_mutex_lock(&read_mutex_));
|
||||
consumer_arg_.set(this, data_begin, data_end, true);
|
||||
PTHREAD_CALL(pthread_cond_signal(&read_cond_));
|
||||
PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_));
|
||||
}
|
||||
|
||||
// pool allocator
|
||||
roctracer_allocator_t alloc_fun_;
|
||||
void* alloc_arg_;
|
||||
|
||||
// Pool definition
|
||||
size_t buffer_size_;
|
||||
char* pool_begin_;
|
||||
char* pool_end_;
|
||||
char* buffer_begin_;
|
||||
char* buffer_end_;
|
||||
char* write_ptr_;
|
||||
mutex_t write_mutex_;
|
||||
|
||||
// Consuming read thread
|
||||
roctracer_buffer_callback_t read_callback_fun_;
|
||||
void* read_callback_arg_;
|
||||
consumer_arg_t consumer_arg_;
|
||||
pthread_t consumer_thread_;
|
||||
pthread_mutex_t read_mutex_;
|
||||
pthread_cond_t read_cond_;
|
||||
};
|
||||
|
||||
} // namespace roctracer
|
||||
|
||||
#endif // MEMORY_POOL_H_
|
||||
@@ -23,6 +23,7 @@ THE SOFTWARE.
|
||||
#include "inc/roctracer.h"
|
||||
#include "inc/roctracer_hcc.h"
|
||||
#include "inc/roctracer_hip.h"
|
||||
#include "inc/roctracer_ext.h"
|
||||
#include "inc/roctracer_roctx.h"
|
||||
#define PROF_API_IMPL 1
|
||||
#include "inc/roctracer_hsa.h"
|
||||
@@ -30,18 +31,19 @@ THE SOFTWARE.
|
||||
#include "inc/roctracer_kfd.h"
|
||||
#endif
|
||||
|
||||
#include <dirent.h>
|
||||
#include <pthread.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
#include <stack>
|
||||
#include <dirent.h>
|
||||
#include <stack>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "core/journal.h"
|
||||
#include "core/loader.h"
|
||||
#include "core/memory_pool.h"
|
||||
#include "core/trace_buffer.h"
|
||||
#include "proxy/tracker.h"
|
||||
#include "ext/hsa_rt_utils.hpp"
|
||||
@@ -58,16 +60,6 @@ THE SOFTWARE.
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
|
||||
// Runs a pthread call that reports failure through its return value. A non-zero
// result is stored in errno, the failing expression is printed with perror(),
// and the process is aborted.
#define PTHREAD_CALL(call) \
|
||||
do { \
|
||||
int err = call; \
|
||||
if (err != 0) { \
|
||||
errno = err; \
|
||||
perror(#call); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define HIPAPI_CALL(call) \
|
||||
do { \
|
||||
hipError_t err = call; \
|
||||
@@ -244,7 +236,12 @@ CoreApiTable CoreApiTable_saved{};
|
||||
AmdExtTable AmdExtTable_saved{};
|
||||
// Table of function pointers to HSA Image Extension
|
||||
ImageExtTable ImageExtTable_saved{};
|
||||
}
|
||||
} // namespace hsa_support
|
||||
|
||||
namespace ext_support {
|
||||
roctracer_start_cb_t roctracer_start_cb = NULL;
|
||||
roctracer_stop_cb_t roctracer_stop_cb = NULL;
|
||||
} // namespace ext_suppoprt
|
||||
|
||||
roctracer_status_t GetExcStatus(const std::exception& e) {
|
||||
const util::exception* roctracer_exc_ptr = dynamic_cast<const util::exception*>(&e);
|
||||
@@ -268,168 +265,6 @@ class GlobalCounter {
|
||||
GlobalCounter::mutex_t GlobalCounter::mutex_;
|
||||
GlobalCounter::counter_t GlobalCounter::counter_ = 0;
|
||||
|
||||
class MemoryPool {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
static void allocator_default(char** ptr, size_t size, void* arg) {
|
||||
(void)arg;
|
||||
if (*ptr == NULL) {
|
||||
*ptr = reinterpret_cast<char*>(malloc(size));
|
||||
} else if (size != 0) {
|
||||
*ptr = reinterpret_cast<char*>(realloc(ptr, size));
|
||||
} else {
|
||||
free(*ptr);
|
||||
*ptr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
MemoryPool(const roctracer_properties_t& properties) {
|
||||
// Assigning pool allocator
|
||||
alloc_fun_ = allocator_default;
|
||||
alloc_arg_ = NULL;
|
||||
if (properties.alloc_fun != NULL) {
|
||||
alloc_fun_ = properties.alloc_fun;
|
||||
alloc_arg_ = properties.alloc_arg;
|
||||
}
|
||||
|
||||
// Pool definition
|
||||
buffer_size_ = properties.buffer_size;
|
||||
const size_t pool_size = 2 * buffer_size_;
|
||||
pool_begin_ = NULL;
|
||||
alloc_fun_(&pool_begin_, pool_size, alloc_arg_);
|
||||
if (pool_begin_ == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "pool allocator failed");
|
||||
pool_end_ = pool_begin_ + pool_size;
|
||||
buffer_begin_ = pool_begin_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
|
||||
// Consuming read thread
|
||||
read_callback_fun_ = properties.buffer_callback_fun;
|
||||
read_callback_arg_ = properties.buffer_callback_arg;
|
||||
consumer_arg_.set(this, NULL, NULL, true);
|
||||
PTHREAD_CALL(pthread_mutex_init(&read_mutex_, NULL));
|
||||
PTHREAD_CALL(pthread_cond_init(&read_cond_, NULL));
|
||||
PTHREAD_CALL(pthread_create(&consumer_thread_, NULL, reader_fun, &consumer_arg_));
|
||||
}
|
||||
|
||||
~MemoryPool() {
|
||||
Flush();
|
||||
PTHREAD_CALL(pthread_cancel(consumer_thread_));
|
||||
void *res;
|
||||
PTHREAD_CALL(pthread_join(consumer_thread_, &res));
|
||||
if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly");
|
||||
allocator_default(&pool_begin_, 0, alloc_arg_);
|
||||
}
|
||||
|
||||
template <typename Record>
|
||||
void Write(const Record& record) {
|
||||
std::lock_guard<mutex_t> lock(write_mutex_);
|
||||
getRecord<Record>(record);
|
||||
}
|
||||
|
||||
void Flush() {
|
||||
std::lock_guard<mutex_t> lock(write_mutex_);
|
||||
if (write_ptr_ > buffer_begin_) {
|
||||
spawn_reader(buffer_begin_, write_ptr_);
|
||||
sync_reader(&consumer_arg_);
|
||||
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
struct consumer_arg_t {
|
||||
MemoryPool* obj;
|
||||
const char* begin;
|
||||
const char* end;
|
||||
volatile std::atomic<bool> valid;
|
||||
void set(MemoryPool* obj_p, const char* begin_p, const char* end_p, bool valid_p) {
|
||||
obj = obj_p;
|
||||
begin = begin_p;
|
||||
end = end_p;
|
||||
valid.store(valid_p);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Record>
|
||||
Record* getRecord(const Record& init) {
|
||||
char* next = write_ptr_ + sizeof(Record);
|
||||
if (next > buffer_end_) {
|
||||
if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")");
|
||||
spawn_reader(buffer_begin_, write_ptr_);
|
||||
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
write_ptr_ = buffer_begin_;
|
||||
next = write_ptr_ + sizeof(Record);
|
||||
}
|
||||
|
||||
Record* ptr = reinterpret_cast<Record*>(write_ptr_);
|
||||
write_ptr_ = next;
|
||||
|
||||
*ptr = init;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void reset_reader(consumer_arg_t* arg) {
|
||||
arg->valid.store(false);
|
||||
}
|
||||
|
||||
static void sync_reader(const consumer_arg_t* arg) {
|
||||
while(arg->valid.load() == true) PTHREAD_CALL(pthread_yield());
|
||||
}
|
||||
|
||||
static void* reader_fun(void* consumer_arg) {
|
||||
consumer_arg_t* arg = reinterpret_cast<consumer_arg_t*>(consumer_arg);
|
||||
roctracer::MemoryPool* obj = arg->obj;
|
||||
|
||||
reset_reader(arg);
|
||||
|
||||
while (1) {
|
||||
PTHREAD_CALL(pthread_mutex_lock(&(obj->read_mutex_)));
|
||||
while (arg->valid.load() == false) {
|
||||
PTHREAD_CALL(pthread_cond_wait(&(obj->read_cond_), &(obj->read_mutex_)));
|
||||
}
|
||||
|
||||
obj->read_callback_fun_(arg->begin, arg->end, obj->read_callback_arg_);
|
||||
reset_reader(arg);
|
||||
PTHREAD_CALL(pthread_mutex_unlock(&(obj->read_mutex_)));
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void spawn_reader(const char* data_begin, const char* data_end) {
|
||||
sync_reader(&consumer_arg_);
|
||||
PTHREAD_CALL(pthread_mutex_lock(&read_mutex_));
|
||||
consumer_arg_.set(this, data_begin, data_end, true);
|
||||
PTHREAD_CALL(pthread_cond_signal(&read_cond_));
|
||||
PTHREAD_CALL(pthread_mutex_unlock(&read_mutex_));
|
||||
}
|
||||
|
||||
// pool allocator
|
||||
roctracer_allocator_t alloc_fun_;
|
||||
void* alloc_arg_;
|
||||
|
||||
// Pool definition
|
||||
size_t buffer_size_;
|
||||
char* pool_begin_;
|
||||
char* pool_end_;
|
||||
char* buffer_begin_;
|
||||
char* buffer_end_;
|
||||
char* write_ptr_;
|
||||
mutex_t write_mutex_;
|
||||
|
||||
// Consuming read thread
|
||||
roctracer_buffer_callback_t read_callback_fun_;
|
||||
void* read_callback_arg_;
|
||||
consumer_arg_t consumer_arg_;
|
||||
pthread_t consumer_thread_;
|
||||
pthread_mutex_t read_mutex_;
|
||||
pthread_cond_t read_cond_;
|
||||
};
|
||||
|
||||
// Records storage
|
||||
struct roctracer_api_data_t {
|
||||
union {
|
||||
@@ -1162,6 +997,7 @@ PUBLIC_API void roctracer_mark(const char* str) {
|
||||
|
||||
// Start API
|
||||
// Resumes tracing: runs the external start hook when one is registered, then
// replays the callback and activity journals to re-enable what was recorded.
PUBLIC_API void roctracer_start() {
  const auto start_hook = roctracer::ext_support::roctracer_start_cb;
  if (start_hook != NULL) {
    start_hook();
  }

  roctracer::cb_journal->foreach(roctracer::cb_en_functor_t(roctracer_enable_callback_fun));
  roctracer::act_journal->foreach(roctracer::act_en_functor_t(roctracer_enable_activity_fun));
}
|
||||
@@ -1170,6 +1006,7 @@ PUBLIC_API void roctracer_start() {
|
||||
// Suspends tracing: replays the callback and activity journals to disable what
// was recorded, then runs the external stop hook when one is registered.
PUBLIC_API void roctracer_stop() {
  roctracer::cb_journal->foreach(roctracer::cb_dis_functor_t(roctracer_disable_callback_fun));
  roctracer::act_journal->foreach(roctracer::act_dis_functor_t(roctracer_disable_activity_fun));

  const auto stop_hook = roctracer::ext_support::roctracer_stop_cb;
  if (stop_hook != NULL) {
    stop_hook();
  }
}
|
||||
|
||||
// Set properties
|
||||
@@ -1222,6 +1059,13 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
const char* hip_backend_lib_name = getenv("HIP_BACKEND_LIB");
|
||||
if (hip_backend_lib_name != NULL) roctracer::HccLoader::Instance().SetLibName(hip_backend_lib_name);
|
||||
mark_api_callback_ptr = reinterpret_cast<mark_api_callback_t*>(properties);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_EXT_API: {
|
||||
roctracer_ext_properties_t* ops_properties = reinterpret_cast<roctracer_ext_properties_t*>(properties);
|
||||
roctracer::ext_support::roctracer_start_cb = ops_properties->start_cb;
|
||||
roctracer::ext_support::roctracer_stop_cb = ops_properties->stop_cb;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
|
||||
@@ -24,11 +24,13 @@ THE SOFTWARE.
|
||||
#include "inc/roctracer_roctx.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <stack>
|
||||
|
||||
#include "inc/ext/prof_protocol.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/logger.h"
|
||||
#include <stack>
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
@@ -62,12 +64,8 @@ THE SOFTWARE.
|
||||
(void)err; \
|
||||
return X;
|
||||
|
||||
static thread_local std::stack<std::string> message_stack;
|
||||
|
||||
#if 0
|
||||
static inline uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
static inline uint32_t GetTid() { return syscall(__NR_gettid); }
|
||||
#endif
|
||||
// Id of the calling process, obtained with a direct getpid system call
inline uint32_t GetPid() {
  const long pid = syscall(__NR_getpid);
  return static_cast<uint32_t>(pid);
}

// Id of the calling thread, obtained with a direct gettid system call
inline uint32_t GetTid() {
  const long tid = syscall(__NR_gettid);
  return static_cast<uint32_t>(tid);
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Library errors enumeration
|
||||
@@ -80,12 +78,27 @@ typedef enum {
|
||||
// Library implementation
|
||||
//
|
||||
namespace roctx {
|
||||
// Stack of range messages kept for one thread
using message_stack_t = std::stack<std::string>;
// Thread id -> range stack of that thread
using thread_map_t = std::map<uint32_t, message_stack_t*>;
using map_mutex_t = std::mutex;

// Guards the creation of thread_map and the insertion of entries into it
map_mutex_t map_mutex;
// Created by the first thread_data_init() call
thread_map_t* thread_map = nullptr;
// Range stack of the current thread; stays null until thread_data_init() runs
// on that thread
static thread_local message_stack_t* message_stack = nullptr;
|
||||
|
||||
// Maps an exception to a rocTX status: a roctracer::util::exception yields the
// status it carries, any other exception yields ROCTX_STATUS_ERROR.
roctx_status_t GetExcStatus(const std::exception& e) {
  const auto* tracer_exc = dynamic_cast<const roctracer::util::exception*>(&e);
  if (tracer_exc == nullptr) {
    return ROCTX_STATUS_ERROR;
  }
  return static_cast<roctx_status_t>(tracer_exc->status());
}
|
||||
|
||||
void thread_data_init() {
|
||||
message_stack = new message_stack_t;
|
||||
const auto tid = GetTid();
|
||||
|
||||
std::lock_guard<map_mutex_t> lck(map_mutex);
|
||||
if (thread_map == NULL) thread_map = new thread_map_t;
|
||||
(*thread_map)[tid] = message_stack;
|
||||
}
|
||||
|
||||
// callbacks table
|
||||
extern cb_table_t cb_table;
|
||||
} // namespace roctx
|
||||
@@ -119,31 +132,50 @@ PUBLIC_API void roctxMarkA(const char* message) {
|
||||
|
||||
// Opens a nested range on the calling thread.
// Reports the call to the registered ROCTX_API_ID_roctxRangePushA callback (if
// any), then pushes 'message' on the thread's range stack.
// Returns the zero-based nesting level of the new range, or -1 when an
// exception is caught by API_METHOD_CATCH.
PUBLIC_API int roctxRangePushA(const char* message) {
  API_METHOD_PREFIX
  if (roctx::message_stack == NULL) roctx::thread_data_init();

  roctx_api_data_t api_data{};
  // NOTE(review): this duplicate is not released here - confirm whether the
  // callback is expected to take ownership of it.
  api_data.args.roctxRangePushA.message = strdup(message);
  activity_rtapi_callback_t api_callback_fun = NULL;
  void* api_callback_arg = NULL;
  roctx::cb_table.get(ROCTX_API_ID_roctxRangePushA, &api_callback_fun, &api_callback_arg);
  if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data, api_callback_arg);

  // The stack stores std::string, which copies the characters; pushing a
  // strdup() result would only leak the intermediate buffer.
  roctx::message_stack->push(message);

  return roctx::message_stack->size() - 1;
  API_METHOD_CATCH(-1);
}
|
||||
|
||||
// Closes the innermost range of the calling thread.
// Reports the call to the registered ROCTX_API_ID_roctxRangePop callback (if
// any), then pops the thread's range stack; popping an empty stack goes
// through EXC_ABORT.
// Returns the number of ranges left on the stack, or -1 when an exception is
// caught by API_METHOD_CATCH.
PUBLIC_API int roctxRangePop() {
  API_METHOD_PREFIX
  if (roctx::message_stack == NULL) roctx::thread_data_init();

  roctx_api_data_t api_data{};
  activity_rtapi_callback_t api_callback_fun = NULL;
  void* api_callback_arg = NULL;
  roctx::cb_table.get(ROCTX_API_ID_roctxRangePop, &api_callback_fun, &api_callback_arg);
  if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, &api_data, api_callback_arg);

  // Only the per-thread roctx::message_stack is maintained
  if (roctx::message_stack->empty()) {
    EXC_ABORT(ROCTX_STATUS_ERROR, "Pop from empty stack!");
  } else {
    roctx::message_stack->pop();
  }

  return roctx::message_stack->size();
  API_METHOD_CATCH(-1)
}
|
||||
|
||||
// Reports every open range of every registered thread to 'callback'.
// For each thread the ranges are visited from the innermost to the outermost;
// 'arg' is forwarded unchanged. The message pointer handed to the callback is
// only valid for the duration of that call.
// Does nothing when 'callback' is NULL or no thread has registered a range
// stack yet.
PUBLIC_API void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg) {
  if (callback == NULL) return;

  // Copy the thread table under its lock: thread_data_init() may insert
  // entries concurrently, and the table does not exist before the first
  // registration. The callbacks run without the lock held.
  roctx::thread_map_t threads;
  {
    std::lock_guard<roctx::map_mutex_t> lck(roctx::map_mutex);
    if (roctx::thread_map == NULL) return;
    threads = *roctx::thread_map;
  }

  for (const auto& entry : threads) {
    const auto tid = entry.first;
    // Walk a copy so that the stack of the owning thread is left untouched.
    // NOTE(review): the copy is not synchronized with push/pop on the owning
    // thread - confirm callers only iterate while those threads are quiescent.
    for (roctx::message_stack_t stack = *(entry.second); !stack.empty(); stack.pop()) {
      roctx_range_data_t data{};
      data.message = stack.top().c_str();
      data.tid = tid;
      callback(&data, arg);
    }
  }
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -158,7 +158,24 @@ roctracer::TraceBuffer<roctx_trace_entry_t>::flush_prm_t roctx_flush_prm[1] = {{
|
||||
roctracer::TraceBuffer<roctx_trace_entry_t> roctx_trace_buffer("rocTX API", 0x200000, roctx_flush_prm, 1);
|
||||
|
||||
// rocTX callback function
|
||||
void roctx_callback(
|
||||
static inline void roctx_callback_fun(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
uint32_t tid,
|
||||
const char* message)
|
||||
{
|
||||
const timestamp_t timestamp = timer->timestamp_fn_ns();
|
||||
roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry();
|
||||
entry->valid = roctracer::TRACE_ENTRY_COMPL;
|
||||
entry->type = 0;
|
||||
entry->cid = cid;
|
||||
entry->timestamp = timestamp;
|
||||
entry->pid = GetPid();
|
||||
entry->tid = tid;
|
||||
entry->message = (message != NULL) ? strdup(message) : NULL;
|
||||
}
|
||||
|
||||
void roctx_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
@@ -166,16 +183,23 @@ void roctx_callback(
|
||||
{
|
||||
(void)arg;
|
||||
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
|
||||
const timestamp_t timestamp = timer->timestamp_fn_ns();
|
||||
roctx_trace_entry_t* entry = roctx_trace_buffer.GetEntry();
|
||||
const char* message = data->args.message;
|
||||
entry->valid = roctracer::TRACE_ENTRY_COMPL;
|
||||
entry->type = 0;
|
||||
entry->cid = cid;
|
||||
entry->timestamp = timestamp;
|
||||
entry->pid = GetPid();
|
||||
entry->tid = GetTid();
|
||||
entry->message = (message != NULL) ? strdup(message) : NULL;
|
||||
roctx_callback_fun(domain, cid, GetTid(), data->args.message);
|
||||
}
|
||||
|
||||
// Start/Stop callbacks
|
||||
void roctx_range_stack_callback(const roctx_range_data_t* data, void* arg) {
|
||||
const bool* is_stop_ptr = (bool*)arg;
|
||||
const uint32_t cid = (*is_stop_ptr == true) ? ROCTX_API_ID_roctxRangePop : ROCTX_API_ID_roctxRangePushA;
|
||||
const char* message = (*is_stop_ptr == true) ? NULL : data->message;
|
||||
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, cid, data->tid, message);
|
||||
}
|
||||
// Stop hook: records a pop for every range reported by the rocTX library
void stop_callback() {
  bool stopping = true;
  void* const flag = &stopping;
  roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, flag);
}
|
||||
// Start hook: records a push for every range reported by the rocTX library
void start_callback() {
  bool stopping = false;
  void* const flag = &stopping;
  roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stack_callback, flag);
}
|
||||
|
||||
void roctx_flush_cb(roctx_trace_entry_t* entry) {
|
||||
@@ -597,8 +621,15 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
if (trace_roctx) {
|
||||
roctx_file_handle = open_output_file(output_prefix, "roctx_trace.txt");
|
||||
|
||||
// register the rocTX start/stop hooks with the external (EXT_API) domain
|
||||
roctracer_ext_properties_t properties {
|
||||
start_callback,
|
||||
stop_callback
|
||||
};
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_EXT_API, &properties);
|
||||
|
||||
fprintf(stdout, " rocTX-trace()\n"); fflush(stdout);
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_callback, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL));
|
||||
}
|
||||
|
||||
// Enable HSA API callbacks/activity
|
||||
@@ -627,11 +658,12 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt");
|
||||
|
||||
// initialize HSA tracing
|
||||
roctracer::hsa_ops_properties_t ops_properties{
|
||||
roctracer::hsa_ops_properties_t ops_properties {
|
||||
table,
|
||||
reinterpret_cast<activity_async_callback_t>(hsa_activity_callback),
|
||||
NULL,
|
||||
output_prefix};
|
||||
output_prefix
|
||||
};
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
|
||||
|
||||
fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout);
|
||||
|
||||
Ссылка в новой задаче
Block a user