Run clang-format on all source files
Change-Id: Ifb52ca306286b6b2d473821bed9db28e9f616d50
This commit is contained in:
zatwierdzone przez
Laurent Morichetti
rodzic
89f6880371
commit
15ab5d9cda
+43
-47
@@ -25,35 +25,30 @@
|
||||
|
||||
// Traced API domains
|
||||
typedef enum {
|
||||
ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain
|
||||
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_OPS = 2, // HIP async activity domain
|
||||
ACTIVITY_DOMAIN_HCC_OPS = ACTIVITY_DOMAIN_HIP_OPS, // HCC async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HIP_OPS, // HIP VDI async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain
|
||||
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
|
||||
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
|
||||
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
|
||||
ACTIVITY_DOMAIN_HSA_EVT = 7, // HSA events
|
||||
ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain
|
||||
ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_OPS = 2, // HIP async activity domain
|
||||
ACTIVITY_DOMAIN_HCC_OPS = ACTIVITY_DOMAIN_HIP_OPS, // HCC async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HIP_OPS, // HIP VDI async activity domain
|
||||
ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain
|
||||
ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain
|
||||
ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain
|
||||
ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain
|
||||
ACTIVITY_DOMAIN_HSA_EVT = 7, // HSA events
|
||||
ACTIVITY_DOMAIN_NUMBER
|
||||
} activity_domain_t;
|
||||
|
||||
// Extension API opcodes
|
||||
typedef enum {
|
||||
ACTIVITY_EXT_OP_MARK = 0,
|
||||
ACTIVITY_EXT_OP_EXTERN_ID = 1
|
||||
} activity_ext_op_t;
|
||||
typedef enum { ACTIVITY_EXT_OP_MARK = 0, ACTIVITY_EXT_OP_EXTERN_ID = 1 } activity_ext_op_t;
|
||||
|
||||
// API calback type
|
||||
typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid, const void* data, void* arg);
|
||||
typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid, const void* data,
|
||||
void* arg);
|
||||
typedef uint32_t activity_kind_t;
|
||||
typedef uint32_t activity_op_t;
|
||||
|
||||
// API callback phase
|
||||
typedef enum {
|
||||
ACTIVITY_API_PHASE_ENTER = 0,
|
||||
ACTIVITY_API_PHASE_EXIT = 1
|
||||
} activity_api_phase_t;
|
||||
typedef enum { ACTIVITY_API_PHASE_ENTER = 0, ACTIVITY_API_PHASE_EXIT = 1 } activity_api_phase_t;
|
||||
|
||||
// Trace record types
|
||||
// Correlation id
|
||||
@@ -61,39 +56,40 @@ typedef uint64_t activity_correlation_id_t;
|
||||
|
||||
// Activity record type
|
||||
typedef struct activity_record_s {
|
||||
uint32_t domain; // activity domain id
|
||||
activity_kind_t kind; // activity kind
|
||||
activity_op_t op; // activity op
|
||||
union {
|
||||
struct {
|
||||
activity_correlation_id_t correlation_id; // activity ID
|
||||
uint64_t begin_ns; // host begin timestamp
|
||||
uint64_t end_ns; // host end timestamp
|
||||
};
|
||||
struct {
|
||||
uint32_t se; // sampled SE
|
||||
uint64_t cycle; // sample cycle
|
||||
uint64_t pc; // sample PC
|
||||
} pc_sample;
|
||||
uint32_t domain; // activity domain id
|
||||
activity_kind_t kind; // activity kind
|
||||
activity_op_t op; // activity op
|
||||
union {
|
||||
struct {
|
||||
activity_correlation_id_t correlation_id; // activity ID
|
||||
uint64_t begin_ns; // host begin timestamp
|
||||
uint64_t end_ns; // host end timestamp
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
int device_id; // device id
|
||||
uint64_t queue_id; // queue id
|
||||
};
|
||||
struct {
|
||||
uint32_t process_id; // device id
|
||||
uint32_t thread_id; // thread id
|
||||
};
|
||||
struct {
|
||||
activity_correlation_id_t external_id; // external correlatino id
|
||||
};
|
||||
struct {
|
||||
uint32_t se; // sampled SE
|
||||
uint64_t cycle; // sample cycle
|
||||
uint64_t pc; // sample PC
|
||||
} pc_sample;
|
||||
};
|
||||
union {
|
||||
struct {
|
||||
int device_id; // device id
|
||||
uint64_t queue_id; // queue id
|
||||
};
|
||||
size_t bytes; // data size bytes
|
||||
struct {
|
||||
uint32_t process_id; // device id
|
||||
uint32_t thread_id; // thread id
|
||||
};
|
||||
struct {
|
||||
activity_correlation_id_t external_id; // external correlatino id
|
||||
};
|
||||
};
|
||||
size_t bytes; // data size bytes
|
||||
} activity_record_t;
|
||||
|
||||
// Activity sync calback type
|
||||
typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg);
|
||||
typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data,
|
||||
void* arg);
|
||||
// Activity async calback type
|
||||
typedef void (*activity_id_callback_t)(activity_correlation_id_t id);
|
||||
typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg);
|
||||
|
||||
+66
-81
@@ -81,17 +81,15 @@ typedef activity_domain_t roctracer_domain_t;
|
||||
|
||||
// Return Op string by given domain and Op code
|
||||
// NULL returned on the error and the library errno is set
|
||||
const char* roctracer_op_string(
|
||||
uint32_t domain, // tracing domain
|
||||
uint32_t op, // activity op ID
|
||||
uint32_t kind); // activity kind
|
||||
const char* roctracer_op_string(uint32_t domain, // tracing domain
|
||||
uint32_t op, // activity op ID
|
||||
uint32_t kind); // activity kind
|
||||
|
||||
// Return Op code and kind by given string
|
||||
roctracer_status_t roctracer_op_code(
|
||||
uint32_t domain, // tracing domain
|
||||
const char* str, // [in] op string
|
||||
uint32_t* op, // [out] op code
|
||||
uint32_t* kind); // [out] op kind code if not NULL
|
||||
roctracer_status_t roctracer_op_code(uint32_t domain, // tracing domain
|
||||
const char* str, // [in] op string
|
||||
uint32_t* op, // [out] op code
|
||||
uint32_t* kind); // [out] op kind code if not NULL
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Callback API
|
||||
@@ -106,24 +104,22 @@ typedef activity_rtapi_callback_t roctracer_rtapi_callback_t;
|
||||
|
||||
// Enable runtime API callbacks
|
||||
roctracer_status_t roctracer_enable_op_callback(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op, // API call ID
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op, // API call ID
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
roctracer_status_t roctracer_enable_domain_callback(
|
||||
activity_domain_t domain, // tracing domain
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
activity_domain_t domain, // tracing domain
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
roctracer_status_t roctracer_enable_callback(
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
activity_rtapi_callback_t callback, // callback function pointer
|
||||
void* arg); // [in/out] callback arg
|
||||
|
||||
// Disable runtime API callbacks
|
||||
roctracer_status_t roctracer_disable_op_callback(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op); // API call ID
|
||||
roctracer_status_t roctracer_disable_domain_callback(
|
||||
activity_domain_t domain); // tracing domain
|
||||
roctracer_status_t roctracer_disable_op_callback(activity_domain_t domain, // tracing domain
|
||||
uint32_t op); // API call ID
|
||||
roctracer_status_t roctracer_disable_domain_callback(activity_domain_t domain); // tracing domain
|
||||
roctracer_status_t roctracer_disable_callback();
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -140,33 +136,32 @@ typedef activity_record_t roctracer_record_t;
|
||||
|
||||
// Return next record
|
||||
static inline roctracer_status_t roctracer_next_record(
|
||||
const activity_record_t* record, // [in] record ptr
|
||||
const activity_record_t** next) // [out] next record ptr
|
||||
const activity_record_t* record, // [in] record ptr
|
||||
const activity_record_t** next) // [out] next record ptr
|
||||
{
|
||||
*next = record + 1;
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Tracer allocator type
|
||||
typedef void (*roctracer_allocator_t)(
|
||||
char** ptr, // memory pointer
|
||||
size_t size, // memory size
|
||||
void* arg); // allocator arg
|
||||
typedef void (*roctracer_allocator_t)(char** ptr, // memory pointer
|
||||
size_t size, // memory size
|
||||
void* arg); // allocator arg
|
||||
|
||||
// Pool callback type
|
||||
typedef void (*roctracer_buffer_callback_t)(
|
||||
const char* begin, // [in] available buffered trace records
|
||||
const char* end, // [in] end of buffered trace records
|
||||
void* arg); // [in/out] callback arg
|
||||
const char* begin, // [in] available buffered trace records
|
||||
const char* end, // [in] end of buffered trace records
|
||||
void* arg); // [in/out] callback arg
|
||||
|
||||
// Tracer properties
|
||||
typedef struct {
|
||||
uint32_t mode; // roctracer mode
|
||||
size_t buffer_size; // buffer size
|
||||
roctracer_allocator_t alloc_fun; // memory alocator function pointer
|
||||
void* alloc_arg; // memory alocator function pointer
|
||||
roctracer_buffer_callback_t buffer_callback_fun; // tracer record callback function
|
||||
void* buffer_callback_arg; // tracer record callback arg
|
||||
uint32_t mode; // roctracer mode
|
||||
size_t buffer_size; // buffer size
|
||||
roctracer_allocator_t alloc_fun; // memory alocator function pointer
|
||||
void* alloc_arg; // memory alocator function pointer
|
||||
roctracer_buffer_callback_t buffer_callback_fun; // tracer record callback function
|
||||
void* buffer_callback_arg; // tracer record callback arg
|
||||
} roctracer_properties_t;
|
||||
|
||||
// Tracer memory pool type
|
||||
@@ -175,78 +170,69 @@ typedef void roctracer_pool_t;
|
||||
// Create tracer memory pool
|
||||
// The first invocation sets the default pool
|
||||
roctracer_status_t roctracer_open_pool_expl(
|
||||
const roctracer_properties_t* properties, // tracer pool properties
|
||||
roctracer_pool_t** pool); // [out] returns tracer pool if not NULL,
|
||||
// otherwise sets the default one if it is not set yet
|
||||
const roctracer_properties_t* properties, // tracer pool properties
|
||||
roctracer_pool_t** pool); // [out] returns tracer pool if not NULL,
|
||||
// otherwise sets the default one if it is not set yet
|
||||
static inline roctracer_status_t roctracer_open_pool(
|
||||
const roctracer_properties_t* properties) // tracer pool properties
|
||||
const roctracer_properties_t* properties) // tracer pool properties
|
||||
{
|
||||
return roctracer_open_pool_expl(properties, NULL);
|
||||
return roctracer_open_pool_expl(properties, NULL);
|
||||
}
|
||||
// otherwise the error is generated
|
||||
// otherwise the error is generated
|
||||
|
||||
// Close tracer memory pool
|
||||
roctracer_status_t roctracer_close_pool_expl(
|
||||
roctracer_pool_t* pool); // [in] memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_close_pool()
|
||||
{
|
||||
return roctracer_close_pool_expl(NULL);
|
||||
}
|
||||
roctracer_pool_t* pool); // [in] memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_close_pool() { return roctracer_close_pool_expl(NULL); }
|
||||
|
||||
// Return current default pool
|
||||
// Set new default pool if the argument is not NULL
|
||||
roctracer_pool_t* roctracer_default_pool_expl(
|
||||
roctracer_pool_t* pool); // [in] new default pool if not NULL
|
||||
static inline roctracer_pool_t* roctracer_default_pool()
|
||||
{
|
||||
return roctracer_default_pool_expl(NULL);
|
||||
roctracer_pool_t* pool); // [in] new default pool if not NULL
|
||||
static inline roctracer_pool_t* roctracer_default_pool() {
|
||||
return roctracer_default_pool_expl(NULL);
|
||||
}
|
||||
|
||||
// Enable activity records logging
|
||||
roctracer_status_t roctracer_enable_op_activity_expl(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op, // activity op ID
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op, // activity op ID
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_enable_op_activity(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op) // activity op ID
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op) // activity op ID
|
||||
{
|
||||
return roctracer_enable_op_activity_expl(domain, op, NULL);
|
||||
return roctracer_enable_op_activity_expl(domain, op, NULL);
|
||||
}
|
||||
roctracer_status_t roctracer_enable_domain_activity_expl(
|
||||
activity_domain_t domain, // tracing domain
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
activity_domain_t domain, // tracing domain
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_enable_domain_activity(
|
||||
activity_domain_t domain) // tracing domain
|
||||
activity_domain_t domain) // tracing domain
|
||||
{
|
||||
return roctracer_enable_domain_activity_expl(domain, NULL);
|
||||
return roctracer_enable_domain_activity_expl(domain, NULL);
|
||||
}
|
||||
roctracer_status_t roctracer_enable_activity_expl(
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_enable_activity()
|
||||
{
|
||||
return roctracer_enable_activity_expl(NULL);
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_enable_activity() {
|
||||
return roctracer_enable_activity_expl(NULL);
|
||||
}
|
||||
|
||||
// Disable activity records logging
|
||||
roctracer_status_t roctracer_disable_op_activity(
|
||||
activity_domain_t domain, // tracing domain
|
||||
uint32_t op); // activity op ID
|
||||
roctracer_status_t roctracer_disable_domain_activity(
|
||||
activity_domain_t domain); // tracing domain
|
||||
roctracer_status_t roctracer_disable_op_activity(activity_domain_t domain, // tracing domain
|
||||
uint32_t op); // activity op ID
|
||||
roctracer_status_t roctracer_disable_domain_activity(activity_domain_t domain); // tracing domain
|
||||
roctracer_status_t roctracer_disable_activity();
|
||||
|
||||
// Flush available activity records
|
||||
roctracer_status_t roctracer_flush_activity_expl(
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_flush_activity()
|
||||
{
|
||||
return roctracer_flush_activity_expl(NULL);
|
||||
roctracer_pool_t* pool); // memory pool, NULL is a default one
|
||||
static inline roctracer_status_t roctracer_flush_activity() {
|
||||
return roctracer_flush_activity_expl(NULL);
|
||||
}
|
||||
|
||||
// Get system timestamp
|
||||
roctracer_status_t roctracer_get_timestamp(
|
||||
uint64_t* timestamp); // [out] return timestamp
|
||||
roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp); // [out] return timestamp
|
||||
|
||||
// Load/Unload methods
|
||||
bool roctracer_load();
|
||||
@@ -254,9 +240,8 @@ void roctracer_unload();
|
||||
void roctracer_flush_buf();
|
||||
|
||||
// Set properties
|
||||
roctracer_status_t roctracer_set_properties(
|
||||
roctracer_domain_t domain, // tracing domain
|
||||
void* propertes); // tracing properties
|
||||
roctracer_status_t roctracer_set_properties(roctracer_domain_t domain, // tracing domain
|
||||
void* propertes); // tracing properties
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
|
||||
@@ -28,9 +28,8 @@
|
||||
namespace roctracer {
|
||||
|
||||
// Generic callbacks table
|
||||
template <int N>
|
||||
class CbTable {
|
||||
public:
|
||||
template <int N> class CbTable {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
CbTable() {
|
||||
@@ -63,7 +62,7 @@ class CbTable {
|
||||
return ret;
|
||||
}
|
||||
|
||||
private:
|
||||
private:
|
||||
activity_rtapi_callback_t callback_[N];
|
||||
void* arg_[N];
|
||||
mutex_t mutex_;
|
||||
|
||||
@@ -62,7 +62,8 @@ roctracer_status_t roctracer_activity_push_external_correlation_id(activity_corr
|
||||
// Notifies that the calling thread is leaving an external API region.
|
||||
// Pop an external correlation id for the calling thread.
|
||||
// 'lastId' returns the last external correlation if not NULL
|
||||
roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id);
|
||||
roctracer_status_t roctracer_activity_pop_external_correlation_id(
|
||||
activity_correlation_id_t* last_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
|
||||
@@ -21,19 +21,14 @@
|
||||
#ifndef INC_ROCTRACER_HCC_H_
|
||||
#define INC_ROCTRACER_HCC_H_
|
||||
|
||||
enum {
|
||||
HIP_OP_ID_DISPATCH = 0,
|
||||
HIP_OP_ID_COPY = 1,
|
||||
HIP_OP_ID_BARRIER = 2,
|
||||
HIP_OP_ID_NUMBER = 3
|
||||
};
|
||||
enum { HIP_OP_ID_DISPATCH = 0, HIP_OP_ID_COPY = 1, HIP_OP_ID_BARRIER = 2, HIP_OP_ID_NUMBER = 3 };
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
typedef void (hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg);
|
||||
typedef bool (hipEnableAsyncActivityCallback_t)(unsigned op, bool enable);
|
||||
typedef const char* (hipGetOpName_t)(unsigned op);
|
||||
typedef void(hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg);
|
||||
typedef bool(hipEnableAsyncActivityCallback_t)(unsigned op, bool enable);
|
||||
typedef const char*(hipGetOpName_t)(unsigned op);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -25,12 +25,12 @@
|
||||
#include <iostream>
|
||||
|
||||
inline static std::ostream& operator<<(std::ostream& out, const unsigned char& v) {
|
||||
out << (unsigned int)v;
|
||||
out << (unsigned int)v;
|
||||
return out;
|
||||
}
|
||||
|
||||
inline static std::ostream& operator<<(std::ostream& out, const char& v) {
|
||||
out << (unsigned char)v;
|
||||
out << (unsigned char)v;
|
||||
return out;
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
@@ -42,9 +42,7 @@ enum hsa_op_id_t {
|
||||
|
||||
namespace roctracer {
|
||||
namespace hsa_support {
|
||||
enum {
|
||||
HSA_OP_ID_async_copy = 0
|
||||
};
|
||||
enum { HSA_OP_ID_async_copy = 0 };
|
||||
|
||||
extern CoreApiTable CoreApiTable_saved;
|
||||
extern AmdExtTable AmdExtTable_saved;
|
||||
@@ -57,18 +55,18 @@ struct ops_properties_t {
|
||||
const char* output_prefix;
|
||||
};
|
||||
|
||||
}; // namespace hsa_support
|
||||
}; // namespace hsa_support
|
||||
|
||||
typedef hsa_support::ops_properties_t hsa_ops_properties_t;
|
||||
}; // namespace roctracer
|
||||
}; // namespace roctracer
|
||||
|
||||
#include "hsa_ostream_ops.h"
|
||||
|
||||
|
||||
#else // !__cplusplus
|
||||
#else // !__cplusplus
|
||||
typedef void* hsa_amd_queue_intercept_handler;
|
||||
typedef void* hsa_amd_runtime_queue_notifier;
|
||||
#endif //! __cplusplus
|
||||
#endif //! __cplusplus
|
||||
|
||||
#include <hsa_prof_str.h>
|
||||
#endif // INC_ROCTRACER_HSA_H_
|
||||
#endif // INC_ROCTRACER_HSA_H_
|
||||
|
||||
@@ -47,8 +47,8 @@ enum roctx_api_id_t {
|
||||
typedef struct roctx_api_data_s {
|
||||
union {
|
||||
struct {
|
||||
const char* message;
|
||||
roctx_range_id_t id;
|
||||
const char* message;
|
||||
roctx_range_id_t id;
|
||||
};
|
||||
struct {
|
||||
const char* message;
|
||||
|
||||
+2
-2
@@ -59,8 +59,8 @@ void roctxMarkA(const char* message);
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Ranges annotating API
|
||||
|
||||
// Returns the 0 based level of a nested range being started by given message associated to this range.
|
||||
// A negative value is returned on the error.
|
||||
// Returns the 0 based level of a nested range being started by given message associated to this
|
||||
// range. A negative value is returned on the error.
|
||||
int roctxRangePushA(const char* message);
|
||||
#define roctxRangePush(message) roctxRangePushA(message)
|
||||
|
||||
|
||||
@@ -27,7 +27,7 @@ namespace roctracer {
|
||||
enum { API_CB_MASK = 0x1, ACT_CB_MASK = 0x2 };
|
||||
|
||||
class hip_act_cb_tracker_t {
|
||||
private:
|
||||
private:
|
||||
std::map<uint32_t, uint32_t> data;
|
||||
|
||||
public:
|
||||
|
||||
+73
-63
@@ -25,29 +25,29 @@
|
||||
#include <mutex>
|
||||
#include <dlfcn.h>
|
||||
|
||||
#define ONLD_TRACE(str) \
|
||||
if (getenv("ROCP_ONLOAD_TRACE")) do { \
|
||||
std::cout << "PID(" << GetPid() << "): TRACER_LOADER::" << __FUNCTION__ << " " << str << std::endl << std::flush; \
|
||||
} while(0);
|
||||
#define ONLD_TRACE(str) \
|
||||
if (getenv("ROCP_ONLOAD_TRACE")) do { \
|
||||
std::cout << "PID(" << GetPid() << "): TRACER_LOADER::" << __FUNCTION__ << " " << str \
|
||||
<< std::endl \
|
||||
<< std::flush; \
|
||||
} while (0);
|
||||
|
||||
namespace roctracer {
|
||||
|
||||
// Base runtime loader class
|
||||
template <class T>
|
||||
class BaseLoader : public T {
|
||||
template <class T> class BaseLoader : public T {
|
||||
static uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
|
||||
public:
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
typedef BaseLoader<T> loader_t;
|
||||
|
||||
bool Enabled() const { return (handle_ != NULL); }
|
||||
|
||||
template <class fun_t>
|
||||
fun_t* GetFun(const char* fun_name) {
|
||||
template <class fun_t> fun_t* GetFun(const char* fun_name) {
|
||||
if (handle_ == NULL) return NULL;
|
||||
|
||||
fun_t *f = (fun_t*) dlsym(handle_, fun_name);
|
||||
fun_t* f = (fun_t*)dlsym(handle_, fun_name);
|
||||
if ((to_check_symb_ == true) && (f == NULL)) {
|
||||
fprintf(stderr, "roctracer: symbol lookup '%s' failed: \"%s\"\n", fun_name, dlerror());
|
||||
abort();
|
||||
@@ -68,11 +68,11 @@ class BaseLoader : public T {
|
||||
}
|
||||
|
||||
static loader_t* GetRef() { return instance_; }
|
||||
static void SetLibName(const char *name) { lib_name_ = name; }
|
||||
static void SetLibName(const char* name) { lib_name_ = name; }
|
||||
|
||||
private:
|
||||
private:
|
||||
BaseLoader() {
|
||||
const int flags = (to_load_ == true) ? RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD;
|
||||
const int flags = (to_load_ == true) ? RTLD_LAZY : RTLD_LAZY | RTLD_NOLOAD;
|
||||
handle_ = dlopen(lib_name_, flags);
|
||||
ONLD_TRACE("(" << lib_name_ << " = " << handle_ << ")");
|
||||
if ((to_check_open_ == true) && (handle_ == NULL)) {
|
||||
@@ -99,14 +99,14 @@ class BaseLoader : public T {
|
||||
|
||||
// 'rocprofiler' library loader class
|
||||
class RocpApi {
|
||||
public:
|
||||
public:
|
||||
typedef BaseLoader<RocpApi> Loader;
|
||||
|
||||
typedef bool (RegisterCallback_t)(uint32_t op, void* callback, void* arg);
|
||||
typedef bool (OperateCallback_t)(uint32_t op);
|
||||
typedef bool (InitCallback_t)(void* callback, void* arg);
|
||||
typedef bool (EnableCallback_t)(uint32_t op, bool enable);
|
||||
typedef const char* (NameCallback_t)(uint32_t op);
|
||||
typedef bool(RegisterCallback_t)(uint32_t op, void* callback, void* arg);
|
||||
typedef bool(OperateCallback_t)(uint32_t op);
|
||||
typedef bool(InitCallback_t)(void* callback, void* arg);
|
||||
typedef bool(EnableCallback_t)(uint32_t op, bool enable);
|
||||
typedef const char*(NameCallback_t)(uint32_t op);
|
||||
|
||||
RegisterCallback_t* RegisterApiCallback;
|
||||
OperateCallback_t* RemoveApiCallback;
|
||||
@@ -118,7 +118,7 @@ class RocpApi {
|
||||
OperateCallback_t* RemoveEvtCallback;
|
||||
NameCallback_t* GetEvtName;
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void init(Loader* loader) {
|
||||
RegisterApiCallback = loader->GetFun<RegisterCallback_t>("RegisterApiCallback");
|
||||
RemoveApiCallback = loader->GetFun<OperateCallback_t>("RemoveApiCallback");
|
||||
@@ -134,29 +134,36 @@ class RocpApi {
|
||||
|
||||
// HIP runtime library loader class
|
||||
#if STATIC_BUILD
|
||||
__attribute__((weak)) hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) { return hipErrorUnknown; }
|
||||
__attribute__((weak)) hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) {
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
__attribute__((weak)) hipError_t hipRemoveApiCallback(uint32_t id) { return hipErrorUnknown; }
|
||||
__attribute__((weak)) hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) { return hipErrorUnknown; }
|
||||
__attribute__((weak)) hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) {
|
||||
return hipErrorUnknown;
|
||||
}
|
||||
__attribute__((weak)) hipError_t hipRemoveActivityCallback(uint32_t id) { return hipErrorUnknown; }
|
||||
__attribute__((weak)) const char* hipKernelNameRef(const hipFunction_t f) { return NULL; }
|
||||
__attribute__((weak)) const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream) { return NULL; }
|
||||
__attribute__((weak)) const char* hipKernelNameRefByPtr(const void* hostFunction,
|
||||
hipStream_t stream) {
|
||||
return NULL;
|
||||
}
|
||||
__attribute__((weak)) int hipGetStreamDeviceId(hipStream_t stream) { return 0; }
|
||||
__attribute__((weak)) const char* hipApiName(uint32_t id) { return NULL; }
|
||||
|
||||
class HipLoaderStatic {
|
||||
public:
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
typedef HipLoaderStatic loader_t;
|
||||
typedef std::atomic<loader_t*> instance_t;
|
||||
|
||||
typedef hipError_t (RegisterApiCallback_t)(uint32_t id, void* fun, void* arg);
|
||||
typedef hipError_t (RemoveApiCallback_t)(uint32_t id);
|
||||
typedef hipError_t (RegisterActivityCallback_t)(uint32_t id, void* fun, void* arg);
|
||||
typedef hipError_t (RemoveActivityCallback_t)(uint32_t id);
|
||||
typedef const char* (KernelNameRef_t)(const hipFunction_t f);
|
||||
typedef const char* (KernelNameRefByPtr_t)(const void* hostFunction, hipStream_t stream);
|
||||
typedef int (GetStreamDeviceId_t)(hipStream_t stream);
|
||||
typedef const char* (ApiName_t)(uint32_t id);
|
||||
typedef hipError_t(RegisterApiCallback_t)(uint32_t id, void* fun, void* arg);
|
||||
typedef hipError_t(RemoveApiCallback_t)(uint32_t id);
|
||||
typedef hipError_t(RegisterActivityCallback_t)(uint32_t id, void* fun, void* arg);
|
||||
typedef hipError_t(RemoveActivityCallback_t)(uint32_t id);
|
||||
typedef const char*(KernelNameRef_t)(const hipFunction_t f);
|
||||
typedef const char*(KernelNameRefByPtr_t)(const void* hostFunction, hipStream_t stream);
|
||||
typedef int(GetStreamDeviceId_t)(hipStream_t stream);
|
||||
typedef const char*(ApiName_t)(uint32_t id);
|
||||
|
||||
RegisterApiCallback_t* RegisterApiCallback;
|
||||
RemoveApiCallback_t* RemoveApiCallback;
|
||||
@@ -181,7 +188,7 @@ class HipLoaderStatic {
|
||||
|
||||
bool Enabled() const { return true; }
|
||||
|
||||
private:
|
||||
private:
|
||||
HipLoaderStatic() {
|
||||
RegisterApiCallback = hipRegisterApiCallback;
|
||||
RemoveApiCallback = hipRemoveApiCallback;
|
||||
@@ -189,7 +196,7 @@ class HipLoaderStatic {
|
||||
RemoveActivityCallback = hipRemoveActivityCallback;
|
||||
KernelNameRef = hipKernelNameRef;
|
||||
KernelNameRefByPtr = hipKernelNameRefByPtr;
|
||||
GetStreamDeviceId = hipGetStreamDeviceId;
|
||||
GetStreamDeviceId = hipGetStreamDeviceId;
|
||||
ApiName = hipApiName;
|
||||
}
|
||||
|
||||
@@ -198,7 +205,7 @@ class HipLoaderStatic {
|
||||
};
|
||||
#else
|
||||
class HipApi {
|
||||
public:
|
||||
public:
|
||||
typedef BaseLoader<HipApi> Loader;
|
||||
|
||||
typedef decltype(hipRegisterApiCallback) RegisterApiCallback_t;
|
||||
@@ -219,15 +226,16 @@ class HipApi {
|
||||
GetStreamDeviceId_t* GetStreamDeviceId;
|
||||
ApiName_t* ApiName;
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void init(Loader* loader) {
|
||||
RegisterApiCallback = loader->GetFun<RegisterApiCallback_t>("hipRegisterApiCallback");
|
||||
RemoveApiCallback = loader->GetFun<RemoveApiCallback_t>("hipRemoveApiCallback");
|
||||
RegisterActivityCallback = loader->GetFun<RegisterActivityCallback_t>("hipRegisterActivityCallback");
|
||||
RegisterActivityCallback =
|
||||
loader->GetFun<RegisterActivityCallback_t>("hipRegisterActivityCallback");
|
||||
RemoveActivityCallback = loader->GetFun<RemoveActivityCallback_t>("hipRemoveActivityCallback");
|
||||
KernelNameRef = loader->GetFun<KernelNameRef_t>("hipKernelNameRef");
|
||||
KernelNameRefByPtr = loader->GetFun<KernelNameRefByPtr_t>("hipKernelNameRefByPtr");
|
||||
GetStreamDeviceId = loader->GetFun<GetStreamDeviceId_t>("hipGetStreamDeviceId");
|
||||
GetStreamDeviceId = loader->GetFun<GetStreamDeviceId_t>("hipGetStreamDeviceId");
|
||||
ApiName = loader->GetFun<ApiName_t>("hipApiName");
|
||||
}
|
||||
};
|
||||
@@ -236,17 +244,19 @@ class HipApi {
|
||||
// HCC runtime library loader class
|
||||
#include "inc/roctracer_hcc.h"
|
||||
class HccApi {
|
||||
public:
|
||||
public:
|
||||
typedef BaseLoader<HccApi> Loader;
|
||||
|
||||
hipInitAsyncActivityCallback_t* InitActivityCallback;
|
||||
hipEnableAsyncActivityCallback_t* EnableActivityCallback;
|
||||
hipGetOpName_t* GetOpName;
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void init(Loader* loader) {
|
||||
InitActivityCallback = loader->GetFun<hipInitAsyncActivityCallback_t>("hipInitActivityCallback");
|
||||
EnableActivityCallback = loader->GetFun<hipEnableAsyncActivityCallback_t>("hipEnableActivityCallback");
|
||||
InitActivityCallback =
|
||||
loader->GetFun<hipInitAsyncActivityCallback_t>("hipInitActivityCallback");
|
||||
EnableActivityCallback =
|
||||
loader->GetFun<hipEnableAsyncActivityCallback_t>("hipEnableActivityCallback");
|
||||
GetOpName = loader->GetFun<hipGetOpName_t>("hipGetCmdName");
|
||||
}
|
||||
};
|
||||
@@ -254,7 +264,7 @@ class HccApi {
|
||||
// rocTX runtime library loader class
|
||||
#include "inc/roctracer_roctx.h"
|
||||
class RocTxApi {
|
||||
public:
|
||||
public:
|
||||
typedef BaseLoader<RocTxApi> Loader;
|
||||
|
||||
typedef decltype(RegisterApiCallback) RegisterApiCallback_t;
|
||||
@@ -265,7 +275,7 @@ class RocTxApi {
|
||||
RemoveApiCallback_t* RemoveApiCallback;
|
||||
RangeStackIterate_t* RangeStackIterate;
|
||||
|
||||
protected:
|
||||
protected:
|
||||
void init(Loader* loader) {
|
||||
RegisterApiCallback = loader->GetFun<RegisterApiCallback_t>("RegisterApiCallback");
|
||||
RemoveApiCallback = loader->GetFun<RemoveApiCallback_t>("RemoveApiCallback");
|
||||
@@ -284,31 +294,31 @@ typedef BaseLoader<HipApi> HipLoaderShared;
|
||||
typedef HipLoaderShared HipLoader;
|
||||
#endif
|
||||
|
||||
} // namespace roctracer
|
||||
} // namespace roctracer
|
||||
|
||||
#define LOADER_INSTANTIATE_2() \
|
||||
template<class T> typename roctracer::BaseLoader<T>::mutex_t roctracer::BaseLoader<T>::mutex_; \
|
||||
template<class T> std::atomic<roctracer::BaseLoader<T>*> roctracer::BaseLoader<T>::instance_{}; \
|
||||
template<class T> bool roctracer::BaseLoader<T>::to_load_ = false; \
|
||||
template<class T> bool roctracer::BaseLoader<T>::to_check_open_ = true; \
|
||||
template<class T> bool roctracer::BaseLoader<T>::to_check_symb_ = true; \
|
||||
template<> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \
|
||||
template<> bool roctracer::RocpLoader::to_load_ = true; \
|
||||
template<> const char* roctracer::HccLoader::lib_name_ = "libamdhip64.so"; \
|
||||
template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \
|
||||
template<> bool roctracer::RocTxLoader::to_load_ = true;
|
||||
#define LOADER_INSTANTIATE_2() \
|
||||
template <class T> typename roctracer::BaseLoader<T>::mutex_t roctracer::BaseLoader<T>::mutex_; \
|
||||
template <class T> std::atomic<roctracer::BaseLoader<T>*> roctracer::BaseLoader<T>::instance_{}; \
|
||||
template <class T> bool roctracer::BaseLoader<T>::to_load_ = false; \
|
||||
template <class T> bool roctracer::BaseLoader<T>::to_check_open_ = true; \
|
||||
template <class T> bool roctracer::BaseLoader<T>::to_check_symb_ = true; \
|
||||
template <> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \
|
||||
template <> bool roctracer::RocpLoader::to_load_ = true; \
|
||||
template <> const char* roctracer::HccLoader::lib_name_ = "libamdhip64.so"; \
|
||||
template <> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \
|
||||
template <> bool roctracer::RocTxLoader::to_load_ = true;
|
||||
|
||||
#if STATIC_BUILD
|
||||
#define LOADER_INSTANTIATE_HIP() \
|
||||
roctracer::HipLoaderStatic::mutex_t roctracer::HipLoaderStatic::mutex_; \
|
||||
#define LOADER_INSTANTIATE_HIP() \
|
||||
roctracer::HipLoaderStatic::mutex_t roctracer::HipLoaderStatic::mutex_; \
|
||||
roctracer::HipLoaderStatic::instance_t roctracer::HipLoaderStatic::instance_{};
|
||||
#else
|
||||
#define LOADER_INSTANTIATE_HIP() \
|
||||
template<> const char* roctracer::HipLoaderShared::lib_name_ = "libamdhip64.so";
|
||||
#define LOADER_INSTANTIATE_HIP() \
|
||||
template <> const char* roctracer::HipLoaderShared::lib_name_ = "libamdhip64.so";
|
||||
#endif
|
||||
|
||||
#define LOADER_INSTANTIATE() \
|
||||
LOADER_INSTANTIATE_2(); \
|
||||
#define LOADER_INSTANTIATE() \
|
||||
LOADER_INSTANTIATE_2(); \
|
||||
LOADER_INSTANTIATE_HIP();
|
||||
|
||||
#endif // SRC_CORE_LOADER_H_
|
||||
#endif // SRC_CORE_LOADER_H_
|
||||
|
||||
+15
-15
@@ -42,7 +42,7 @@
|
||||
namespace roctracer {
|
||||
|
||||
class MemoryPool {
|
||||
public:
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
|
||||
static void allocator_default(char** ptr, size_t size, void* arg) {
|
||||
@@ -52,12 +52,12 @@ class MemoryPool {
|
||||
} else if (size != 0) {
|
||||
*ptr = reinterpret_cast<char*>(realloc(*ptr, size));
|
||||
} else {
|
||||
free(*ptr);
|
||||
free(*ptr);
|
||||
*ptr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
MemoryPool(const roctracer_properties_t& properties) {
|
||||
MemoryPool(const roctracer_properties_t& properties) {
|
||||
// Assigning pool allocator
|
||||
alloc_fun_ = allocator_default;
|
||||
alloc_arg_ = NULL;
|
||||
@@ -89,14 +89,14 @@ class MemoryPool {
|
||||
~MemoryPool() {
|
||||
Flush();
|
||||
PTHREAD_CALL(pthread_cancel(consumer_thread_));
|
||||
void *res;
|
||||
void* res;
|
||||
PTHREAD_CALL(pthread_join(consumer_thread_, &res));
|
||||
if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly");
|
||||
if (res != PTHREAD_CANCELED)
|
||||
EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly");
|
||||
allocator_default(&pool_begin_, 0, alloc_arg_);
|
||||
}
|
||||
|
||||
template <typename Record>
|
||||
void Write(const Record& record) {
|
||||
template <typename Record> void Write(const Record& record) {
|
||||
std::lock_guard<mutex_t> lock(write_mutex_);
|
||||
getRecord<Record>(record);
|
||||
}
|
||||
@@ -112,7 +112,7 @@ class MemoryPool {
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
private:
|
||||
struct consumer_arg_t {
|
||||
MemoryPool* obj;
|
||||
const char* begin;
|
||||
@@ -126,11 +126,13 @@ class MemoryPool {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Record>
|
||||
Record* getRecord(const Record& init) {
|
||||
template <typename Record> Record* getRecord(const Record& init) {
|
||||
char* next = write_ptr_ + sizeof(Record);
|
||||
if (next > buffer_end_) {
|
||||
if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")");
|
||||
if (write_ptr_ == buffer_begin_)
|
||||
EXC_ABORT(ROCTRACER_STATUS_ERROR,
|
||||
"buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record)
|
||||
<< ")");
|
||||
spawn_reader(buffer_begin_, write_ptr_);
|
||||
buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_;
|
||||
buffer_end_ = buffer_begin_ + buffer_size_;
|
||||
@@ -145,12 +147,10 @@ class MemoryPool {
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void reset_reader(consumer_arg_t* arg) {
|
||||
arg->valid.store(false);
|
||||
}
|
||||
static void reset_reader(consumer_arg_t* arg) { arg->valid.store(false); }
|
||||
|
||||
static void sync_reader(const consumer_arg_t* arg) {
|
||||
while(arg->valid.load() == true) PTHREAD_CALL(sched_yield());
|
||||
while (arg->valid.load() == true) PTHREAD_CALL(sched_yield());
|
||||
}
|
||||
|
||||
static void* reader_fun(void* consumer_arg) {
|
||||
|
||||
+267
-258
@@ -66,7 +66,6 @@
|
||||
#define API_METHOD_PREFIX \
|
||||
roctracer_status_t err = ROCTRACER_STATUS_SUCCESS; \
|
||||
try {
|
||||
|
||||
#define API_METHOD_SUFFIX \
|
||||
} \
|
||||
catch (std::exception & e) { \
|
||||
@@ -83,10 +82,12 @@
|
||||
(void)err; \
|
||||
return X;
|
||||
|
||||
#define ONLOAD_TRACE(str) \
|
||||
if (getenv("ROCP_ONLOAD_TRACE")) do { \
|
||||
std::cout << "PID(" << GetPid() << "): TRACER_LIB::" << __FUNCTION__ << " " << str << std::endl << std::flush; \
|
||||
} while(0);
|
||||
#define ONLOAD_TRACE(str) \
|
||||
if (getenv("ROCP_ONLOAD_TRACE")) do { \
|
||||
std::cout << "PID(" << GetPid() << "): TRACER_LIB::" << __FUNCTION__ << " " << str \
|
||||
<< std::endl \
|
||||
<< std::flush; \
|
||||
} while (0);
|
||||
#define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin")
|
||||
#define ONLOAD_TRACE_END() ONLOAD_TRACE("end")
|
||||
|
||||
@@ -95,7 +96,8 @@ static inline uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Mark callback
|
||||
//
|
||||
typedef void (mark_api_callback_t)(uint32_t domain, uint32_t cid, const void* callback_data, void* arg);
|
||||
typedef void(mark_api_callback_t)(uint32_t domain, uint32_t cid, const void* callback_data,
|
||||
void* arg);
|
||||
mark_api_callback_t* mark_api_callback_ptr = NULL;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -195,7 +197,8 @@ ActJournal* act_journal;
|
||||
template <typename Functor> struct journal_functor_t {
|
||||
Functor func_;
|
||||
journal_functor_t(Functor&& f) : func_(std::forward<Functor>(f)) {}
|
||||
template <typename Data> bool operator ()(activity_domain_t domain, uint32_t op, Data&& /* data */) const {
|
||||
template <typename Data>
|
||||
bool operator()(activity_domain_t domain, uint32_t op, Data&& /* data */) const {
|
||||
func_(domain, op);
|
||||
return true;
|
||||
}
|
||||
@@ -208,13 +211,13 @@ using act_dis_functor_t = journal_functor_t<roctracer_disable_op_activity_t>;
|
||||
|
||||
template <>
|
||||
template <typename Data>
|
||||
bool cb_en_functor_t::operator ()(activity_domain_t domain, uint32_t op, Data&& data) const {
|
||||
bool cb_en_functor_t::operator()(activity_domain_t domain, uint32_t op, Data&& data) const {
|
||||
func_(domain, op, data.callback, data.user_data);
|
||||
return true;
|
||||
}
|
||||
template <>
|
||||
template <typename Data>
|
||||
bool act_en_functor_t::operator ()(activity_domain_t domain, uint32_t op, Data&& data) const {
|
||||
bool act_en_functor_t::operator()(activity_domain_t domain, uint32_t op, Data&& data) const {
|
||||
func_(domain, op, data.pool);
|
||||
return true;
|
||||
}
|
||||
@@ -222,9 +225,7 @@ bool act_en_functor_t::operator ()(activity_domain_t domain, uint32_t op, Data&&
|
||||
void hsa_async_copy_handler(::proxy::Tracker::entry_t* entry);
|
||||
void hsa_kernel_handler(::proxy::Tracker::entry_t* entry);
|
||||
constexpr TraceBuffer<trace_entry_t>::flush_prm_t trace_buffer_prm[] = {
|
||||
{COPY_ENTRY_TYPE, hsa_async_copy_handler},
|
||||
{KERNEL_ENTRY_TYPE, hsa_kernel_handler}
|
||||
};
|
||||
{COPY_ENTRY_TYPE, hsa_async_copy_handler}, {KERNEL_ENTRY_TYPE, hsa_kernel_handler}};
|
||||
TraceBuffer<trace_entry_t>* trace_buffer = NULL;
|
||||
|
||||
namespace hsa_support {
|
||||
@@ -250,18 +251,19 @@ roctracer_stop_cb_t roctracer_stop_cb = NULL;
|
||||
|
||||
roctracer_status_t GetExcStatus(const std::exception& e) {
|
||||
const util::exception* roctracer_exc_ptr = dynamic_cast<const util::exception*>(&e);
|
||||
return (roctracer_exc_ptr) ? static_cast<roctracer_status_t>(roctracer_exc_ptr->status()) : ROCTRACER_STATUS_ERROR;
|
||||
return (roctracer_exc_ptr) ? static_cast<roctracer_status_t>(roctracer_exc_ptr->status())
|
||||
: ROCTRACER_STATUS_ERROR;
|
||||
}
|
||||
|
||||
class GlobalCounter {
|
||||
public:
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
typedef uint64_t counter_t;
|
||||
typedef std::atomic<counter_t> atomic_counter_t;
|
||||
|
||||
static counter_t Increment() { return counter_.fetch_add(1, std::memory_order_relaxed); }
|
||||
|
||||
private:
|
||||
private:
|
||||
static mutex_t mutex_;
|
||||
static atomic_counter_t counter_;
|
||||
};
|
||||
@@ -273,12 +275,12 @@ struct roctracer_api_data_t {
|
||||
union {
|
||||
hip_api_data_t hip;
|
||||
};
|
||||
roctracer_api_data_t() {};
|
||||
roctracer_api_data_t(){};
|
||||
};
|
||||
struct record_pair_t {
|
||||
roctracer_record_t record;
|
||||
roctracer_api_data_t data;
|
||||
record_pair_t() {};
|
||||
record_pair_t(){};
|
||||
};
|
||||
typedef std::stack<record_pair_t> record_pair_stack_t;
|
||||
static thread_local record_pair_stack_t* record_pair_stack = NULL;
|
||||
@@ -295,15 +297,18 @@ static thread_local std::stack<activity_correlation_id_t> external_id_stack;
|
||||
static inline void CorrelationIdRegistr(const activity_correlation_id_t& correlation_id) {
|
||||
std::lock_guard<correlation_id_mutex_t> lck(correlation_id_mutex);
|
||||
const auto ret = correlation_id_map.insert({correlation_id, correlation_id_tls});
|
||||
if (ret.second == false) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id is not unique(" << correlation_id << ")");
|
||||
if (ret.second == false)
|
||||
EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id is not unique(" << correlation_id << ")");
|
||||
|
||||
DEBUG_TRACE("CorrelationIdRegistr id(%lu) id_tls(%lu)\n", correlation_id, correlation_id_tls);
|
||||
}
|
||||
|
||||
static inline activity_correlation_id_t CorrelationIdLookup(const activity_correlation_id_t& correlation_id) {
|
||||
static inline activity_correlation_id_t CorrelationIdLookup(
|
||||
const activity_correlation_id_t& correlation_id) {
|
||||
std::lock_guard<correlation_id_mutex_t> lck(correlation_id_mutex);
|
||||
auto it = correlation_id_map.find(correlation_id);
|
||||
if (it == correlation_id_map.end()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id lookup failed(" << correlation_id << ")");
|
||||
if (it == correlation_id_map.end())
|
||||
EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id lookup failed(" << correlation_id << ")");
|
||||
const activity_correlation_id_t ret_val = it->second;
|
||||
correlation_id_map.erase(it);
|
||||
|
||||
@@ -320,14 +325,14 @@ hip_act_cb_tracker_t* hip_act_cb_tracker = NULL;
|
||||
inline uint32_t HipApiActivityEnableCheck(uint32_t op) {
|
||||
if (hip_act_cb_tracker == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "hip_act_cb_tracker is NULL");
|
||||
const uint32_t mask = hip_act_cb_tracker->enable_check(op, API_CB_MASK);
|
||||
const uint32_t ret = (mask & ACT_CB_MASK);
|
||||
const uint32_t ret = (mask & ACT_CB_MASK);
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline uint32_t HipApiActivityDisableCheck(uint32_t op) {
|
||||
if (hip_act_cb_tracker == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "hip_act_cb_tracker is NULL");
|
||||
const uint32_t mask = hip_act_cb_tracker->disable_check(op, API_CB_MASK);
|
||||
const uint32_t ret = (mask & ACT_CB_MASK);
|
||||
const uint32_t ret = (mask & ACT_CB_MASK);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -340,16 +345,12 @@ inline uint32_t HipActActivityEnableCheck(uint32_t op) {
|
||||
inline uint32_t HipActActivityDisableCheck(uint32_t op) {
|
||||
if (hip_act_cb_tracker == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "hip_act_cb_tracker is NULL");
|
||||
const uint32_t mask = hip_act_cb_tracker->disable_check(op, ACT_CB_MASK);
|
||||
const uint32_t ret = (mask & API_CB_MASK);
|
||||
const uint32_t ret = (mask & API_CB_MASK);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void* HIP_SyncApiDataCallback(
|
||||
uint32_t op_id,
|
||||
roctracer_record_t* record,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
void* HIP_SyncApiDataCallback(uint32_t op_id, roctracer_record_t* record, const void* callback_data,
|
||||
void* arg) {
|
||||
static hsa_rt_utils::Timer timer;
|
||||
if (record_pair_stack == NULL) record_pair_stack = new record_pair_stack_t;
|
||||
|
||||
@@ -397,19 +398,18 @@ void* HIP_SyncApiDataCallback(
|
||||
correlation_id_tls = 0;
|
||||
}
|
||||
|
||||
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0);
|
||||
DEBUG_TRACE("HIP_SyncApiDataCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) correlation_id(%lu) time_ns(%lu)\n",
|
||||
name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0, timer.timestamp_ns());
|
||||
const char* name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0);
|
||||
DEBUG_TRACE(
|
||||
"HIP_SyncApiDataCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) "
|
||||
"correlation_id(%lu) time_ns(%lu)\n",
|
||||
name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()),
|
||||
(data_ptr) ? data_ptr->correlation_id : 0, timer.timestamp_ns());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void* HIP_SyncActivityCallback(
|
||||
uint32_t op_id,
|
||||
roctracer_record_t* record,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
void* HIP_SyncActivityCallback(uint32_t op_id, roctracer_record_t* record,
|
||||
const void* callback_data, void* arg) {
|
||||
static hsa_rt_utils::Timer timer;
|
||||
const timestamp_t timestamp_ns = timer.timestamp_ns();
|
||||
if (record_pair_stack == NULL) record_pair_stack = new record_pair_stack_t;
|
||||
@@ -462,8 +462,9 @@ void* HIP_SyncActivityCallback(
|
||||
|
||||
// Getting record of stacked
|
||||
if (record == NULL) {
|
||||
if (record_pair_stack->empty()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: record stack is empty");
|
||||
auto& top = record_pair_stack->top();
|
||||
if (record_pair_stack->empty())
|
||||
EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: record stack is empty");
|
||||
auto& top = record_pair_stack->top();
|
||||
record = &(top.record);
|
||||
}
|
||||
|
||||
@@ -491,9 +492,12 @@ void* HIP_SyncActivityCallback(
|
||||
correlation_id_tls = 0;
|
||||
}
|
||||
|
||||
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0);
|
||||
DEBUG_TRACE("HIP_SyncActivityCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) correlation_id(%lu) beg_ns(%lu) end_ns(%lu)\n",
|
||||
name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0, timestamp_ns);
|
||||
const char* name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0);
|
||||
DEBUG_TRACE(
|
||||
"HIP_SyncActivityCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) "
|
||||
"correlation_id(%lu) beg_ns(%lu) end_ns(%lu)\n",
|
||||
name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()),
|
||||
(data_ptr) ? data_ptr->correlation_id : 0, timestamp_ns);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -510,9 +514,12 @@ void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
|
||||
if (record_ptr->correlation_id == 0) return;
|
||||
pool->Write(*record_ptr);
|
||||
|
||||
const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HCC_OPS, record_ptr->op, record_ptr->kind);
|
||||
DEBUG_TRACE("HCC_AsyncActivityCallback(\"%s\"): op(%u) kind(%u) record(%p) pool(%p) correlation_id(%d) beg_ns(%lu) end_ns(%lu)\n",
|
||||
name, record_ptr->op, record_ptr->kind, record, pool, record_ptr->correlation_id, record_ptr->begin_ns, record_ptr->end_ns);
|
||||
const char* name = roctracer_op_string(ACTIVITY_DOMAIN_HCC_OPS, record_ptr->op, record_ptr->kind);
|
||||
DEBUG_TRACE(
|
||||
"HCC_AsyncActivityCallback(\"%s\"): op(%u) kind(%u) record(%p) pool(%p) correlation_id(%d) "
|
||||
"beg_ns(%lu) end_ns(%lu)\n",
|
||||
name, record_ptr->op, record_ptr->kind, record, pool, record_ptr->correlation_id,
|
||||
record_ptr->begin_ns, record_ptr->end_ns);
|
||||
}
|
||||
|
||||
// Open output file
|
||||
@@ -528,7 +535,8 @@ FILE* open_output_file(const char* prefix, const char* name) {
|
||||
perror(errmsg.str().c_str());
|
||||
abort();
|
||||
}
|
||||
} else file_handle = stdout;
|
||||
} else
|
||||
file_handle = stdout;
|
||||
return file_handle;
|
||||
}
|
||||
|
||||
@@ -542,16 +550,11 @@ void hsa_kernel_handler(::proxy::Tracker::entry_t* entry) {
|
||||
if (index == 0) {
|
||||
kernel_file_handle = open_output_file(hsa_support::output_prefix, "results.txt");
|
||||
}
|
||||
fprintf(kernel_file_handle, "dispatch[%lu], gpu-id(%u), tid(%u), kernel-name(\"%s\"), time(%lu,%lu,%lu,%lu)\n",
|
||||
index,
|
||||
//::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index,
|
||||
entry->dev_index,
|
||||
entry->kernel.tid,
|
||||
entry->kernel.name,
|
||||
entry->dispatch,
|
||||
entry->begin,
|
||||
entry->end,
|
||||
entry->complete);
|
||||
fprintf(kernel_file_handle,
|
||||
"dispatch[%lu], gpu-id(%u), tid(%u), kernel-name(\"%s\"), time(%lu,%lu,%lu,%lu)\n", index,
|
||||
//::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index,
|
||||
entry->dev_index, entry->kernel.tid, entry->kernel.name, entry->dispatch, entry->begin,
|
||||
entry->end, entry->complete);
|
||||
#if 0
|
||||
fprintf(file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), tid(%lu), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), kernel-name(\"%s\")",
|
||||
index,
|
||||
@@ -581,32 +584,29 @@ void hsa_kernel_handler(::proxy::Tracker::entry_t* entry) {
|
||||
|
||||
void hsa_async_copy_handler(::proxy::Tracker::entry_t* entry) {
|
||||
activity_record_t record{};
|
||||
record.domain = ACTIVITY_DOMAIN_HSA_OPS; // activity domain id
|
||||
record.begin_ns = entry->begin; // host begin timestamp
|
||||
record.end_ns = entry->end; // host end timestamp
|
||||
record.device_id = 0; // device id
|
||||
record.domain = ACTIVITY_DOMAIN_HSA_OPS; // activity domain id
|
||||
record.begin_ns = entry->begin; // host begin timestamp
|
||||
record.end_ns = entry->end; // host end timestamp
|
||||
record.device_id = 0; // device id
|
||||
|
||||
hsa_support::async_copy_callback_fun(hsa_support::HSA_OP_ID_async_copy, &record, hsa_support::async_copy_callback_arg);
|
||||
hsa_support::async_copy_callback_fun(hsa_support::HSA_OP_ID_async_copy, &record,
|
||||
hsa_support::async_copy_callback_arg);
|
||||
}
|
||||
|
||||
hsa_status_t hsa_amd_memory_async_copy_interceptor(
|
||||
void* dst, hsa_agent_t dst_agent, const void* src,
|
||||
hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals,
|
||||
const hsa_signal_t* dep_signals, hsa_signal_t completion_signal)
|
||||
{
|
||||
hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_agent,
|
||||
const void* src, hsa_agent_t src_agent,
|
||||
size_t size, uint32_t num_dep_signals,
|
||||
const hsa_signal_t* dep_signals,
|
||||
hsa_signal_t completion_signal) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
if (hsa_support::async_copy_callback_enabled) {
|
||||
trace_entry_t* entry = trace_buffer->GetEntry();
|
||||
::proxy::Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src,
|
||||
src_agent, size, num_dep_signals,
|
||||
status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals,
|
||||
dep_signals, entry->signal);
|
||||
if (status != HSA_STATUS_SUCCESS) ::proxy::Tracker::Disable(entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src,
|
||||
src_agent, size, num_dep_signals,
|
||||
} else {
|
||||
status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals,
|
||||
dep_signals, completion_signal);
|
||||
}
|
||||
return status;
|
||||
@@ -616,24 +616,18 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
|
||||
const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
|
||||
const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent,
|
||||
hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals,
|
||||
hsa_signal_t completion_signal)
|
||||
{
|
||||
hsa_signal_t completion_signal) {
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
if (hsa_support::async_copy_callback_enabled) {
|
||||
trace_entry_t* entry = trace_buffer->GetEntry();
|
||||
::proxy::Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src,
|
||||
src_offset, range, copy_agent,
|
||||
dir, num_dep_signals, dep_signals,
|
||||
entry->signal);
|
||||
status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent,
|
||||
dir, num_dep_signals, dep_signals, entry->signal);
|
||||
if (status != HSA_STATUS_SUCCESS) ::proxy::Tracker::Disable(entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src,
|
||||
src_offset, range, copy_agent,
|
||||
dir, num_dep_signals, dep_signals,
|
||||
completion_signal);
|
||||
} else {
|
||||
status =
|
||||
hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent, dir,
|
||||
num_dep_signals, dep_signals, completion_signal);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
@@ -685,11 +679,7 @@ PUBLIC_API const char* roctracer_error_string() {
|
||||
|
||||
// Return Op string by given domain and activity/API codes
|
||||
// NULL returned on the error and the library errno is set
|
||||
PUBLIC_API const char* roctracer_op_string(
|
||||
uint32_t domain,
|
||||
uint32_t op,
|
||||
uint32_t kind)
|
||||
{
|
||||
PUBLIC_API const char* roctracer_op_string(uint32_t domain, uint32_t op, uint32_t kind) {
|
||||
API_METHOD_PREFIX
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
@@ -711,18 +701,15 @@ PUBLIC_API const char* roctracer_op_string(
|
||||
}
|
||||
|
||||
// Return Op code and kind by given string
|
||||
PUBLIC_API roctracer_status_t roctracer_op_code(
|
||||
uint32_t domain,
|
||||
const char* str,
|
||||
uint32_t* op,
|
||||
uint32_t* kind)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_op_code(uint32_t domain, const char* str, uint32_t* op,
|
||||
uint32_t* kind) {
|
||||
API_METHOD_PREFIX
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
*op = roctracer::hsa_support::GetApiCode(str);
|
||||
if (*op == HSA_API_ID_NUMBER) {
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_PARAMETER, "Invalid API name \"" << str << "\", domain ID(" << domain << ")");
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_PARAMETER,
|
||||
"Invalid API name \"" << str << "\", domain ID(" << domain << ")");
|
||||
}
|
||||
if (kind != NULL) *kind = 0;
|
||||
break;
|
||||
@@ -730,7 +717,8 @@ PUBLIC_API roctracer_status_t roctracer_op_code(
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
*op = hipApiIdByName(str);
|
||||
if (*op == HIP_API_ID_NONE) {
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_PARAMETER, "Invalid API name \"" << str << "\", domain ID(" << domain << ")");
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_PARAMETER,
|
||||
"Invalid API name \"" << str << "\", domain ID(" << domain << ")");
|
||||
}
|
||||
if (kind != NULL) *kind = 0;
|
||||
break;
|
||||
@@ -743,13 +731,20 @@ PUBLIC_API roctracer_status_t roctracer_op_code(
|
||||
|
||||
static inline uint32_t get_op_begin(uint32_t domain) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: return 0;
|
||||
case ACTIVITY_DOMAIN_HSA_API: return 0;
|
||||
case ACTIVITY_DOMAIN_HSA_EVT: return 0;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: return 0;
|
||||
case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_FIRST;
|
||||
case ACTIVITY_DOMAIN_EXT_API: return 0;
|
||||
case ACTIVITY_DOMAIN_ROCTX: return 0;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
return 0;
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
return 0;
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
return 0;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS:
|
||||
return 0;
|
||||
case ACTIVITY_DOMAIN_HIP_API:
|
||||
return HIP_API_ID_FIRST;
|
||||
case ACTIVITY_DOMAIN_EXT_API:
|
||||
return 0;
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
return 0;
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
@@ -758,13 +753,21 @@ static inline uint32_t get_op_begin(uint32_t domain) {
|
||||
|
||||
static inline uint32_t get_op_end(uint32_t domain) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: return HSA_OP_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HSA_EVT: return HSA_EVT_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_LAST + 1;;
|
||||
case ACTIVITY_DOMAIN_EXT_API: return 0;
|
||||
case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
return HSA_OP_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
return HSA_API_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
return HSA_EVT_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS:
|
||||
return HIP_OP_ID_NUMBER;
|
||||
case ACTIVITY_DOMAIN_HIP_API:
|
||||
return HIP_API_ID_LAST + 1;
|
||||
;
|
||||
case ACTIVITY_DOMAIN_EXT_API:
|
||||
return 0;
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
return ROCTX_API_ID_NUMBER;
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
@@ -772,14 +775,12 @@ static inline uint32_t get_op_end(uint32_t domain) {
|
||||
}
|
||||
|
||||
// Enable runtime API callbacks
|
||||
static roctracer_status_t roctracer_enable_callback_fun(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op,
|
||||
roctracer_rtapi_callback_t callback,
|
||||
void* user_data)
|
||||
{
|
||||
static roctracer_status_t roctracer_enable_callback_fun(roctracer_domain_t domain, uint32_t op,
|
||||
roctracer_rtapi_callback_t callback,
|
||||
void* user_data) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
#if 0
|
||||
if (op == HSA_API_ID_DISPATCH) {
|
||||
@@ -792,28 +793,42 @@ static roctracer_status_t roctracer_enable_callback_fun(
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_EVT: {
|
||||
const bool succ = roctracer::RocpLoader::Instance().RegisterEvtCallback(op, (void*)callback, user_data);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RegisterEvtCallback error(" << op << ") failed");
|
||||
const bool succ =
|
||||
roctracer::RocpLoader::Instance().RegisterEvtCallback(op, (void*)callback, user_data);
|
||||
if (succ == false)
|
||||
HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR,
|
||||
"HSA::RegisterEvtCallback error(" << op << ") failed");
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
if (roctracer::HipLoader::Instance().Enabled() == false) break;
|
||||
std::lock_guard<roctracer::hip_activity_mutex_t> lock(roctracer::hip_activity_mutex);
|
||||
|
||||
hipError_t hip_err = roctracer::HipLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RegisterApiCallback(" << op << ") error(" << hip_err << ")");
|
||||
hipError_t hip_err =
|
||||
roctracer::HipLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
|
||||
if (hip_err != hipSuccess)
|
||||
HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR,
|
||||
"HIP::RegisterApiCallback(" << op << ") error(" << hip_err << ")");
|
||||
|
||||
if (roctracer::HipApiActivityEnableCheck(op) == 0) {
|
||||
hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(op, (void*)roctracer::HIP_SyncApiDataCallback, (void*)1);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIPAPI: HIP::RegisterActivityCallback(" << op << ") error(" << hip_err << ")");
|
||||
hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(
|
||||
op, (void*)roctracer::HIP_SyncApiDataCallback, (void*)1);
|
||||
if (hip_err != hipSuccess)
|
||||
HIP_EXC_RAISING(
|
||||
ROCTRACER_STATUS_HIP_API_ERR,
|
||||
"HIPAPI: HIP::RegisterActivityCallback(" << op << ") error(" << hip_err << ")");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_ROCTX: {
|
||||
if (roctracer::RocTxLoader::Instance().Enabled()) {
|
||||
const bool suc = roctracer::RocTxLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
|
||||
if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RegisterApiCallback(" << op << ") failed");
|
||||
const bool suc =
|
||||
roctracer::RocTxLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
|
||||
if (suc == false)
|
||||
EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR,
|
||||
"ROCTX::RegisterApiCallback(" << op << ") failed");
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -823,32 +838,23 @@ static roctracer_status_t roctracer_enable_callback_fun(
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static void roctracer_enable_callback_impl(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op,
|
||||
roctracer_rtapi_callback_t callback,
|
||||
void* user_data)
|
||||
{
|
||||
static void roctracer_enable_callback_impl(roctracer_domain_t domain, uint32_t op,
|
||||
roctracer_rtapi_callback_t callback, void* user_data) {
|
||||
roctracer::cb_journal->Insert(domain, op, {callback, user_data});
|
||||
roctracer_enable_callback_fun(domain, op, callback, user_data);
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_op_callback(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op,
|
||||
roctracer_rtapi_callback_t callback,
|
||||
void* user_data)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_op_callback(roctracer_domain_t domain, uint32_t op,
|
||||
roctracer_rtapi_callback_t callback,
|
||||
void* user_data) {
|
||||
API_METHOD_PREFIX
|
||||
roctracer_enable_callback_impl(domain, op, callback, user_data);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_domain_callback(
|
||||
roctracer_domain_t domain,
|
||||
roctracer_rtapi_callback_t callback,
|
||||
void* user_data)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_domain_callback(roctracer_domain_t domain,
|
||||
roctracer_rtapi_callback_t callback,
|
||||
void* user_data) {
|
||||
API_METHOD_PREFIX
|
||||
const uint32_t op_end = get_op_end(domain);
|
||||
for (uint32_t op = get_op_begin(domain); op < op_end; ++op)
|
||||
@@ -856,10 +862,8 @@ PUBLIC_API roctracer_status_t roctracer_enable_domain_callback(
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_callback(
|
||||
roctracer_rtapi_callback_t callback,
|
||||
void* user_data)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_callback(roctracer_rtapi_callback_t callback,
|
||||
void* user_data) {
|
||||
API_METHOD_PREFIX
|
||||
for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) {
|
||||
const uint32_t op_end = get_op_end(domain);
|
||||
@@ -870,12 +874,10 @@ PUBLIC_API roctracer_status_t roctracer_enable_callback(
|
||||
}
|
||||
|
||||
// Disable runtime API callbacks
|
||||
static roctracer_status_t roctracer_disable_callback_fun(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op)
|
||||
{
|
||||
static roctracer_status_t roctracer_disable_callback_fun(roctracer_domain_t domain, uint32_t op) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
#if 0
|
||||
if (op == HSA_API_ID_DISPATCH) {
|
||||
@@ -887,29 +889,38 @@ static roctracer_status_t roctracer_disable_callback_fun(
|
||||
roctracer::hsa_support::cb_table.set(op, NULL, NULL);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: break;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
if (roctracer::HipLoader::Instance().Enabled() == false) break;
|
||||
std::lock_guard<roctracer::hip_activity_mutex_t> lock(roctracer::hip_activity_mutex);
|
||||
|
||||
const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveApiCallback(" << op << "), error(" << hip_err << ")");
|
||||
if (hip_err != hipSuccess)
|
||||
HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR,
|
||||
"HIP::RemoveApiCallback(" << op << "), error(" << hip_err << ")");
|
||||
|
||||
if (roctracer::HipApiActivityDisableCheck(op) == 0) {
|
||||
const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIPAPI: HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")");
|
||||
if (hip_err != hipSuccess)
|
||||
HIP_EXC_RAISING(
|
||||
ROCTRACER_STATUS_HIP_API_ERR,
|
||||
"HIPAPI: HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_EVT: {
|
||||
const bool succ = roctracer::RocpLoader::Instance().RemoveEvtCallback(op);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RemoveEvtCallback error(" << op << ") failed");
|
||||
if (succ == false)
|
||||
HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR,
|
||||
"HSA::RemoveEvtCallback error(" << op << ") failed");
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_ROCTX: {
|
||||
if (roctracer::RocTxLoader::Instance().Enabled()) {
|
||||
const bool suc = roctracer::RocTxLoader::Instance().RemoveApiCallback(op);
|
||||
if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RemoveApiCallback(" << op << ") failed");
|
||||
if (suc == false)
|
||||
EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RemoveApiCallback(" << op << ") failed");
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -919,26 +930,19 @@ static roctracer_status_t roctracer_disable_callback_fun(
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static void roctracer_disable_callback_impl(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op)
|
||||
{
|
||||
roctracer::cb_journal->Remove(domain, op);
|
||||
roctracer_disable_callback_fun(domain, op);
|
||||
static void roctracer_disable_callback_impl(roctracer_domain_t domain, uint32_t op) {
|
||||
roctracer::cb_journal->Remove(domain, op);
|
||||
roctracer_disable_callback_fun(domain, op);
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_op_callback(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_op_callback(roctracer_domain_t domain,
|
||||
uint32_t op) {
|
||||
API_METHOD_PREFIX
|
||||
roctracer_disable_callback_impl(domain, op);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_domain_callback(
|
||||
roctracer_domain_t domain)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_domain_callback(roctracer_domain_t domain) {
|
||||
API_METHOD_PREFIX
|
||||
const uint32_t op_end = get_op_end(domain);
|
||||
for (uint32_t op = get_op_begin(domain); op < op_end; ++op)
|
||||
@@ -946,8 +950,7 @@ PUBLIC_API roctracer_status_t roctracer_disable_domain_callback(
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_callback()
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_callback() {
|
||||
API_METHOD_PREFIX
|
||||
for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) {
|
||||
const uint32_t op_end = get_op_end(domain);
|
||||
@@ -966,10 +969,8 @@ PUBLIC_API roctracer_pool_t* roctracer_default_pool_expl(roctracer_pool_t* pool)
|
||||
}
|
||||
|
||||
// Open memory pool
|
||||
PUBLIC_API roctracer_status_t roctracer_open_pool_expl(
|
||||
const roctracer_properties_t* properties,
|
||||
roctracer_pool_t** pool)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_open_pool_expl(const roctracer_properties_t* properties,
|
||||
roctracer_pool_t** pool) {
|
||||
API_METHOD_PREFIX
|
||||
std::lock_guard<roctracer::memory_pool_mutex_t> lock(roctracer::memory_pool_mutex);
|
||||
if ((pool == NULL) && (roctracer::memory_pool != NULL)) {
|
||||
@@ -977,8 +978,10 @@ PUBLIC_API roctracer_status_t roctracer_open_pool_expl(
|
||||
}
|
||||
roctracer::MemoryPool* p = new roctracer::MemoryPool(*properties);
|
||||
if (p == NULL) EXC_RAISING(ROCTRACER_STATUS_ERROR, "MemoryPool() error");
|
||||
if (pool != NULL) *pool = p;
|
||||
else roctracer::memory_pool = p;
|
||||
if (pool != NULL)
|
||||
*pool = p;
|
||||
else
|
||||
roctracer::memory_pool = p;
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -988,17 +991,14 @@ PUBLIC_API roctracer_status_t roctracer_close_pool_expl(roctracer_pool_t* pool)
|
||||
std::lock_guard<roctracer::memory_pool_mutex_t> lock(roctracer::memory_pool_mutex);
|
||||
roctracer_pool_t* ptr = (pool == NULL) ? roctracer_default_pool() : pool;
|
||||
roctracer::MemoryPool* memory_pool = reinterpret_cast<roctracer::MemoryPool*>(ptr);
|
||||
delete(memory_pool);
|
||||
delete (memory_pool);
|
||||
if (pool == NULL) roctracer::memory_pool = NULL;
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Enable activity records logging
|
||||
static roctracer_status_t roctracer_enable_activity_fun(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op,
|
||||
roctracer_pool_t* pool)
|
||||
{
|
||||
static roctracer_status_t roctracer_enable_activity_fun(roctracer_domain_t domain, uint32_t op,
|
||||
roctracer_pool_t* pool) {
|
||||
if (pool == NULL) pool = roctracer_default_pool();
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
@@ -1008,27 +1008,31 @@ static roctracer_status_t roctracer_enable_activity_fun(
|
||||
const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL);
|
||||
if (roctracer::RocpLoader::GetRef() == NULL) break;
|
||||
if (init_phase == true) {
|
||||
roctracer::RocpLoader::Instance().InitActivityCallback((void*)roctracer::HSA_AsyncActivityCallback,
|
||||
(void*)pool);
|
||||
roctracer::RocpLoader::Instance().InitActivityCallback(
|
||||
(void*)roctracer::HSA_AsyncActivityCallback, (void*)pool);
|
||||
}
|
||||
const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, true);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error");
|
||||
if (succ == false)
|
||||
HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_HSA_EVT: break;
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: {
|
||||
const bool init_phase = (roctracer::HccLoader::GetRef() == NULL);
|
||||
if (roctracer::HccLoader::Instance().Enabled() == false) break;
|
||||
|
||||
if (init_phase == true) {
|
||||
roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback,
|
||||
(void*)roctracer::HCC_AsyncActivityCallback,
|
||||
(void*)pool);
|
||||
roctracer::HccLoader::Instance().InitActivityCallback(
|
||||
(void*)roctracer::HCC_ActivityIdCallback, (void*)roctracer::HCC_AsyncActivityCallback,
|
||||
(void*)pool);
|
||||
}
|
||||
const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, true);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback error");
|
||||
if (succ == false)
|
||||
HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback error");
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
@@ -1036,41 +1040,38 @@ static roctracer_status_t roctracer_enable_activity_fun(
|
||||
std::lock_guard<roctracer::hip_activity_mutex_t> lock(roctracer::hip_activity_mutex);
|
||||
|
||||
if (roctracer::HipActActivityEnableCheck(op) == 0) {
|
||||
const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(op, (void*)roctracer::HIP_SyncActivityCallback, (void*)pool);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RegisterActivityCallback(" << op << " error(" << hip_err << ")");
|
||||
const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(
|
||||
op, (void*)roctracer::HIP_SyncActivityCallback, (void*)pool);
|
||||
if (hip_err != hipSuccess)
|
||||
HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR,
|
||||
"HIP::RegisterActivityCallback(" << op << " error(" << hip_err << ")");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_ROCTX: break;
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
break;
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static void roctracer_enable_activity_impl(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op,
|
||||
roctracer_pool_t* pool)
|
||||
{
|
||||
roctracer::act_journal->Insert(domain, op, {pool});
|
||||
roctracer_enable_activity_fun(domain, op, pool);
|
||||
static void roctracer_enable_activity_impl(roctracer_domain_t domain, uint32_t op,
|
||||
roctracer_pool_t* pool) {
|
||||
roctracer::act_journal->Insert(domain, op, {pool});
|
||||
roctracer_enable_activity_fun(domain, op, pool);
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_op_activity_expl(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op,
|
||||
roctracer_pool_t* pool)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_op_activity_expl(roctracer_domain_t domain,
|
||||
uint32_t op,
|
||||
roctracer_pool_t* pool) {
|
||||
API_METHOD_PREFIX
|
||||
roctracer_enable_activity_impl(domain, op, pool);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_domain_activity_expl(
|
||||
roctracer_domain_t domain,
|
||||
roctracer_pool_t* pool)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_domain_activity_expl(roctracer_domain_t domain,
|
||||
roctracer_pool_t* pool) {
|
||||
API_METHOD_PREFIX
|
||||
const uint32_t op_end = get_op_end(domain);
|
||||
for (uint32_t op = get_op_begin(domain); op < op_end; ++op)
|
||||
@@ -1078,9 +1079,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_domain_activity_expl(
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_activity_expl(
|
||||
roctracer_pool_t* pool)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_enable_activity_expl(roctracer_pool_t* pool) {
|
||||
API_METHOD_PREFIX
|
||||
for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) {
|
||||
const uint32_t op_end = get_op_end(domain);
|
||||
@@ -1091,10 +1090,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_activity_expl(
|
||||
}
|
||||
|
||||
// Disable activity records logging
|
||||
static roctracer_status_t roctracer_disable_activity_fun(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op)
|
||||
{
|
||||
static roctracer_status_t roctracer_disable_activity_fun(roctracer_domain_t domain, uint32_t op) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
if (op == HSA_OP_ID_COPY) {
|
||||
@@ -1102,17 +1098,23 @@ static roctracer_status_t roctracer_disable_activity_fun(
|
||||
} else {
|
||||
if (roctracer::RocpLoader::GetRef() == NULL) break;
|
||||
const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback(false) error, op(" << op << ")");
|
||||
if (succ == false)
|
||||
HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR,
|
||||
"HSA::EnableActivityCallback(false) error, op(" << op << ")");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
case ACTIVITY_DOMAIN_HSA_EVT: break;
|
||||
case ACTIVITY_DOMAIN_HSA_API:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_EVT:
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HCC_OPS: {
|
||||
if (roctracer::HccLoader::Instance().Enabled() == false) break;
|
||||
|
||||
const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false);
|
||||
if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error, op(" << op << ")");
|
||||
if (succ == false)
|
||||
HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR,
|
||||
"HCC::EnableActivityCallback(NULL) error, op(" << op << ")");
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
@@ -1121,40 +1123,40 @@ static roctracer_status_t roctracer_disable_activity_fun(
|
||||
|
||||
if (roctracer::HipActActivityDisableCheck(op) == 0) {
|
||||
const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")");
|
||||
if (hip_err != hipSuccess)
|
||||
HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR,
|
||||
"HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")");
|
||||
} else {
|
||||
const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(op, (void*)roctracer::HIP_SyncApiDataCallback, (void*)1);
|
||||
if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIPACT: HIP::RegisterActivityCallback(" << op << ") error(" << hip_err << ")");
|
||||
const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(
|
||||
op, (void*)roctracer::HIP_SyncApiDataCallback, (void*)1);
|
||||
if (hip_err != hipSuccess)
|
||||
HIP_EXC_RAISING(
|
||||
ROCTRACER_STATUS_HIP_API_ERR,
|
||||
"HIPACT: HIP::RegisterActivityCallback(" << op << ") error(" << hip_err << ")");
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_ROCTX: break;
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
break;
|
||||
default:
|
||||
EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
|
||||
}
|
||||
return ROCTRACER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
static void roctracer_disable_activity_impl(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op)
|
||||
{
|
||||
static void roctracer_disable_activity_impl(roctracer_domain_t domain, uint32_t op) {
|
||||
roctracer::act_journal->Remove(domain, op);
|
||||
roctracer_disable_activity_fun(domain, op);
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_op_activity(
|
||||
roctracer_domain_t domain,
|
||||
uint32_t op)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_op_activity(roctracer_domain_t domain,
|
||||
uint32_t op) {
|
||||
API_METHOD_PREFIX
|
||||
roctracer_disable_activity_impl(domain, op);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_domain_activity(
|
||||
roctracer_domain_t domain)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_domain_activity(roctracer_domain_t domain) {
|
||||
API_METHOD_PREFIX
|
||||
const uint32_t op_end = get_op_end(domain);
|
||||
for (uint32_t op = get_op_begin(domain); op < op_end; ++op)
|
||||
@@ -1162,8 +1164,7 @@ PUBLIC_API roctracer_status_t roctracer_disable_domain_activity(
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_activity()
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_disable_activity() {
|
||||
API_METHOD_PREFIX
|
||||
for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) {
|
||||
const uint32_t op_end = get_op_end(domain);
|
||||
@@ -1185,7 +1186,8 @@ PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* po
|
||||
|
||||
// Notifies that the calling thread is entering an external API region.
|
||||
// Push an external correlation id for the calling thread.
|
||||
PUBLIC_API roctracer_status_t roctracer_activity_push_external_correlation_id(activity_correlation_id_t id) {
|
||||
PUBLIC_API roctracer_status_t
|
||||
roctracer_activity_push_external_correlation_id(activity_correlation_id_t id) {
|
||||
API_METHOD_PREFIX
|
||||
roctracer::external_id_stack.push(id);
|
||||
API_METHOD_SUFFIX
|
||||
@@ -1194,7 +1196,8 @@ PUBLIC_API roctracer_status_t roctracer_activity_push_external_correlation_id(ac
|
||||
// Notifies that the calling thread is leaving an external API region.
|
||||
// Pop an external correlation id for the calling thread.
|
||||
// 'lastId' returns the last external correlation
|
||||
PUBLIC_API roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id) {
|
||||
PUBLIC_API roctracer_status_t
|
||||
roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id) {
|
||||
API_METHOD_PREFIX
|
||||
if (last_id != NULL) *last_id = 0;
|
||||
|
||||
@@ -1214,7 +1217,8 @@ PUBLIC_API roctracer_status_t roctracer_activity_pop_external_correlation_id(act
|
||||
PUBLIC_API void roctracer_mark(const char* str) {
|
||||
if (mark_api_callback_ptr) {
|
||||
mark_api_callback_ptr(ACTIVITY_DOMAIN_EXT_API, ACTIVITY_EXT_OP_MARK, str, NULL);
|
||||
roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id
|
||||
roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking
|
||||
// correlation id
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1245,15 +1249,14 @@ PUBLIC_API roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp) {
|
||||
}
|
||||
|
||||
// Set properties
|
||||
PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
roctracer_domain_t domain,
|
||||
void* properties)
|
||||
{
|
||||
PUBLIC_API roctracer_status_t roctracer_set_properties(roctracer_domain_t domain,
|
||||
void* properties) {
|
||||
API_METHOD_PREFIX
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
// HSA OPS properties
|
||||
roctracer::hsa_ops_properties_t* ops_properties = reinterpret_cast<roctracer::hsa_ops_properties_t*>(properties);
|
||||
roctracer::hsa_ops_properties_t* ops_properties =
|
||||
reinterpret_cast<roctracer::hsa_ops_properties_t*>(properties);
|
||||
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(ops_properties->table);
|
||||
roctracer::hsa_support::async_copy_callback_fun = ops_properties->async_copy_callback_fun;
|
||||
roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg;
|
||||
@@ -1271,9 +1274,12 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
hsa_status_t status = hsa_amd_profiling_async_copy_enable(true);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_async_copy_enable");
|
||||
roctracer::hsa_amd_memory_async_copy_fn = table->amd_ext_->hsa_amd_memory_async_copy_fn;
|
||||
roctracer::hsa_amd_memory_async_copy_rect_fn = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_fn = roctracer::hsa_amd_memory_async_copy_interceptor;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = roctracer::hsa_amd_memory_async_copy_rect_interceptor;
|
||||
roctracer::hsa_amd_memory_async_copy_rect_fn =
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_fn =
|
||||
roctracer::hsa_amd_memory_async_copy_interceptor;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn =
|
||||
roctracer::hsa_amd_memory_async_copy_rect_interceptor;
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -1291,11 +1297,13 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
case ACTIVITY_DOMAIN_HCC_OPS:
|
||||
case ACTIVITY_DOMAIN_HIP_API: {
|
||||
mark_api_callback_ptr = reinterpret_cast<mark_api_callback_t*>(properties);
|
||||
if (roctracer::hip_act_cb_tracker == NULL) roctracer::hip_act_cb_tracker = new roctracer::hip_act_cb_tracker_t;
|
||||
if (roctracer::hip_act_cb_tracker == NULL)
|
||||
roctracer::hip_act_cb_tracker = new roctracer::hip_act_cb_tracker_t;
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_EXT_API: {
|
||||
roctracer_ext_properties_t* ops_properties = reinterpret_cast<roctracer_ext_properties_t*>(properties);
|
||||
roctracer_ext_properties_t* ops_properties =
|
||||
reinterpret_cast<roctracer_ext_properties_t*>(properties);
|
||||
roctracer::ext_support::roctracer_start_cb = ops_properties->start_cb;
|
||||
roctracer::ext_support::roctracer_stop_cb = ops_properties->stop_cb;
|
||||
break;
|
||||
@@ -1349,7 +1357,8 @@ PUBLIC_API void roctracer_flush_buf() {
|
||||
CONSTRUCTOR_API void constructor() {
|
||||
ONLOAD_TRACE_BEG();
|
||||
roctracer::util::Logger::Create();
|
||||
roctracer::trace_buffer = new roctracer::TraceBuffer<roctracer::trace_entry_t>("HSA GPU", 0x200000, roctracer::trace_buffer_prm, 2);
|
||||
roctracer::trace_buffer = new roctracer::TraceBuffer<roctracer::trace_entry_t>(
|
||||
"HSA GPU", 0x200000, roctracer::trace_buffer_prm, 2);
|
||||
roctracer_load();
|
||||
ONLOAD_TRACE_END();
|
||||
}
|
||||
|
||||
+36
-36
@@ -50,11 +50,7 @@
|
||||
} while (0)
|
||||
|
||||
namespace roctracer {
|
||||
enum {
|
||||
TRACE_ENTRY_INV = 0,
|
||||
TRACE_ENTRY_INIT = 1,
|
||||
TRACE_ENTRY_COMPL = 2
|
||||
};
|
||||
enum { TRACE_ENTRY_INV = 0, TRACE_ENTRY_INIT = 1, TRACE_ENTRY_COMPL = 2 };
|
||||
|
||||
enum entry_type_t {
|
||||
DFLT_ENTRY_TYPE = 0,
|
||||
@@ -68,8 +64,8 @@ struct trace_entry_t {
|
||||
std::atomic<uint32_t> valid;
|
||||
entry_type_t type;
|
||||
uint64_t dispatch;
|
||||
uint64_t begin; // kernel begin timestamp, ns
|
||||
uint64_t end; // kernel end timestamp, ns
|
||||
uint64_t begin; // kernel begin timestamp, ns
|
||||
uint64_t end; // kernel end timestamp, ns
|
||||
uint64_t complete;
|
||||
hsa_agent_t agent;
|
||||
uint32_t dev_index;
|
||||
@@ -86,8 +82,7 @@ struct trace_entry_t {
|
||||
};
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct push_element_fun {
|
||||
template <class T> struct push_element_fun {
|
||||
T* const elem_;
|
||||
T** prev_;
|
||||
bool fun(T* node) {
|
||||
@@ -105,10 +100,12 @@ struct push_element_fun {
|
||||
push_element_fun(T* elem, T** prev) : elem_(elem), prev_(prev) {}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
struct call_element_fun {
|
||||
template <class T> struct call_element_fun {
|
||||
void (T::*fptr_)();
|
||||
bool fun(T* node) const { (node->*fptr_)(); return false; }
|
||||
bool fun(T* node) const {
|
||||
(node->*fptr_)();
|
||||
return false;
|
||||
}
|
||||
call_element_fun(void (T::*f)()) : fptr_(f) {}
|
||||
};
|
||||
|
||||
@@ -118,18 +115,26 @@ struct TraceBufferBase {
|
||||
virtual void StartWorkerThread() = 0;
|
||||
virtual void Flush() = 0;
|
||||
|
||||
static void StartWorkerThreadAll() { foreach(call_element_fun<TraceBufferBase>(&TraceBufferBase::StartWorkerThread)); }
|
||||
static void FlushAll() { foreach(call_element_fun<TraceBufferBase>(&TraceBufferBase::Flush)); }
|
||||
static void StartWorkerThreadAll() {
|
||||
foreach (call_element_fun<TraceBufferBase>(&TraceBufferBase::StartWorkerThread))
|
||||
;
|
||||
}
|
||||
static void FlushAll() {
|
||||
foreach (call_element_fun<TraceBufferBase>(&TraceBufferBase::Flush))
|
||||
;
|
||||
}
|
||||
|
||||
static void Push(TraceBufferBase* elem) {
|
||||
if (head_elem_ == NULL) head_elem_ = elem;
|
||||
else foreach(push_element_fun<TraceBufferBase>(elem, &head_elem_));
|
||||
if (head_elem_ == NULL)
|
||||
head_elem_ = elem;
|
||||
else
|
||||
foreach (push_element_fun<TraceBufferBase>(elem, &head_elem_))
|
||||
;
|
||||
}
|
||||
|
||||
TraceBufferBase(const uint32_t& prior) : priority_(prior), next_elem_(NULL) {}
|
||||
|
||||
template<class F>
|
||||
static void foreach(const F& f_in) {
|
||||
template <class F> static void foreach (const F& f_in) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
F f = f_in;
|
||||
TraceBufferBase* p = head_elem_;
|
||||
@@ -146,9 +151,8 @@ struct TraceBufferBase {
|
||||
static mutex_t mutex_;
|
||||
};
|
||||
|
||||
template <typename Entry>
|
||||
class TraceBuffer : protected TraceBufferBase {
|
||||
public:
|
||||
template <typename Entry> class TraceBuffer : protected TraceBufferBase {
|
||||
public:
|
||||
typedef void (*callback_t)(Entry*);
|
||||
typedef TraceBuffer<Entry> Obj;
|
||||
typedef uint64_t pointer_t;
|
||||
@@ -161,11 +165,9 @@ class TraceBuffer : protected TraceBufferBase {
|
||||
callback_t fun;
|
||||
};
|
||||
|
||||
TraceBuffer(const char* name, uint32_t size, const flush_prm_t* flush_prm_arr, uint32_t flush_prm_count, uint32_t prior = 0) :
|
||||
TraceBufferBase(prior),
|
||||
size_(size),
|
||||
work_thread_started_(false)
|
||||
{
|
||||
TraceBuffer(const char* name, uint32_t size, const flush_prm_t* flush_prm_arr,
|
||||
uint32_t flush_prm_count, uint32_t prior = 0)
|
||||
: TraceBufferBase(prior), size_(size), work_thread_started_(false) {
|
||||
name_ = strdup(name);
|
||||
data_ = allocate_fun();
|
||||
next_ = allocate_fun();
|
||||
@@ -204,7 +206,7 @@ class TraceBuffer : protected TraceBufferBase {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (work_thread_started_ == true) {
|
||||
PTHREAD_CALL(pthread_cancel(work_thread_));
|
||||
void *res;
|
||||
void* res;
|
||||
PTHREAD_CALL(pthread_join(work_thread_, &res));
|
||||
if (res != PTHREAD_CANCELED) FATAL("consumer thread wasn't stopped correctly");
|
||||
work_thread_started_ = false;
|
||||
@@ -223,7 +225,7 @@ class TraceBuffer : protected TraceBufferBase {
|
||||
|
||||
void Flush() { flush_buf(); }
|
||||
|
||||
private:
|
||||
private:
|
||||
void flush_buf() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
|
||||
@@ -231,7 +233,7 @@ class TraceBuffer : protected TraceBufferBase {
|
||||
pointer_t curr_pointer = write_pointer_.load(std::memory_order_relaxed);
|
||||
buf_list_it_t it = buf_list_.begin();
|
||||
buf_list_it_t end_it = buf_list_.end();
|
||||
while(it != end_it) {
|
||||
while (it != end_it) {
|
||||
Entry* buf = *it;
|
||||
Entry* ptr = buf + (pointer % size_);
|
||||
Entry* end_ptr = buf + size_;
|
||||
@@ -261,15 +263,13 @@ class TraceBuffer : protected TraceBufferBase {
|
||||
}
|
||||
|
||||
inline Entry* allocate_fun() {
|
||||
Entry* ptr = (Entry*) malloc(size_ * sizeof(Entry));
|
||||
Entry* ptr = (Entry*)malloc(size_ * sizeof(Entry));
|
||||
if (ptr == NULL) FATAL("malloc failed");
|
||||
//memset(ptr, 0, size_ * sizeof(Entry));
|
||||
// memset(ptr, 0, size_ * sizeof(Entry));
|
||||
return ptr;
|
||||
}
|
||||
|
||||
inline void free_fun(void* ptr) {
|
||||
free(ptr);
|
||||
}
|
||||
inline void free_fun(void* ptr) { free(ptr); }
|
||||
|
||||
static void* allocate_worker(void* arg) {
|
||||
Obj* obj = (Obj*)arg;
|
||||
@@ -321,8 +321,8 @@ class TraceBuffer : protected TraceBufferBase {
|
||||
};
|
||||
} // namespace roctracer
|
||||
|
||||
#define TRACE_BUFFER_INSTANTIATE() \
|
||||
roctracer::TraceBufferBase* roctracer::TraceBufferBase::head_elem_ = NULL; \
|
||||
#define TRACE_BUFFER_INSTANTIATE() \
|
||||
roctracer::TraceBufferBase* roctracer::TraceBufferBase::head_elem_ = NULL; \
|
||||
roctracer::TraceBufferBase::mutex_t roctracer::TraceBufferBase::mutex_;
|
||||
|
||||
#endif // SRC_CORE_TRACE_BUFFER_H_
|
||||
|
||||
@@ -31,9 +31,7 @@ class HsaQueue : public Queue {
|
||||
public:
|
||||
HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) : queue_(queue) {}
|
||||
|
||||
void Submit(const packet_t* packet) {
|
||||
util::HsaRsrcFactory::Instance().Submit(queue_, packet);
|
||||
}
|
||||
void Submit(const packet_t* packet) { util::HsaRsrcFactory::Instance().Submit(queue_, packet); }
|
||||
|
||||
private:
|
||||
hsa_queue_t* queue_;
|
||||
|
||||
@@ -27,10 +27,10 @@ void InterceptQueue::HsaIntercept(HsaApiTable* table) {
|
||||
}
|
||||
|
||||
InterceptQueue::mutex_t InterceptQueue::mutex_;
|
||||
//rocprofiler_callback_t InterceptQueue::dispatch_callback_ = NULL;
|
||||
//InterceptQueue::queue_callback_t InterceptQueue::create_callback_ = NULL;
|
||||
//InterceptQueue::queue_callback_t InterceptQueue::destroy_callback_ = NULL;
|
||||
//void* InterceptQueue::callback_data_ = NULL;
|
||||
// rocprofiler_callback_t InterceptQueue::dispatch_callback_ = NULL;
|
||||
// InterceptQueue::queue_callback_t InterceptQueue::create_callback_ = NULL;
|
||||
// InterceptQueue::queue_callback_t InterceptQueue::destroy_callback_ = NULL;
|
||||
// void* InterceptQueue::callback_data_ = NULL;
|
||||
InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL;
|
||||
const char* InterceptQueue::kernel_none_ = "";
|
||||
bool InterceptQueue::in_create_call_ = false;
|
||||
|
||||
@@ -37,7 +37,9 @@
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/exception.h"
|
||||
|
||||
namespace roctracer { extern TraceBuffer<trace_entry_t>* trace_buffer; }
|
||||
namespace roctracer {
|
||||
extern TraceBuffer<trace_entry_t>* trace_buffer;
|
||||
}
|
||||
|
||||
namespace rocprofiler {
|
||||
extern decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
@@ -48,25 +50,25 @@ class InterceptQueue {
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
typedef std::map<uint64_t, InterceptQueue*> obj_map_t;
|
||||
typedef hsa_status_t (*queue_callback_t)(hsa_queue_t*, void* data);
|
||||
typedef void (*queue_event_callback_t)(hsa_status_t status, hsa_queue_t *queue, void *arg);
|
||||
typedef void (*queue_event_callback_t)(hsa_status_t status, hsa_queue_t* queue, void* arg);
|
||||
typedef uint32_t queue_id_t;
|
||||
|
||||
static void HsaIntercept(HsaApiTable* table);
|
||||
|
||||
static hsa_status_t InterceptQueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source,
|
||||
void* data),
|
||||
void* data, uint32_t private_segment_size,
|
||||
uint32_t group_segment_size, hsa_queue_t** queue,
|
||||
const bool& tracker_on) {
|
||||
static hsa_status_t InterceptQueueCreate(
|
||||
hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data,
|
||||
uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue,
|
||||
const bool& tracker_on) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
if (in_create_call_) EXC_ABORT(status, "recursive InterceptQueueCreate()");
|
||||
in_create_call_ = true;
|
||||
|
||||
ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, queue_event_callback, data, private_segment_size,
|
||||
group_segment_size, queue, &status);
|
||||
ProxyQueue* proxy =
|
||||
ProxyQueue::Create(agent, size, type, queue_event_callback, data, private_segment_size,
|
||||
group_segment_size, queue, &status);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "ProxyQueue::Create()");
|
||||
|
||||
status = util::HsaRsrcFactory::HsaApi()->hsa_amd_profiling_set_profiler_enabled(*queue, true);
|
||||
@@ -78,7 +80,8 @@ class InterceptQueue {
|
||||
obj->queue_id = current_queue_id;
|
||||
(*obj_map_)[(uint64_t)(*queue)] = obj;
|
||||
|
||||
status = (is_enabled) ? proxy->SetInterceptCB(OnSubmitCB, obj) : proxy->SetInterceptCB(OnSubmitCB_dummy, obj);
|
||||
status = (is_enabled) ? proxy->SetInterceptCB(OnSubmitCB, obj)
|
||||
: proxy->SetInterceptCB(OnSubmitCB_dummy, obj);
|
||||
|
||||
#if 0
|
||||
if (create_callback_ != NULL) {
|
||||
@@ -96,15 +99,17 @@ class InterceptQueue {
|
||||
void* data),
|
||||
void* data, uint32_t private_segment_size,
|
||||
uint32_t group_segment_size, hsa_queue_t** queue) {
|
||||
return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, false);
|
||||
return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size,
|
||||
group_segment_size, queue, false);
|
||||
}
|
||||
|
||||
static hsa_status_t QueueCreateTracked(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source,
|
||||
void* data),
|
||||
void* data, uint32_t private_segment_size,
|
||||
uint32_t group_segment_size, hsa_queue_t** queue) {
|
||||
return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, true);
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source,
|
||||
void* data),
|
||||
void* data, uint32_t private_segment_size,
|
||||
uint32_t group_segment_size, hsa_queue_t** queue) {
|
||||
return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size,
|
||||
group_segment_size, queue, true);
|
||||
}
|
||||
|
||||
static hsa_status_t QueueDestroy(hsa_queue_t* queue) {
|
||||
@@ -122,8 +127,8 @@ class InterceptQueue {
|
||||
return status;
|
||||
}
|
||||
|
||||
static void OnSubmitCB_dummy(const void* in_packets, uint64_t count, uint64_t user_que_idx, void* data,
|
||||
hsa_amd_queue_intercept_packet_writer writer) {
|
||||
static void OnSubmitCB_dummy(const void* in_packets, uint64_t count, uint64_t user_que_idx,
|
||||
void* data, hsa_amd_queue_intercept_packet_writer writer) {
|
||||
const packet_t* packets_arr = reinterpret_cast<const packet_t*>(in_packets);
|
||||
|
||||
// Submitting the original packets if profiling was not enabled
|
||||
@@ -161,8 +166,10 @@ class InterceptQueue {
|
||||
::proxy::Tracker::entry_t* entry = roctracer::trace_buffer->GetEntry();
|
||||
entry->kernel.tid = syscall(__NR_gettid);
|
||||
entry->kernel.name = kernel_name;
|
||||
::proxy::Tracker::Enable(roctracer::KERNEL_ENTRY_TYPE, obj->agent_info_->dev_id, completion_signal, entry);
|
||||
const_cast<hsa_kernel_dispatch_packet_t*>(dispatch_packet)->completion_signal = entry->signal;
|
||||
::proxy::Tracker::Enable(roctracer::KERNEL_ENTRY_TYPE, obj->agent_info_->dev_id,
|
||||
completion_signal, entry);
|
||||
const_cast<hsa_kernel_dispatch_packet_t*>(dispatch_packet)->completion_signal =
|
||||
entry->signal;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -190,7 +197,7 @@ class InterceptQueue {
|
||||
static void Enable(bool val) { is_enabled = val; }
|
||||
|
||||
private:
|
||||
static void queue_event_callback(hsa_status_t status, hsa_queue_t *queue, void *arg) {
|
||||
static void queue_event_callback(hsa_status_t status, hsa_queue_t* queue, void* arg) {
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "queue error handling is not supported");
|
||||
InterceptQueue* obj = GetObj(queue);
|
||||
if (obj->queue_event_callback_) obj->queue_event_callback_(status, obj->queue_, arg);
|
||||
@@ -201,7 +208,8 @@ class InterceptQueue {
|
||||
return static_cast<hsa_packet_type_t>((*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask);
|
||||
}
|
||||
|
||||
static const amd_kernel_code_t* GetKernelCode(const hsa_kernel_dispatch_packet_t* dispatch_packet) {
|
||||
static const amd_kernel_code_t* GetKernelCode(
|
||||
const hsa_kernel_dispatch_packet_t* dispatch_packet) {
|
||||
const amd_kernel_code_t* kernel_code = NULL;
|
||||
hsa_status_t status =
|
||||
util::HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address(
|
||||
@@ -256,22 +264,19 @@ class InterceptQueue {
|
||||
assert(queue == obj->queue_);
|
||||
delete obj;
|
||||
obj_map_->erase(it);
|
||||
status = HSA_STATUS_SUCCESS;;
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) :
|
||||
queue_(queue),
|
||||
proxy_(proxy)
|
||||
{
|
||||
InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy)
|
||||
: queue_(queue), proxy_(proxy) {
|
||||
agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent);
|
||||
queue_event_callback_ = NULL;
|
||||
}
|
||||
|
||||
~InterceptQueue() {
|
||||
ProxyQueue::Destroy(proxy_);
|
||||
}
|
||||
~InterceptQueue() { ProxyQueue::Destroy(proxy_); }
|
||||
|
||||
static bool is_enabled;
|
||||
|
||||
|
||||
@@ -36,10 +36,10 @@ ProxyQueue* ProxyQueue::Create(hsa_agent_t agent, uint32_t size, hsa_queue_type3
|
||||
hsa_status_t* status) {
|
||||
hsa_status_t suc = HSA_STATUS_ERROR;
|
||||
ProxyQueue* instance =
|
||||
(rocp_type_) ? (ProxyQueue*) new SimpleProxyQueue() : (ProxyQueue*) new HsaProxyQueue();
|
||||
(rocp_type_) ? (ProxyQueue*)new SimpleProxyQueue() : (ProxyQueue*)new HsaProxyQueue();
|
||||
if (instance != NULL) {
|
||||
suc = instance->Init(agent, size, type, callback, data, private_segment_size,
|
||||
group_segment_size, queue);
|
||||
group_segment_size, queue);
|
||||
if (suc != HSA_STATUS_SUCCESS) {
|
||||
delete instance;
|
||||
instance = NULL;
|
||||
|
||||
@@ -25,13 +25,19 @@ void SimpleProxyQueue::HsaIntercept(HsaApiTable* table) {
|
||||
table->core_->hsa_signal_store_relaxed_fn = rocprofiler::SimpleProxyQueue::SignalStore;
|
||||
table->core_->hsa_signal_store_screlease_fn = rocprofiler::SimpleProxyQueue::SignalStore;
|
||||
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::GetQueueIndex;
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::SetQueueIndex;
|
||||
table->core_->hsa_queue_load_read_index_relaxed_fn = rocprofiler::SimpleProxyQueue::GetSubmitIndex;
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn =
|
||||
rocprofiler::SimpleProxyQueue::GetQueueIndex;
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn =
|
||||
rocprofiler::SimpleProxyQueue::SetQueueIndex;
|
||||
table->core_->hsa_queue_load_read_index_relaxed_fn =
|
||||
rocprofiler::SimpleProxyQueue::GetSubmitIndex;
|
||||
|
||||
table->core_->hsa_queue_load_write_index_scacquire_fn = rocprofiler::SimpleProxyQueue::GetQueueIndex;
|
||||
table->core_->hsa_queue_store_write_index_screlease_fn = rocprofiler::SimpleProxyQueue::SetQueueIndex;
|
||||
table->core_->hsa_queue_load_read_index_scacquire_fn = rocprofiler::SimpleProxyQueue::GetSubmitIndex;
|
||||
table->core_->hsa_queue_load_write_index_scacquire_fn =
|
||||
rocprofiler::SimpleProxyQueue::GetQueueIndex;
|
||||
table->core_->hsa_queue_store_write_index_screlease_fn =
|
||||
rocprofiler::SimpleProxyQueue::SetQueueIndex;
|
||||
table->core_->hsa_queue_load_read_index_scacquire_fn =
|
||||
rocprofiler::SimpleProxyQueue::GetSubmitIndex;
|
||||
}
|
||||
|
||||
SimpleProxyQueue::queue_map_t* SimpleProxyQueue::queue_map_ = NULL;
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#ifndef ROCP_PROXY_LOCK
|
||||
# define ROCP_PROXY_LOCK 1
|
||||
#define ROCP_PROXY_LOCK 1
|
||||
#endif
|
||||
|
||||
namespace rocprofiler {
|
||||
@@ -125,7 +125,8 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
const uint64_t que_idx = hsa_queue_load_write_index_relaxed_fn(queue_);
|
||||
|
||||
// Waiting untill there is a free space in the queue
|
||||
while (que_idx >= (hsa_queue_load_read_index_relaxed_fn(queue_) + size_));
|
||||
while (que_idx >= (hsa_queue_load_read_index_relaxed_fn(queue_) + size_))
|
||||
;
|
||||
|
||||
// Increment the write index
|
||||
hsa_queue_store_write_index_relaxed_fn(queue_, que_idx + 1);
|
||||
@@ -160,8 +161,7 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
queue_mask_(0),
|
||||
submit_index_(0),
|
||||
on_submit_cb_(NULL),
|
||||
on_submit_cb_data_(NULL)
|
||||
{
|
||||
on_submit_cb_data_(NULL) {
|
||||
printf("ROCProfiler: SimpleProxyQueue is enabled\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
@@ -200,8 +200,8 @@ class SimpleProxyQueue : public ProxyQueue {
|
||||
|
||||
if (queue_map_ == NULL) queue_map_ = new queue_map_t;
|
||||
(*queue_map_)[queue_->doorbell_signal.handle] = this;
|
||||
}
|
||||
else abort();
|
||||
} else
|
||||
abort();
|
||||
}
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
|
||||
+17
-11
@@ -35,20 +35,21 @@
|
||||
|
||||
namespace proxy {
|
||||
class Tracker {
|
||||
public:
|
||||
public:
|
||||
typedef util::HsaRsrcFactory::timestamp_t timestamp_t;
|
||||
typedef roctracer::trace_entry_t entry_t;
|
||||
typedef roctracer::entry_type_t entry_type_t;
|
||||
|
||||
// Add tracker entry
|
||||
inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, entry_t* entry) {
|
||||
inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal,
|
||||
entry_t* entry) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
util::HsaRsrcFactory* hsa_rsrc = &(util::HsaRsrcFactory::Instance());
|
||||
|
||||
// Creating a new tracker entry
|
||||
entry->type = type;
|
||||
entry->agent = agent;
|
||||
entry->dev_index = 0; //hsa_rsrc->GetAgentInfo(agent)->dev_index;
|
||||
entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index;
|
||||
entry->orig = signal;
|
||||
entry->dispatch = hsa_rsrc->TimestampNs();
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_INIT, std::memory_order_release);
|
||||
@@ -56,7 +57,8 @@ class Tracker {
|
||||
// Creating a proxy signal
|
||||
status = hsa_signal_create(1, 0, NULL, &(entry->signal));
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_signal_create");
|
||||
status = hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
|
||||
status =
|
||||
hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler");
|
||||
}
|
||||
|
||||
@@ -66,7 +68,7 @@ class Tracker {
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_INV, std::memory_order_release);
|
||||
}
|
||||
|
||||
private:
|
||||
private:
|
||||
// Entry completion
|
||||
inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
|
||||
// Query begin/end and complete timestamps
|
||||
@@ -74,12 +76,14 @@ class Tracker {
|
||||
if (entry->type == roctracer::COPY_ENTRY_TYPE) {
|
||||
hsa_amd_profiling_async_copy_time_t async_copy_time{};
|
||||
hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_async_copy_time");
|
||||
if (status != HSA_STATUS_SUCCESS)
|
||||
EXC_RAISING(status, "hsa_amd_profiling_get_async_copy_time");
|
||||
entry->begin = hsa_rsrc->SysclockToNs(async_copy_time.start);
|
||||
entry->end = hsa_rsrc->SysclockToNs(async_copy_time.end);
|
||||
} else {
|
||||
hsa_amd_profiling_dispatch_time_t dispatch_time{};
|
||||
hsa_status_t status = hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time);
|
||||
hsa_status_t status =
|
||||
hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_dispatch_time");
|
||||
entry->begin = hsa_rsrc->SysclockToNs(dispatch_time.start);
|
||||
entry->end = hsa_rsrc->SysclockToNs(dispatch_time.end);
|
||||
@@ -101,7 +105,8 @@ class Tracker {
|
||||
orig_signal_ptr->end_ts = prof_signal_ptr->end_ts;
|
||||
|
||||
const hsa_signal_value_t new_value = hsa_signal_load_relaxed(orig) - 1;
|
||||
if (signal_value != new_value) EXC_ABORT(HSA_STATUS_ERROR, "Tracker::Complete bad signal value");
|
||||
if (signal_value != new_value)
|
||||
EXC_ABORT(HSA_STATUS_ERROR, "Tracker::Complete bad signal value");
|
||||
hsa_signal_store_screlease(orig, signal_value);
|
||||
}
|
||||
hsa_signal_destroy(signal);
|
||||
@@ -111,7 +116,8 @@ class Tracker {
|
||||
static bool Handler(hsa_signal_value_t signal_value, void* arg) {
|
||||
// Acquire entry
|
||||
entry_t* entry = reinterpret_cast<entry_t*>(arg);
|
||||
while (entry->valid.load(std::memory_order_acquire) != roctracer::TRACE_ENTRY_INIT) sched_yield();
|
||||
while (entry->valid.load(std::memory_order_acquire) != roctracer::TRACE_ENTRY_INIT)
|
||||
sched_yield();
|
||||
|
||||
// Complete entry
|
||||
Tracker::Complete(signal_value, entry);
|
||||
@@ -119,6 +125,6 @@ class Tracker {
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
} // namespace proxy
|
||||
|
||||
#endif // SRC_PROXY_TRACKER_H_
|
||||
#endif // SRC_PROXY_TRACKER_H_
|
||||
|
||||
@@ -34,11 +34,12 @@ typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
typedef uint32_t packet_word_t;
|
||||
typedef uint64_t timestamp_t;
|
||||
|
||||
inline std::ostream& operator<< (std::ostream& out, const event_t& event) {
|
||||
out << "[block_name(" << event.block_name << "). block_index(" << event.block_index << "). counter_id(" << event.counter_id << ")]";
|
||||
inline std::ostream& operator<<(std::ostream& out, const event_t& event) {
|
||||
out << "[block_name(" << event.block_name << "). block_index(" << event.block_index
|
||||
<< "). counter_id(" << event.counter_id << ")]";
|
||||
return out;
|
||||
}
|
||||
inline std::ostream& operator<< (std::ostream& out, const parameter_t& parameter) {
|
||||
inline std::ostream& operator<<(std::ostream& out, const parameter_t& parameter) {
|
||||
out << "[parameter_name(" << parameter.parameter_name << "). value(" << parameter.value << ")]";
|
||||
return out;
|
||||
}
|
||||
|
||||
+22
-12
@@ -37,7 +37,6 @@
|
||||
#define API_METHOD_PREFIX \
|
||||
roctx_status_t err = ROCTX_STATUS_SUCCESS; \
|
||||
try {
|
||||
|
||||
#define API_METHOD_SUFFIX \
|
||||
} \
|
||||
catch (std::exception & e) { \
|
||||
@@ -52,7 +51,7 @@
|
||||
ERR_LOGGING(__FUNCTION__ << "(), " << e.what()); \
|
||||
err = roctx::GetExcStatus(e); \
|
||||
} \
|
||||
(void)err; \
|
||||
(void)err;
|
||||
|
||||
#define API_METHOD_CATCH(X) \
|
||||
} \
|
||||
@@ -84,8 +83,10 @@ thread_map_t thread_map;
|
||||
static thread_local message_stack_t* message_stack = NULL;
|
||||
|
||||
roctx_status_t GetExcStatus(const std::exception& e) {
|
||||
const roctracer::util::exception* roctx_exc_ptr = dynamic_cast<const roctracer::util::exception*>(&e);
|
||||
return (roctx_exc_ptr) ? static_cast<roctx_status_t>(roctx_exc_ptr->status()) : ROCTX_STATUS_ERROR;
|
||||
const roctracer::util::exception* roctx_exc_ptr =
|
||||
dynamic_cast<const roctracer::util::exception*>(&e);
|
||||
return (roctx_exc_ptr) ? static_cast<roctx_status_t>(roctx_exc_ptr->status())
|
||||
: ROCTX_STATUS_ERROR;
|
||||
}
|
||||
|
||||
void thread_data_init() {
|
||||
@@ -124,7 +125,8 @@ PUBLIC_API void roctxMarkA(const char* message) {
|
||||
activity_rtapi_callback_t api_callback_fun = NULL;
|
||||
void* api_callback_arg = NULL;
|
||||
roctx::cb_table.get(ROCTX_API_ID_roctxMarkA, &api_callback_fun, &api_callback_arg);
|
||||
if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxMarkA, &api_data, api_callback_arg);
|
||||
if (api_callback_fun)
|
||||
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxMarkA, &api_data, api_callback_arg);
|
||||
API_METHOD_SUFFIX_NRET
|
||||
}
|
||||
|
||||
@@ -137,7 +139,9 @@ PUBLIC_API int roctxRangePushA(const char* message) {
|
||||
activity_rtapi_callback_t api_callback_fun = NULL;
|
||||
void* api_callback_arg = NULL;
|
||||
roctx::cb_table.get(ROCTX_API_ID_roctxRangePushA, &api_callback_fun, &api_callback_arg);
|
||||
if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data, api_callback_arg);
|
||||
if (api_callback_fun)
|
||||
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data,
|
||||
api_callback_arg);
|
||||
roctx::message_stack->push(strdup(message));
|
||||
|
||||
return roctx::message_stack->size() - 1;
|
||||
@@ -152,11 +156,13 @@ PUBLIC_API int roctxRangePop() {
|
||||
activity_rtapi_callback_t api_callback_fun = NULL;
|
||||
void* api_callback_arg = NULL;
|
||||
roctx::cb_table.get(ROCTX_API_ID_roctxRangePop, &api_callback_fun, &api_callback_arg);
|
||||
if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, &api_data, api_callback_arg);
|
||||
if (api_callback_fun)
|
||||
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, &api_data,
|
||||
api_callback_arg);
|
||||
if (roctx::message_stack->empty()) {
|
||||
EXC_ABORT(ROCTX_STATUS_ERROR, "Pop from empty stack!");
|
||||
EXC_ABORT(ROCTX_STATUS_ERROR, "Pop from empty stack!");
|
||||
} else {
|
||||
roctx::message_stack->pop();
|
||||
roctx::message_stack->pop();
|
||||
}
|
||||
|
||||
return roctx::message_stack->size();
|
||||
@@ -173,7 +179,9 @@ PUBLIC_API roctx_range_id_t roctxRangeStartA(const char* message) {
|
||||
activity_rtapi_callback_t api_callback_fun = NULL;
|
||||
void* api_callback_arg = NULL;
|
||||
roctx::cb_table.get(ROCTX_API_ID_roctxRangeStartA, &api_callback_fun, &api_callback_arg);
|
||||
if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStartA, &api_data, api_callback_arg);
|
||||
if (api_callback_fun)
|
||||
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStartA, &api_data,
|
||||
api_callback_arg);
|
||||
|
||||
return roctx_range_counter;
|
||||
API_METHOD_CATCH(-1);
|
||||
@@ -186,14 +194,16 @@ PUBLIC_API void roctxRangeStop(roctx_range_id_t rangeId) {
|
||||
activity_rtapi_callback_t api_callback_fun = NULL;
|
||||
void* api_callback_arg = NULL;
|
||||
roctx::cb_table.get(ROCTX_API_ID_roctxRangeStop, &api_callback_fun, &api_callback_arg);
|
||||
if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStop, &api_data, api_callback_arg);
|
||||
if (api_callback_fun)
|
||||
api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStop, &api_data,
|
||||
api_callback_arg);
|
||||
API_METHOD_SUFFIX_NRET
|
||||
}
|
||||
|
||||
PUBLIC_API void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg) {
|
||||
for (const auto& entry : roctx::thread_map) {
|
||||
const auto tid = entry.first;
|
||||
for (roctx::message_stack_t stack = *(entry.second); !stack.empty(); stack.pop()){
|
||||
for (roctx::message_stack_t stack = *(entry.second); !stack.empty(); stack.pop()) {
|
||||
std::string message = stack.top();
|
||||
roctx_range_data_t data{};
|
||||
data.message = message.c_str();
|
||||
|
||||
@@ -45,8 +45,6 @@ PUBLIC_API bool RegisterApiCallback(uint32_t op, void* callback, void* arg) {
|
||||
return roctx::cb_table.set(op, reinterpret_cast<activity_rtapi_callback_t>(callback), arg);
|
||||
}
|
||||
|
||||
PUBLIC_API bool RemoveApiCallback(uint32_t op) {
|
||||
return roctx::cb_table.set(op, NULL, NULL);
|
||||
}
|
||||
PUBLIC_API bool RemoveApiCallback(uint32_t op) { return roctx::cb_table.set(op, NULL, NULL); }
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -43,12 +43,12 @@
|
||||
#define HCC_EXC_RAISING(error, stream) \
|
||||
do { \
|
||||
EXC_RAISING(error, "HCC error: " << stream); \
|
||||
} while(0)
|
||||
} while (0)
|
||||
|
||||
#define HIP_EXC_RAISING(error, stream) \
|
||||
do { \
|
||||
EXC_RAISING(error, "HIP error: " << stream); \
|
||||
} while(0)
|
||||
} while (0)
|
||||
|
||||
namespace roctracer {
|
||||
namespace util {
|
||||
|
||||
@@ -69,13 +69,15 @@ static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
|
||||
|
||||
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
|
||||
@@ -128,19 +130,22 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
|
||||
#ifdef ROCP_LD_AQLPROFILE
|
||||
status = LoadAqlProfileLib(&aqlprofile_api_);
|
||||
#else
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_);
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
|
||||
hsa_ven_amd_aqlprofile_VERSION_MAJOR,
|
||||
sizeof(aqlprofile_api_), &aqlprofile_api_);
|
||||
#endif
|
||||
CHECK_STATUS("aqlprofile API table load failed", status);
|
||||
|
||||
// Get Loader API table
|
||||
loader_api_ = {0};
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_);
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
|
||||
sizeof(loader_api_), &loader_api_);
|
||||
CHECK_STATUS("loader API table query failed", status);
|
||||
|
||||
// Instantiate HSA timer
|
||||
timer_ = new HsaTimer(&hsa_api_);
|
||||
CHECK_STATUS("HSA timer allocation failed",
|
||||
(timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
|
||||
(timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
|
||||
|
||||
// Time correlation
|
||||
const uint32_t corr_iters = 1000;
|
||||
@@ -148,7 +153,8 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
|
||||
CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters);
|
||||
|
||||
// System timeout
|
||||
timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_);
|
||||
timeout_ =
|
||||
(timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_);
|
||||
}
|
||||
|
||||
// Destructor of the class
|
||||
@@ -174,9 +180,12 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
|
||||
hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn;
|
||||
hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn;
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed = table->core_->hsa_queue_load_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed = table->core_->hsa_queue_store_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_load_read_index_relaxed = table->core_->hsa_queue_load_read_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed =
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed =
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_load_read_index_relaxed =
|
||||
table->core_->hsa_queue_load_read_index_relaxed_fn;
|
||||
|
||||
hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn;
|
||||
hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn;
|
||||
@@ -185,18 +194,22 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn;
|
||||
hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn;
|
||||
|
||||
hsa_api_.hsa_code_object_reader_create_from_file = table->core_->hsa_code_object_reader_create_from_file_fn;
|
||||
hsa_api_.hsa_code_object_reader_create_from_file =
|
||||
table->core_->hsa_code_object_reader_create_from_file_fn;
|
||||
hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn;
|
||||
hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn;
|
||||
hsa_api_.hsa_executable_load_agent_code_object =
|
||||
table->core_->hsa_executable_load_agent_code_object_fn;
|
||||
hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
|
||||
hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn;
|
||||
hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn;
|
||||
hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn;
|
||||
|
||||
hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn;
|
||||
hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn;
|
||||
hsa_api_.hsa_system_get_major_extension_table =
|
||||
table->core_->hsa_system_get_major_extension_table_fn;
|
||||
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools = table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn;
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools =
|
||||
table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn;
|
||||
hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn;
|
||||
hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn;
|
||||
hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn;
|
||||
@@ -204,9 +217,12 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_amd_memory_async_copy_rect = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn;
|
||||
|
||||
hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn;
|
||||
hsa_api_.hsa_amd_profiling_set_profiler_enabled = table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_async_copy_time = table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_dispatch_time = table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn;
|
||||
hsa_api_.hsa_amd_profiling_set_profiler_enabled =
|
||||
table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_async_copy_time =
|
||||
table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_dispatch_time =
|
||||
table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn;
|
||||
} else {
|
||||
hsa_api_.hsa_init = hsa_init;
|
||||
hsa_api_.hsa_shut_down = hsa_shut_down;
|
||||
@@ -302,10 +318,13 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
|
||||
agent_info->dev_index = cpu_list_.size();
|
||||
|
||||
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
|
||||
status =
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool;
|
||||
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) kern_arg_pool_ = &agent_info->kern_arg_pool;
|
||||
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool,
|
||||
&agent_info->kern_arg_pool);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL))
|
||||
kern_arg_pool_ = &agent_info->kern_arg_pool;
|
||||
agent_info->gpu_pool = {};
|
||||
|
||||
cpu_list_.push_back(agent_info);
|
||||
@@ -317,28 +336,34 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
agent_info->dev_id = agent;
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_GPU;
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name);
|
||||
const int gfxip_label_len = std::min (strlen(agent_info->name) - 2, sizeof (agent_info->gfxip) - 1);
|
||||
const int gfxip_label_len =
|
||||
std::min(strlen(agent_info->name) - 2, sizeof(agent_info->gfxip) - 1);
|
||||
memcpy(agent_info->gfxip, agent_info->name, gfxip_label_len);
|
||||
agent_info->gfxip[gfxip_label_len] = '\0';
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size);
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
|
||||
agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false;
|
||||
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
|
||||
&agent_info->cu_num);
|
||||
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
|
||||
&agent_info->waves_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
|
||||
&agent_info->simds_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
|
||||
&agent_info->se_num);
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
|
||||
&agent_info->cu_num);
|
||||
hsa_api_.hsa_agent_get_info(agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
|
||||
&agent_info->shader_arrays_per_se);
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
|
||||
&agent_info->waves_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
|
||||
&agent_info->simds_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
|
||||
&agent_info->se_num);
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
|
||||
&agent_info->shader_arrays_per_se);
|
||||
|
||||
agent_info->cpu_pool = {};
|
||||
agent_info->kern_arg_pool = {};
|
||||
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
|
||||
status =
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
|
||||
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status);
|
||||
|
||||
// GFX8 and GFX9 SGPR/VGPR block sizes
|
||||
@@ -435,7 +460,7 @@ bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
|
||||
hsa_queue_t** queue) {
|
||||
hsa_status_t status;
|
||||
status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
|
||||
UINT32_MAX, UINT32_MAX, queue);
|
||||
UINT32_MAX, UINT32_MAX, queue);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
@@ -458,7 +483,8 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
return ptr;
|
||||
}
|
||||
@@ -473,7 +499,8 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size
|
||||
uint8_t* buffer = NULL;
|
||||
if (!cpu_agents_.empty()) {
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
// Both the CPU and GPU can access the kernel arguments
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
hsa_agent_t ag_list[1] = {agent_info->dev_id};
|
||||
@@ -493,7 +520,8 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
if (!cpu_agents_.empty()) {
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
// Both the CPU and GPU can access the memory
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
hsa_agent_t ag_list[1] = {agent_info->dev_id};
|
||||
@@ -518,16 +546,18 @@ uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t s
|
||||
}
|
||||
|
||||
// Wait signal
|
||||
hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
|
||||
hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const {
|
||||
const hsa_signal_value_t exp_value = signal_value - 1;
|
||||
hsa_signal_value_t ret_value = signal_value;
|
||||
while (1) {
|
||||
ret_value =
|
||||
hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, timeout_, HSA_WAIT_STATE_BLOCKED);
|
||||
ret_value = hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value,
|
||||
timeout_, HSA_WAIT_STATE_BLOCKED);
|
||||
if (ret_value == exp_value) break;
|
||||
if (ret_value != signal_value) {
|
||||
std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value
|
||||
<< "), ret_value(" << ret_value << ")" << std::endl << std::flush;
|
||||
<< "), ret_value(" << ret_value << ")" << std::endl
|
||||
<< std::flush;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
@@ -535,7 +565,8 @@ hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const
|
||||
}
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
|
||||
void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const {
|
||||
SignalWait(signal, signal_value);
|
||||
hsa_api_.hsa_signal_store_relaxed(const_cast<hsa_signal_t&>(signal), signal_value);
|
||||
}
|
||||
@@ -599,13 +630,13 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br
|
||||
}
|
||||
|
||||
// Create executable.
|
||||
status = hsa_api_.hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
NULL, executable);
|
||||
status = hsa_api_.hsa_executable_create_alt(
|
||||
HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable);
|
||||
CHECK_STATUS("Error in creating executable object", status);
|
||||
|
||||
// Load code object.
|
||||
status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr,
|
||||
NULL, NULL);
|
||||
status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id,
|
||||
code_obj_rdr, NULL, NULL);
|
||||
CHECK_STATUS("Error in loading executable object", status);
|
||||
|
||||
// Freeze executable.
|
||||
@@ -615,7 +646,7 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br
|
||||
// Get symbol handle.
|
||||
hsa_executable_symbol_t kernelSymbol;
|
||||
status = hsa_api_.hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0,
|
||||
&kernelSymbol);
|
||||
&kernelSymbol);
|
||||
CHECK_STATUS("Error in looking up kernel symbol", status);
|
||||
|
||||
// Update output parameter
|
||||
@@ -659,7 +690,8 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
|
||||
}
|
||||
|
||||
uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
|
||||
uint32_t* queue_slot = reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b));
|
||||
uint32_t* queue_slot =
|
||||
reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b));
|
||||
const uint32_t* slot_data = reinterpret_cast<const uint32_t*>(packet);
|
||||
|
||||
// Copy buffered commands into the queue slot.
|
||||
@@ -709,18 +741,22 @@ void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
|
||||
table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) {
|
||||
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec,
|
||||
hsa_executable_symbol_t symbol, void* data) {
|
||||
hsa_symbol_kind_t value = (hsa_symbol_kind_t)0;
|
||||
hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value);
|
||||
hsa_status_t status =
|
||||
hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value);
|
||||
CHECK_STATUS("Error in getting symbol info", status);
|
||||
if (value == HSA_SYMBOL_KIND_KERNEL) {
|
||||
uint64_t addr = 0;
|
||||
uint32_t len = 0;
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr);
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr);
|
||||
CHECK_STATUS("Error in getting kernel object", status);
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH,
|
||||
&len);
|
||||
CHECK_STATUS("Error in getting name len", status);
|
||||
char *name = new char[len + 1];
|
||||
char* name = new char[len + 1];
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
|
||||
CHECK_STATUS("Error in getting kernel name", status);
|
||||
name[len] = 0;
|
||||
@@ -733,12 +769,15 @@ hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_ex
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) {
|
||||
hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable,
|
||||
const char* options) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t;
|
||||
hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL);
|
||||
hsa_status_t status =
|
||||
hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL);
|
||||
CHECK_STATUS("Error in iterating executable symbols", status);
|
||||
return hsa_api_.hsa_executable_freeze(executable, options);;
|
||||
return hsa_api_.hsa_executable_freeze(executable, options);
|
||||
;
|
||||
}
|
||||
|
||||
void HsaRsrcFactory::DumpHandles(FILE* file) {
|
||||
@@ -746,10 +785,14 @@ void HsaRsrcFactory::DumpHandles(FILE* file) {
|
||||
auto end = agent_map_.end();
|
||||
for (auto it = beg; it != end; ++it) {
|
||||
const AgentInfo* agent_info = it->second;
|
||||
fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle, (agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu");
|
||||
if (agent_info->cpu_pool.handle != 0) fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle);
|
||||
if (agent_info->kern_arg_pool.handle != 0) fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle);
|
||||
if (agent_info->gpu_pool.handle != 0) fprintf(file, "0x%lx pool gpu%u\n", agent_info->gpu_pool.handle, agent_info->dev_index);
|
||||
fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle,
|
||||
(agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu");
|
||||
if (agent_info->cpu_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle);
|
||||
if (agent_info->kern_arg_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle);
|
||||
if (agent_info->gpu_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool gpu%u\n", agent_info->gpu_pool.handle, agent_info->dev_index);
|
||||
}
|
||||
fflush(file);
|
||||
}
|
||||
|
||||
@@ -44,23 +44,25 @@
|
||||
#define HSA_QUEUE_ALIGN_BYTES 64
|
||||
#define HSA_PACKET_ALIGN_BYTES 64
|
||||
|
||||
#define CHECK_STATUS(msg, status) do { \
|
||||
if ((status) != HSA_STATUS_SUCCESS) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
#define CHECK_STATUS(msg, status) \
|
||||
do { \
|
||||
if ((status) != HSA_STATUS_SUCCESS) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CHECK_ITER_STATUS(msg, status) do { \
|
||||
if ((status) != HSA_STATUS_INFO_BREAK) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
#define CHECK_ITER_STATUS(msg, status) \
|
||||
do { \
|
||||
if ((status) != HSA_STATUS_INFO_BREAK) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
namespace util {
|
||||
static const size_t MEM_PAGE_BYTES = 0x1000;
|
||||
@@ -174,15 +176,12 @@ class HsaTimer {
|
||||
static const timestamp_t TIMESTAMP_MAX = UINT64_MAX;
|
||||
typedef long double freq_t;
|
||||
|
||||
enum time_id_t {
|
||||
TIME_ID_CLOCK_REALTIME = 0,
|
||||
TIME_ID_CLOCK_MONOTONIC = 1,
|
||||
TIME_ID_NUMBER
|
||||
};
|
||||
enum time_id_t { TIME_ID_CLOCK_REALTIME = 0, TIME_ID_CLOCK_MONOTONIC = 1, TIME_ID_NUMBER };
|
||||
|
||||
HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) {
|
||||
timestamp_t sysclock_hz = 0;
|
||||
hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
hsa_status_t status =
|
||||
hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status);
|
||||
sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz;
|
||||
}
|
||||
@@ -217,8 +216,8 @@ class HsaTimer {
|
||||
|
||||
// Return pair of correlated values of profiling timestamp and time with
|
||||
// correlation error for a given time ID and number of iterations
|
||||
void correlated_pair_ns(time_id_t time_id, uint32_t iters,
|
||||
timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) const {
|
||||
void correlated_pair_ns(time_id_t time_id, uint32_t iters, timestamp_t* timestamp_v,
|
||||
timestamp_t* time_v, timestamp_t* error_v) const {
|
||||
clockid_t clock_id = 0;
|
||||
switch (time_id) {
|
||||
case TIME_ID_CLOCK_REALTIME:
|
||||
@@ -357,7 +356,8 @@ class HsaRsrcFactory {
|
||||
uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Wait signal
|
||||
hsa_signal_value_t SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
|
||||
hsa_signal_value_t SignalWait(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const;
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
|
||||
@@ -403,7 +403,9 @@ class HsaRsrcFactory {
|
||||
const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; }
|
||||
|
||||
// Methods for system-clock/ns conversion and timestamp in 'ns'
|
||||
timestamp_t SysclockToNs(const timestamp_t& sysclock) const { return timer_->sysclock_to_ns(sysclock); }
|
||||
timestamp_t SysclockToNs(const timestamp_t& sysclock) const {
|
||||
return timer_->sysclock_to_ns(sysclock);
|
||||
}
|
||||
timestamp_t NsToSysclock(const timestamp_t& time) const { return timer_->ns_to_sysclock(time); }
|
||||
timestamp_t TimestampNs() const { return timer_->timestamp_ns(); }
|
||||
|
||||
@@ -482,8 +484,10 @@ class HsaRsrcFactory {
|
||||
typedef std::map<uint64_t, const char*> symbols_map_t;
|
||||
static symbols_map_t* symbols_map_;
|
||||
static bool executable_tracking_on_;
|
||||
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options);
|
||||
static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data);
|
||||
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable,
|
||||
const char* options);
|
||||
static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol,
|
||||
void* data);
|
||||
|
||||
// HSA runtime API table
|
||||
static hsa_pfn_t hsa_api_;
|
||||
@@ -507,8 +511,8 @@ class HsaRsrcFactory {
|
||||
timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER];
|
||||
|
||||
// CPU/kern-arg memory pools
|
||||
hsa_amd_memory_pool_t *cpu_pool_;
|
||||
hsa_amd_memory_pool_t *kern_arg_pool_;
|
||||
hsa_amd_memory_pool_t* cpu_pool_;
|
||||
hsa_amd_memory_pool_t* kern_arg_pool_;
|
||||
};
|
||||
|
||||
} // namespace util
|
||||
|
||||
+20
-20
@@ -168,32 +168,32 @@ class Logger {
|
||||
} // namespace roctracer
|
||||
|
||||
#define ERR_LOGGING(stream) \
|
||||
do { \
|
||||
roctracer::util::Logger::Instance() << "error: " << roctracer::util::Logger::begm \
|
||||
<< stream << roctracer::util::Logger::endl; \
|
||||
} while(0)
|
||||
do { \
|
||||
roctracer::util::Logger::Instance() \
|
||||
<< "error: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \
|
||||
} while (0)
|
||||
|
||||
#define INFO_LOGGING(stream) \
|
||||
do { \
|
||||
roctracer::util::Logger::Instance() << "info: " << roctracer::util::Logger::begm << stream \
|
||||
<< roctracer::util::Logger::endl; \
|
||||
} while(0)
|
||||
do { \
|
||||
roctracer::util::Logger::Instance() \
|
||||
<< "info: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \
|
||||
} while (0)
|
||||
|
||||
#define WARN_LOGGING(stream) \
|
||||
do { \
|
||||
std::cerr << "ROCProfiler: " << stream << std::endl; \
|
||||
roctracer::util::Logger::Instance() << "warning: " << roctracer::util::Logger::begm << stream \
|
||||
<< roctracer::util::Logger::endl; \
|
||||
} while(0)
|
||||
do { \
|
||||
std::cerr << "ROCProfiler: " << stream << std::endl; \
|
||||
roctracer::util::Logger::Instance() << "warning: " << roctracer::util::Logger::begm << stream \
|
||||
<< roctracer::util::Logger::endl; \
|
||||
} while (0)
|
||||
|
||||
#ifdef DEBUG
|
||||
#define DBG_LOGGING(stream) \
|
||||
do { \
|
||||
roctracer::util::Logger::Instance() << roctracer::util::Logger::begm << "debug: \"" \
|
||||
<< stream << "\"" < < < < \
|
||||
do { \
|
||||
roctracer::util::Logger::Instance() \
|
||||
<< roctracer::util::Logger::begm << "debug: \"" << stream << "\"" < < < < \
|
||||
" in " << __FUNCTION__ << " at " << __FILE__ << " line " << __LINE__ \
|
||||
<< roctracer::util::Logger::endl; \
|
||||
} while(0)
|
||||
<< roctracer::util::Logger::endl; \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#if DEBUG_TRACE_ON
|
||||
@@ -204,8 +204,8 @@ inline static void DEBUG_TRACE(const char* fmt, ...) {
|
||||
va_list valist;
|
||||
va_start(valist, fmt);
|
||||
vsnprintf(buf, size, fmt, valist);
|
||||
printf("%u:%u %s",
|
||||
roctracer::util::Logger::GetPid(), roctracer::util::Logger::GetTid(), buf); fflush(stdout);
|
||||
printf("%u:%u %s", roctracer::util::Logger::GetPid(), roctracer::util::Logger::GetTid(), buf);
|
||||
fflush(stdout);
|
||||
va_end(valist);
|
||||
}
|
||||
#else
|
||||
|
||||
@@ -37,57 +37,56 @@
|
||||
#define THREADS_PER_BLOCK_Z 1
|
||||
|
||||
// Mark API
|
||||
extern "C"
|
||||
void roctracer_mark(const char* str);
|
||||
extern "C" void roctracer_mark(const char* str);
|
||||
|
||||
// Device (Kernel) function, it must be void
|
||||
__global__ void matrixTranspose(float* out, float* in, const int width) {
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
out[y * width + x] = in[x * width + y];
|
||||
out[y * width + x] = in[x * width + y];
|
||||
}
|
||||
|
||||
// CPU implementation of matrix transpose
|
||||
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
|
||||
for (unsigned int j = 0; j < width; j++) {
|
||||
for (unsigned int i = 0; i < width; i++) {
|
||||
output[i * width + j] = input[j * width + i];
|
||||
}
|
||||
for (unsigned int j = 0; j < width; j++) {
|
||||
for (unsigned int i = 0; i < width; i++) {
|
||||
output[i * width + j] = input[j * width + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
float* Matrix;
|
||||
float* TransposeMatrix;
|
||||
float* cpuTransposeMatrix;
|
||||
float* Matrix;
|
||||
float* TransposeMatrix;
|
||||
float* cpuTransposeMatrix;
|
||||
|
||||
float* gpuMatrix;
|
||||
float* gpuTransposeMatrix;
|
||||
float* gpuMatrix;
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
hipDeviceProp_t devProp;
|
||||
hipGetDeviceProperties(&devProp, 0);
|
||||
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
std::cout << "Device name " << devProp.name << std::endl;
|
||||
|
||||
int i;
|
||||
int errors;
|
||||
int i;
|
||||
int errors;
|
||||
|
||||
Matrix = (float*)malloc(NUM * sizeof(float));
|
||||
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
Matrix = (float*)malloc(NUM * sizeof(float));
|
||||
TransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
}
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
// allocate the memory on the device side
|
||||
hipMalloc((void**)&gpuMatrix, NUM * sizeof(float));
|
||||
hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float));
|
||||
|
||||
uint32_t iterations = 100;
|
||||
while (iterations-- > 0) {
|
||||
uint32_t iterations = 100;
|
||||
while (iterations-- > 0) {
|
||||
std::cout << "## Iteration (" << iterations << ") #################" << std::endl;
|
||||
|
||||
// Memory transfer from host to device
|
||||
@@ -98,9 +97,9 @@ int main() {
|
||||
int rangeId = roctxRangeStart("hipLaunchKernel range");
|
||||
roctxRangePush("hipLaunchKernel");
|
||||
// Lauching kernel from host
|
||||
hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
|
||||
gpuMatrix, WIDTH);
|
||||
hipLaunchKernelGGL(
|
||||
matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);
|
||||
roctracer_mark("after HIP LaunchKernel");
|
||||
roctxMark("after hipLaunchKernel");
|
||||
|
||||
@@ -109,8 +108,8 @@ int main() {
|
||||
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost);
|
||||
|
||||
roctxRangePop(); // for "hipMemcpy"
|
||||
roctxRangePop(); // for "hipLaunchKernel"
|
||||
roctxRangePop(); // for "hipMemcpy"
|
||||
roctxRangePop(); // for "hipLaunchKernel"
|
||||
roctxRangeStop(rangeId);
|
||||
|
||||
// CPU MatrixTranspose computation
|
||||
@@ -120,26 +119,25 @@ int main() {
|
||||
errors = 0;
|
||||
double eps = 1.0E-6;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if (errors != 0) {
|
||||
printf("FAILED: %d errors\n", errors);
|
||||
printf("FAILED: %d errors\n", errors);
|
||||
} else {
|
||||
printf("PASSED!\n");
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
|
||||
// free the resources on device side
|
||||
hipFree(gpuMatrix);
|
||||
hipFree(gpuTransposeMatrix);
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
|
||||
// free the resources on host side
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
|
||||
return errors;
|
||||
return errors;
|
||||
}
|
||||
|
||||
@@ -45,7 +45,7 @@ static char* message = NULL;
|
||||
#endif
|
||||
void SPRINT(const char* fmt, ...) {
|
||||
if (msg_buf == NULL) {
|
||||
msg_buf = (char*) calloc(msg_size, 1);
|
||||
msg_buf = (char*)calloc(msg_size, 1);
|
||||
message = msg_buf;
|
||||
}
|
||||
|
||||
@@ -66,13 +66,18 @@ void SFLUSH() {
|
||||
// hip header file
|
||||
#include <hip/hip_runtime.h>
|
||||
// Macro to call HIP API
|
||||
#define HIP_CALL(call) do { call; } while(0)
|
||||
#define HIP_CALL(call) \
|
||||
do { \
|
||||
call; \
|
||||
} while (0)
|
||||
#else
|
||||
#define HIP_CALL(call) do {} while(0)
|
||||
#define HIP_CALL(call) \
|
||||
do { \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#ifndef ITERATIONS
|
||||
# define ITERATIONS 101
|
||||
#define ITERATIONS 101
|
||||
#endif
|
||||
#define WIDTH 1024
|
||||
#define NUM (WIDTH * WIDTH)
|
||||
@@ -83,20 +88,20 @@ void SFLUSH() {
|
||||
#if HIP_TEST
|
||||
// Device (Kernel) function, it must be void
|
||||
__global__ void matrixTranspose(float* out, float* in, const int width) {
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;
|
||||
|
||||
out[y * width + x] = in[x * width + y];
|
||||
out[y * width + x] = in[x * width + y];
|
||||
}
|
||||
#endif
|
||||
|
||||
// CPU implementation of matrix transpose
|
||||
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {
|
||||
for (unsigned int j = 0; j < width; j++) {
|
||||
for (unsigned int i = 0; i < width; i++) {
|
||||
output[i * width + j] = input[j * width + i];
|
||||
}
|
||||
for (unsigned int j = 0; j < width; j++) {
|
||||
for (unsigned int i = 0; i < width; i++) {
|
||||
output[i * width + j] = input[j * width + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int iterations = ITERATIONS;
|
||||
@@ -105,28 +110,28 @@ void start_tracing();
|
||||
void stop_tracing();
|
||||
|
||||
int main() {
|
||||
float* Matrix;
|
||||
float* TransposeMatrix;
|
||||
float* cpuTransposeMatrix;
|
||||
float* Matrix;
|
||||
float* TransposeMatrix;
|
||||
float* cpuTransposeMatrix;
|
||||
|
||||
float* gpuMatrix;
|
||||
float* gpuTransposeMatrix;
|
||||
float* gpuMatrix;
|
||||
float* gpuTransposeMatrix;
|
||||
|
||||
int i;
|
||||
int errors;
|
||||
int i;
|
||||
int errors;
|
||||
|
||||
init_tracing();
|
||||
init_tracing();
|
||||
|
||||
#if HIP_TEST
|
||||
int gpuCount = 1;
|
||||
int gpuCount = 1;
|
||||
#if MGPU_TEST
|
||||
hipGetDeviceCount(&gpuCount);
|
||||
printf("Number of GPUs: %d\n", gpuCount);
|
||||
hipGetDeviceCount(&gpuCount);
|
||||
printf("Number of GPUs: %d\n", gpuCount);
|
||||
#endif
|
||||
iterations *= gpuCount;
|
||||
iterations *= gpuCount;
|
||||
#endif
|
||||
|
||||
while (iterations-- > 0) {
|
||||
while (iterations-- > 0) {
|
||||
start_tracing();
|
||||
|
||||
#if HIP_TEST
|
||||
@@ -145,7 +150,7 @@ int main() {
|
||||
|
||||
// initialize the input data
|
||||
for (i = 0; i < NUM; i++) {
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
Matrix[i] = (float)i * 10.0f;
|
||||
}
|
||||
|
||||
// allocate the memory on the device side
|
||||
@@ -167,9 +172,10 @@ int main() {
|
||||
roctxRangePush("hipLaunchKernel");
|
||||
|
||||
// Lauching kernel from host
|
||||
HIP_CALL(hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix,
|
||||
gpuMatrix, WIDTH));
|
||||
HIP_CALL(hipLaunchKernelGGL(matrixTranspose,
|
||||
dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),
|
||||
dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0,
|
||||
gpuTransposeMatrix, gpuMatrix, WIDTH));
|
||||
|
||||
roctxMark("after hipLaunchKernel");
|
||||
|
||||
@@ -179,10 +185,11 @@ int main() {
|
||||
// Memory transfer from device to host
|
||||
roctxRangePush("hipMemcpy");
|
||||
|
||||
HIP_CALL(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
HIP_CALL(
|
||||
hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));
|
||||
|
||||
roctxRangePop(); // for "hipMemcpy"
|
||||
roctxRangePop(); // for "hipLaunchKernel"
|
||||
roctxRangePop(); // for "hipMemcpy"
|
||||
roctxRangePop(); // for "hipLaunchKernel"
|
||||
|
||||
// correlation reagion end
|
||||
roctracer_activity_pop_external_correlation_id(NULL);
|
||||
@@ -194,15 +201,15 @@ int main() {
|
||||
errors = 0;
|
||||
double eps = 1.0E-6;
|
||||
for (i = 0; i < NUM; i++) {
|
||||
if (abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
if (abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {
|
||||
errors++;
|
||||
}
|
||||
}
|
||||
if ((HIP_TEST != 0) && (errors != 0)) {
|
||||
printf("FAILED: %d errors\n", errors);
|
||||
printf("FAILED: %d errors\n", errors);
|
||||
} else {
|
||||
errors = 0;
|
||||
printf("PASSED!\n");
|
||||
errors = 0;
|
||||
printf("PASSED!\n");
|
||||
}
|
||||
|
||||
// free the resources on device side
|
||||
@@ -218,11 +225,11 @@ int main() {
|
||||
free(Matrix);
|
||||
free(TransposeMatrix);
|
||||
free(cpuTransposeMatrix);
|
||||
}
|
||||
}
|
||||
|
||||
stop_tracing();
|
||||
stop_tracing();
|
||||
|
||||
return errors;
|
||||
return errors;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -234,15 +241,15 @@ int main() {
|
||||
#include <roctracer_hsa.h>
|
||||
#include <roctracer_roctx.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h> /* For SYS_xxx definitions */
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h> /* For SYS_xxx definitions */
|
||||
|
||||
// Macro to check ROC-tracer calls status
|
||||
#define ROCTRACER_CALL(call) \
|
||||
do { \
|
||||
int err = call; \
|
||||
if (err != 0) { \
|
||||
fprintf(stderr, "%s\n", roctracer_error_string()); \
|
||||
fprintf(stderr, "%s\n", roctracer_error_string()); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
@@ -252,12 +259,7 @@ static inline uint32_t GetPid() { return syscall(__NR_getpid); }
|
||||
|
||||
|
||||
// Runtime API callback function
|
||||
void api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
void api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
|
||||
(void)arg;
|
||||
|
||||
if (domain == ACTIVITY_DOMAIN_ROCTX) {
|
||||
@@ -267,31 +269,25 @@ void api_callback(
|
||||
}
|
||||
const hip_api_data_t* data = (const hip_api_data_t*)(callback_data);
|
||||
SPRINT("<%s id(%u)\tcorrelation_id(%lu) %s pid(%d) tid(%d)> ",
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0),
|
||||
cid,
|
||||
data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit", GetPid(), GetTid());
|
||||
roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), cid, data->correlation_id,
|
||||
(data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit", GetPid(), GetTid());
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
switch (cid) {
|
||||
case HIP_API_ID_hipMemcpy:
|
||||
SPRINT("dst(%p) src(%p) size(0x%x) kind(%u)",
|
||||
data->args.hipMemcpy.dst,
|
||||
data->args.hipMemcpy.src,
|
||||
(uint32_t)(data->args.hipMemcpy.sizeBytes),
|
||||
(uint32_t)(data->args.hipMemcpy.kind));
|
||||
SPRINT("dst(%p) src(%p) size(0x%x) kind(%u)", data->args.hipMemcpy.dst,
|
||||
data->args.hipMemcpy.src, (uint32_t)(data->args.hipMemcpy.sizeBytes),
|
||||
(uint32_t)(data->args.hipMemcpy.kind));
|
||||
break;
|
||||
case HIP_API_ID_hipMalloc:
|
||||
SPRINT("ptr(%p) size(0x%x)",
|
||||
data->args.hipMalloc.ptr,
|
||||
(uint32_t)(data->args.hipMalloc.size));
|
||||
SPRINT("ptr(%p) size(0x%x)", data->args.hipMalloc.ptr,
|
||||
(uint32_t)(data->args.hipMalloc.size));
|
||||
break;
|
||||
case HIP_API_ID_hipFree:
|
||||
SPRINT("ptr(%p)", data->args.hipFree.ptr);
|
||||
break;
|
||||
case HIP_API_ID_hipModuleLaunchKernel:
|
||||
SPRINT("kernel(\"%s\") stream(%p)",
|
||||
hipKernelNameRef(data->args.hipModuleLaunchKernel.f),
|
||||
data->args.hipModuleLaunchKernel.stream);
|
||||
SPRINT("kernel(\"%s\") stream(%p)", hipKernelNameRef(data->args.hipModuleLaunchKernel.f),
|
||||
data->args.hipModuleLaunchKernel.stream);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -316,26 +312,17 @@ void activity_callback(const char* begin, const char* end, void* arg) {
|
||||
|
||||
SPRINT("\tActivity records:\n");
|
||||
while (record < end_record) {
|
||||
const char * name = roctracer_op_string(record->domain, record->op, record->kind);
|
||||
SPRINT("\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)",
|
||||
name,
|
||||
record->correlation_id,
|
||||
record->begin_ns,
|
||||
record->end_ns);
|
||||
const char* name = roctracer_op_string(record->domain, record->op, record->kind);
|
||||
SPRINT("\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)", name, record->correlation_id,
|
||||
record->begin_ns, record->end_ns);
|
||||
if (record->domain == ACTIVITY_DOMAIN_HIP_API) {
|
||||
SPRINT(" process_id(%u) thread_id(%u)",
|
||||
record->process_id,
|
||||
record->thread_id);
|
||||
SPRINT(" process_id(%u) thread_id(%u)", record->process_id, record->thread_id);
|
||||
} else if (record->domain == ACTIVITY_DOMAIN_HCC_OPS) {
|
||||
SPRINT(" device_id(%d) queue_id(%lu)",
|
||||
record->device_id,
|
||||
record->queue_id);
|
||||
SPRINT(" device_id(%d) queue_id(%lu)", record->device_id, record->queue_id);
|
||||
if (record->op == HIP_OP_ID_COPY) SPRINT(" bytes(0x%zx)", record->bytes);
|
||||
} else if (record->domain == ACTIVITY_DOMAIN_HSA_OPS) {
|
||||
SPRINT(" se(%u) cycle(%lu) pc(%lx)",
|
||||
record->pc_sample.se,
|
||||
record->pc_sample.cycle,
|
||||
record->pc_sample.pc);
|
||||
SPRINT(" se(%u) cycle(%lu) pc(%lx)", record->pc_sample.se, record->pc_sample.cycle,
|
||||
record->pc_sample.pc);
|
||||
} else if (record->domain == ACTIVITY_DOMAIN_EXT_API) {
|
||||
SPRINT(" external_id(%lu)", record->external_id);
|
||||
} else {
|
||||
@@ -377,8 +364,10 @@ void init_tracing() {
|
||||
void start_tracing() {
|
||||
printf("# START (%d) #############################\n", iterations);
|
||||
// Start
|
||||
if ((iterations & 1) == 1) roctracer_start();
|
||||
else roctracer_stop();
|
||||
if ((iterations & 1) == 1)
|
||||
roctracer_start();
|
||||
else
|
||||
roctracer_stop();
|
||||
}
|
||||
|
||||
// Stop tracing routine
|
||||
|
||||
@@ -43,19 +43,18 @@ void check_status(roctracer_status_t status) {
|
||||
void codeobj_callback(uint32_t domain, uint32_t cid, const void* data, void* arg) {
|
||||
const hsa_evt_data_t* evt_data = reinterpret_cast<const hsa_evt_data_t*>(data);
|
||||
const char* uri = evt_data->codeobj.uri;
|
||||
printf("codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) load_delta(0x%lx) uri(\"%s\")\n",
|
||||
domain,
|
||||
cid,
|
||||
evt_data->codeobj.load_base,
|
||||
evt_data->codeobj.load_size,
|
||||
evt_data->codeobj.load_delta,
|
||||
uri);
|
||||
printf(
|
||||
"codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) load_delta(0x%lx) "
|
||||
"uri(\"%s\")\n",
|
||||
domain, cid, evt_data->codeobj.load_base, evt_data->codeobj.load_size,
|
||||
evt_data->codeobj.load_delta, uri);
|
||||
free((void*)uri);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
void initialize() {
|
||||
roctracer_status_t status = roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, codeobj_callback, NULL);
|
||||
roctracer_status_t status = roctracer_enable_op_callback(
|
||||
ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, codeobj_callback, NULL);
|
||||
check_status(status);
|
||||
}
|
||||
|
||||
@@ -79,9 +78,8 @@ extern "C" PUBLIC_API void OnUnloadTool() {
|
||||
}
|
||||
|
||||
extern "C" CONSTRUCTOR_API void constructor() {
|
||||
printf("constructor\n"); fflush(stdout);
|
||||
printf("constructor\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
extern "C" DESTRUCTOR_API void destructor() {
|
||||
OnUnloadTool();
|
||||
}
|
||||
extern "C" DESTRUCTOR_API void destructor() { OnUnloadTool(); }
|
||||
|
||||
+44
-57
@@ -28,25 +28,24 @@
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
|
||||
#define HSA_RT(call) \
|
||||
do { \
|
||||
const hsa_status_t status = call; \
|
||||
if (status != HSA_STATUS_SUCCESS) { \
|
||||
printf("error \"%s\"\n", #call); fflush(stdout); \
|
||||
abort(); \
|
||||
} \
|
||||
} while(0)
|
||||
#define HSA_RT(call) \
|
||||
do { \
|
||||
const hsa_status_t status = call; \
|
||||
if (status != HSA_STATUS_SUCCESS) { \
|
||||
printf("error \"%s\"\n", #call); \
|
||||
fflush(stdout); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// HSA API intercepting primitives
|
||||
decltype(hsa_executable_freeze)* hsa_executable_freeze_fn;
|
||||
hsa_ven_amd_loader_1_01_pfn_t loader_api_table{};
|
||||
|
||||
hsa_status_t code_object_callback(
|
||||
hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object,
|
||||
void* arg)
|
||||
{
|
||||
printf("code_object_callback\n"); fflush(stdout);
|
||||
hsa_status_t code_object_callback(hsa_executable_t executable,
|
||||
hsa_loaded_code_object_t loaded_code_object, void* arg) {
|
||||
printf("code_object_callback\n");
|
||||
fflush(stdout);
|
||||
|
||||
uint64_t load_base = 0;
|
||||
uint64_t load_size = 0;
|
||||
@@ -55,21 +54,13 @@ hsa_status_t code_object_callback(
|
||||
char* uri_str = NULL;
|
||||
|
||||
HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE,
|
||||
&load_base));
|
||||
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, &load_base));
|
||||
HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE,
|
||||
&load_size));
|
||||
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, &load_size));
|
||||
HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA,
|
||||
&load_delta));
|
||||
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, &load_delta));
|
||||
HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH,
|
||||
&uri_len));
|
||||
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, &uri_len));
|
||||
|
||||
uri_str = (char*)calloc(uri_len + 1, sizeof(char));
|
||||
if (!uri_str) {
|
||||
@@ -78,63 +69,59 @@ hsa_status_t code_object_callback(
|
||||
}
|
||||
|
||||
HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info(
|
||||
loaded_code_object,
|
||||
HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI,
|
||||
uri_str));
|
||||
loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri_str));
|
||||
|
||||
printf("load_base(0x%lx)\n", load_base); fflush(stdout);
|
||||
printf("load_size(0x%lx)\n", load_size); fflush(stdout);
|
||||
printf("load_delta(0x%lx)\n", load_delta); fflush(stdout);
|
||||
printf("uri_len(%u)\n", uri_len); fflush(stdout);
|
||||
printf("uri_str(\"%s\")\n", uri_str); fflush(stdout);
|
||||
printf("load_base(0x%lx)\n", load_base);
|
||||
fflush(stdout);
|
||||
printf("load_size(0x%lx)\n", load_size);
|
||||
fflush(stdout);
|
||||
printf("load_delta(0x%lx)\n", load_delta);
|
||||
fflush(stdout);
|
||||
printf("uri_len(%u)\n", uri_len);
|
||||
fflush(stdout);
|
||||
printf("uri_str(\"%s\")\n", uri_str);
|
||||
fflush(stdout);
|
||||
|
||||
free(uri_str);
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t hsa_executable_freeze_interceptor(
|
||||
hsa_executable_t executable,
|
||||
const char *options)
|
||||
{
|
||||
hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char* options) {
|
||||
HSA_RT(loader_api_table.hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
|
||||
executable,
|
||||
code_object_callback,
|
||||
NULL));
|
||||
HSA_RT(hsa_executable_freeze_fn(
|
||||
executable,
|
||||
options));
|
||||
executable, code_object_callback, NULL));
|
||||
HSA_RT(hsa_executable_freeze_fn(executable, options));
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table,
|
||||
uint64_t runtime_version,
|
||||
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names)
|
||||
{
|
||||
printf("OnLoad: begin\n"); fflush(stdout);
|
||||
const char* const* failed_tool_names) {
|
||||
printf("OnLoad: begin\n");
|
||||
fflush(stdout);
|
||||
// intercepting hsa_executable_freeze API
|
||||
hsa_executable_freeze_fn = table->core_->hsa_executable_freeze_fn;
|
||||
table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
|
||||
// Fetching AMD Loader HSA extension API
|
||||
HSA_RT(hsa_system_get_major_extension_table(
|
||||
HSA_EXTENSION_AMD_LOADER,
|
||||
1,
|
||||
sizeof(hsa_ven_amd_loader_1_01_pfn_t),
|
||||
&loader_api_table));
|
||||
printf("OnLoad: end\n"); fflush(stdout);
|
||||
HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &loader_api_table));
|
||||
printf("OnLoad: end\n");
|
||||
fflush(stdout);
|
||||
return true;
|
||||
}
|
||||
|
||||
extern "C" PUBLIC_API void OnUnload() {
|
||||
printf("OnUnload\n"); fflush(stdout);
|
||||
printf("OnUnload\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
extern "C" CONSTRUCTOR_API void constructor() {
|
||||
printf("constructor\n"); fflush(stdout);
|
||||
printf("constructor\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
extern "C" DESTRUCTOR_API void destructor() {
|
||||
printf("destructor\n"); fflush(stdout);
|
||||
printf("destructor\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
@@ -67,13 +67,15 @@ static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool
|
||||
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
|
||||
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
|
||||
if (HSA_AMD_SEGMENT_GLOBAL != segment) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(
|
||||
pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag);
|
||||
CHECK_STATUS("hsa_amd_memory_pool_get_info", err);
|
||||
|
||||
uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT;
|
||||
@@ -126,19 +128,22 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
|
||||
#ifdef ROCP_LD_AQLPROFILE
|
||||
status = LoadAqlProfileLib(&aqlprofile_api_);
|
||||
#else
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_);
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE,
|
||||
hsa_ven_amd_aqlprofile_VERSION_MAJOR,
|
||||
sizeof(aqlprofile_api_), &aqlprofile_api_);
|
||||
#endif
|
||||
CHECK_STATUS("aqlprofile API table load failed", status);
|
||||
|
||||
// Get Loader API table
|
||||
loader_api_ = {0};
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_);
|
||||
status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
|
||||
sizeof(loader_api_), &loader_api_);
|
||||
CHECK_STATUS("loader API table query failed", status);
|
||||
|
||||
// Instantiate HSA timer
|
||||
timer_ = new HsaTimer(&hsa_api_);
|
||||
CHECK_STATUS("HSA timer allocation failed",
|
||||
(timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
|
||||
(timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
|
||||
|
||||
// Time correlation
|
||||
const uint32_t corr_iters = 1000;
|
||||
@@ -146,7 +151,8 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
|
||||
CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters);
|
||||
|
||||
// System timeout
|
||||
timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_);
|
||||
timeout_ =
|
||||
(timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_);
|
||||
}
|
||||
|
||||
// Destructor of the class
|
||||
@@ -172,9 +178,12 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
|
||||
hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn;
|
||||
hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn;
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed = table->core_->hsa_queue_load_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed = table->core_->hsa_queue_store_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_load_read_index_relaxed = table->core_->hsa_queue_load_read_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed =
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed =
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_load_read_index_relaxed =
|
||||
table->core_->hsa_queue_load_read_index_relaxed_fn;
|
||||
|
||||
hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn;
|
||||
hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn;
|
||||
@@ -183,27 +192,34 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn;
|
||||
hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn;
|
||||
|
||||
hsa_api_.hsa_code_object_reader_create_from_file = table->core_->hsa_code_object_reader_create_from_file_fn;
|
||||
hsa_api_.hsa_code_object_reader_create_from_file =
|
||||
table->core_->hsa_code_object_reader_create_from_file_fn;
|
||||
hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn;
|
||||
hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn;
|
||||
hsa_api_.hsa_executable_load_agent_code_object =
|
||||
table->core_->hsa_executable_load_agent_code_object_fn;
|
||||
hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
|
||||
hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn;
|
||||
hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn;
|
||||
hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn;
|
||||
|
||||
hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn;
|
||||
hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn;
|
||||
hsa_api_.hsa_system_get_major_extension_table =
|
||||
table->core_->hsa_system_get_major_extension_table_fn;
|
||||
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools = table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn;
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools =
|
||||
table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn;
|
||||
hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn;
|
||||
hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn;
|
||||
hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn;
|
||||
hsa_api_.hsa_amd_memory_async_copy = table->amd_ext_->hsa_amd_memory_async_copy_fn;
|
||||
|
||||
hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn;
|
||||
hsa_api_.hsa_amd_profiling_set_profiler_enabled = table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_async_copy_time = table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_dispatch_time = table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn;
|
||||
hsa_api_.hsa_amd_profiling_set_profiler_enabled =
|
||||
table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_async_copy_time =
|
||||
table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_dispatch_time =
|
||||
table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn;
|
||||
} else {
|
||||
hsa_api_.hsa_init = hsa_init;
|
||||
hsa_api_.hsa_shut_down = hsa_shut_down;
|
||||
@@ -298,10 +314,13 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
agent_info->dev_type = HSA_DEVICE_TYPE_CPU;
|
||||
agent_info->dev_index = cpu_list_.size();
|
||||
|
||||
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
|
||||
status =
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool;
|
||||
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) kern_arg_pool_ = &agent_info->kern_arg_pool;
|
||||
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool,
|
||||
&agent_info->kern_arg_pool);
|
||||
if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL))
|
||||
kern_arg_pool_ = &agent_info->kern_arg_pool;
|
||||
agent_info->gpu_pool = {};
|
||||
|
||||
cpu_list_.push_back(agent_info);
|
||||
@@ -319,21 +338,26 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) {
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size);
|
||||
hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile);
|
||||
agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false;
|
||||
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
|
||||
&agent_info->cu_num);
|
||||
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
|
||||
&agent_info->waves_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
|
||||
&agent_info->simds_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
|
||||
&agent_info->se_num);
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT),
|
||||
&agent_info->cu_num);
|
||||
hsa_api_.hsa_agent_get_info(agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
|
||||
&agent_info->shader_arrays_per_se);
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU),
|
||||
&agent_info->waves_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(agent,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU),
|
||||
&agent_info->simds_per_cu);
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES),
|
||||
&agent_info->se_num);
|
||||
hsa_api_.hsa_agent_get_info(
|
||||
agent, static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE),
|
||||
&agent_info->shader_arrays_per_se);
|
||||
|
||||
agent_info->cpu_pool = {};
|
||||
agent_info->kern_arg_pool = {};
|
||||
status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
|
||||
status =
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool);
|
||||
CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status);
|
||||
|
||||
// GFX8 and GFX9 SGPR/VGPR block sizes
|
||||
@@ -430,7 +454,7 @@ bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts,
|
||||
hsa_queue_t** queue) {
|
||||
hsa_status_t status;
|
||||
status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL,
|
||||
UINT32_MAX, UINT32_MAX, queue);
|
||||
UINT32_MAX, UINT32_MAX, queue);
|
||||
return (status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
|
||||
@@ -453,7 +477,8 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL;
|
||||
return ptr;
|
||||
}
|
||||
@@ -468,7 +493,8 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size
|
||||
uint8_t* buffer = NULL;
|
||||
if (!cpu_agents_.empty()) {
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
// Both the CPU and GPU can access the kernel arguments
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
hsa_agent_t ag_list[1] = {agent_info->dev_id};
|
||||
@@ -488,7 +514,8 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s
|
||||
uint8_t* buffer = NULL;
|
||||
size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK;
|
||||
if (!cpu_agents_.empty()) {
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast<void**>(&buffer));
|
||||
status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0,
|
||||
reinterpret_cast<void**>(&buffer));
|
||||
// Both the CPU and GPU can access the memory
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
hsa_agent_t ag_list[1] = {agent_info->dev_id};
|
||||
@@ -513,16 +540,18 @@ uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t s
|
||||
}
|
||||
|
||||
// Wait signal
|
||||
hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
|
||||
hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const {
|
||||
const hsa_signal_value_t exp_value = signal_value - 1;
|
||||
hsa_signal_value_t ret_value = signal_value;
|
||||
while (1) {
|
||||
ret_value =
|
||||
hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, timeout_, HSA_WAIT_STATE_BLOCKED);
|
||||
ret_value = hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value,
|
||||
timeout_, HSA_WAIT_STATE_BLOCKED);
|
||||
if (ret_value == exp_value) break;
|
||||
if (ret_value != signal_value) {
|
||||
std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value
|
||||
<< "), ret_value(" << ret_value << ")" << std::endl << std::flush;
|
||||
<< "), ret_value(" << ret_value << ")" << std::endl
|
||||
<< std::flush;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
@@ -530,7 +559,8 @@ hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const
|
||||
}
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const {
|
||||
void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const {
|
||||
SignalWait(signal, signal_value);
|
||||
hsa_api_.hsa_signal_store_relaxed(const_cast<hsa_signal_t&>(signal), signal_value);
|
||||
}
|
||||
@@ -594,13 +624,13 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br
|
||||
}
|
||||
|
||||
// Create executable.
|
||||
status = hsa_api_.hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
|
||||
NULL, executable);
|
||||
status = hsa_api_.hsa_executable_create_alt(
|
||||
HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable);
|
||||
CHECK_STATUS("Error in creating executable object", status);
|
||||
|
||||
// Load code object.
|
||||
status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr,
|
||||
NULL, NULL);
|
||||
status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id,
|
||||
code_obj_rdr, NULL, NULL);
|
||||
CHECK_STATUS("Error in loading executable object", status);
|
||||
|
||||
// Freeze executable.
|
||||
@@ -610,7 +640,7 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br
|
||||
// Get symbol handle.
|
||||
hsa_executable_symbol_t kernelSymbol;
|
||||
status = hsa_api_.hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0,
|
||||
&kernelSymbol);
|
||||
&kernelSymbol);
|
||||
CHECK_STATUS("Error in looking up kernel symbol", status);
|
||||
|
||||
// Update output parameter
|
||||
@@ -654,7 +684,8 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) {
|
||||
}
|
||||
|
||||
uint32_t slot_idx = (uint32_t)(write_idx % queue->size);
|
||||
uint32_t* queue_slot = reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b));
|
||||
uint32_t* queue_slot =
|
||||
reinterpret_cast<uint32_t*>((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b));
|
||||
const uint32_t* slot_data = reinterpret_cast<const uint32_t*>(packet);
|
||||
|
||||
// Copy buffered commands into the queue slot.
|
||||
@@ -704,18 +735,22 @@ void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
|
||||
table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor;
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) {
|
||||
hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec,
|
||||
hsa_executable_symbol_t symbol, void* data) {
|
||||
hsa_symbol_kind_t value = (hsa_symbol_kind_t)0;
|
||||
hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value);
|
||||
hsa_status_t status =
|
||||
hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value);
|
||||
CHECK_STATUS("Error in getting symbol info", status);
|
||||
if (value == HSA_SYMBOL_KIND_KERNEL) {
|
||||
uint64_t addr = 0;
|
||||
uint32_t len = 0;
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr);
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(
|
||||
symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr);
|
||||
CHECK_STATUS("Error in getting kernel object", status);
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len);
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH,
|
||||
&len);
|
||||
CHECK_STATUS("Error in getting name len", status);
|
||||
char *name = new char[len + 1];
|
||||
char* name = new char[len + 1];
|
||||
status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name);
|
||||
CHECK_STATUS("Error in getting kernel name", status);
|
||||
name[len] = 0;
|
||||
@@ -728,12 +763,15 @@ hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_ex
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) {
|
||||
hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable,
|
||||
const char* options) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t;
|
||||
hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL);
|
||||
hsa_status_t status =
|
||||
hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL);
|
||||
CHECK_STATUS("Error in iterating executable symbols", status);
|
||||
return hsa_api_.hsa_executable_freeze(executable, options);;
|
||||
return hsa_api_.hsa_executable_freeze(executable, options);
|
||||
;
|
||||
}
|
||||
|
||||
void HsaRsrcFactory::DumpHandles(FILE* file) {
|
||||
@@ -741,10 +779,14 @@ void HsaRsrcFactory::DumpHandles(FILE* file) {
|
||||
auto end = agent_map_.end();
|
||||
for (auto it = beg; it != end; ++it) {
|
||||
const AgentInfo* agent_info = it->second;
|
||||
fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle, (agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu");
|
||||
if (agent_info->cpu_pool.handle != 0) fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle);
|
||||
if (agent_info->kern_arg_pool.handle != 0) fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle);
|
||||
if (agent_info->gpu_pool.handle != 0) fprintf(file, "0x%lx pool gpu\n", agent_info->gpu_pool.handle);
|
||||
fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle,
|
||||
(agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu");
|
||||
if (agent_info->cpu_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle);
|
||||
if (agent_info->kern_arg_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle);
|
||||
if (agent_info->gpu_pool.handle != 0)
|
||||
fprintf(file, "0x%lx pool gpu\n", agent_info->gpu_pool.handle);
|
||||
}
|
||||
fflush(file);
|
||||
}
|
||||
|
||||
@@ -44,23 +44,25 @@
|
||||
#define HSA_QUEUE_ALIGN_BYTES 64
|
||||
#define HSA_PACKET_ALIGN_BYTES 64
|
||||
|
||||
#define CHECK_STATUS(msg, status) do { \
|
||||
if ((status) != HSA_STATUS_SUCCESS) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
#define CHECK_STATUS(msg, status) \
|
||||
do { \
|
||||
if ((status) != HSA_STATUS_SUCCESS) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CHECK_ITER_STATUS(msg, status) do { \
|
||||
if ((status) != HSA_STATUS_INFO_BREAK) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
#define CHECK_ITER_STATUS(msg, status) \
|
||||
do { \
|
||||
if ((status) != HSA_STATUS_INFO_BREAK) { \
|
||||
const char* emsg = 0; \
|
||||
hsa_status_string(status, &emsg); \
|
||||
printf("%s: %s\n", msg, emsg ? emsg : "<unknown error>"); \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static const size_t MEM_PAGE_BYTES = 0x1000;
|
||||
static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
|
||||
@@ -172,15 +174,12 @@ class HsaTimer {
|
||||
static const timestamp_t TIMESTAMP_MAX = UINT64_MAX;
|
||||
typedef long double freq_t;
|
||||
|
||||
enum time_id_t {
|
||||
TIME_ID_CLOCK_REALTIME = 0,
|
||||
TIME_ID_CLOCK_MONOTONIC = 1,
|
||||
TIME_ID_NUMBER
|
||||
};
|
||||
enum time_id_t { TIME_ID_CLOCK_REALTIME = 0, TIME_ID_CLOCK_MONOTONIC = 1, TIME_ID_NUMBER };
|
||||
|
||||
HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) {
|
||||
timestamp_t sysclock_hz = 0;
|
||||
hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
hsa_status_t status =
|
||||
hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status);
|
||||
sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz;
|
||||
}
|
||||
@@ -215,8 +214,8 @@ class HsaTimer {
|
||||
|
||||
// Return pair of correlated values of profiling timestamp and time with
|
||||
// correlation error for a given time ID and number of iterations
|
||||
void correlated_pair_ns(time_id_t time_id, uint32_t iters,
|
||||
timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) const {
|
||||
void correlated_pair_ns(time_id_t time_id, uint32_t iters, timestamp_t* timestamp_v,
|
||||
timestamp_t* time_v, timestamp_t* error_v) const {
|
||||
clockid_t clock_id = 0;
|
||||
switch (clock_id) {
|
||||
case TIME_ID_CLOCK_REALTIME:
|
||||
@@ -355,7 +354,8 @@ class HsaRsrcFactory {
|
||||
uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size);
|
||||
|
||||
// Wait signal
|
||||
hsa_signal_value_t SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
|
||||
hsa_signal_value_t SignalWait(const hsa_signal_t& signal,
|
||||
const hsa_signal_value_t& signal_value) const;
|
||||
|
||||
// Wait signal with signal value restore
|
||||
void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const;
|
||||
@@ -401,7 +401,9 @@ class HsaRsrcFactory {
|
||||
const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; }
|
||||
|
||||
// Methods for system-clock/ns conversion and timestamp in 'ns'
|
||||
timestamp_t SysclockToNs(const timestamp_t& sysclock) const { return timer_->sysclock_to_ns(sysclock); }
|
||||
timestamp_t SysclockToNs(const timestamp_t& sysclock) const {
|
||||
return timer_->sysclock_to_ns(sysclock);
|
||||
}
|
||||
timestamp_t NsToSysclock(const timestamp_t& time) const { return timer_->ns_to_sysclock(time); }
|
||||
timestamp_t TimestampNs() const { return timer_->timestamp_ns(); }
|
||||
|
||||
@@ -480,8 +482,10 @@ class HsaRsrcFactory {
|
||||
typedef std::map<uint64_t, const char*> symbols_map_t;
|
||||
static symbols_map_t* symbols_map_;
|
||||
static bool executable_tracking_on_;
|
||||
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options);
|
||||
static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data);
|
||||
static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable,
|
||||
const char* options);
|
||||
static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol,
|
||||
void* data);
|
||||
|
||||
// HSA runtime API table
|
||||
static hsa_pfn_t hsa_api_;
|
||||
@@ -505,8 +509,8 @@ class HsaRsrcFactory {
|
||||
timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER];
|
||||
|
||||
// CPU/kern-arg memory pools
|
||||
hsa_amd_memory_pool_t *cpu_pool_;
|
||||
hsa_amd_memory_pool_t *kern_arg_pool_;
|
||||
hsa_amd_memory_pool_t* cpu_pool_;
|
||||
hsa_amd_memory_pool_t* kern_arg_pool_;
|
||||
};
|
||||
|
||||
#endif // _HSA_RSRC_FACTORY_H_
|
||||
|
||||
@@ -24,7 +24,9 @@
|
||||
#include "ctrl/test_hsa.h"
|
||||
#include "util/test_assert.h"
|
||||
|
||||
template <class Kernel, class Test> bool RunKernel(int argc = 0, char* argv[] = NULL, const AgentInfo* agent_info = NULL, hsa_queue_t* queue = NULL, int count = 1) {
|
||||
template <class Kernel, class Test>
|
||||
bool RunKernel(int argc = 0, char* argv[] = NULL, const AgentInfo* agent_info = NULL,
|
||||
hsa_queue_t* queue = NULL, int count = 1) {
|
||||
bool ret_val = false;
|
||||
|
||||
if (getenv("ROC_TEST_TRACE") == NULL) std::clog.rdbuf(NULL);
|
||||
|
||||
@@ -63,7 +63,8 @@ bool TestHsa::Initialize(int /*arg_cnt*/, char** /*arg_list*/) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
std::clog << "> Using agent[" << agent_info_->dev_index << "] : " << agent_info_->name << std::endl;
|
||||
std::clog << "> Using agent[" << agent_info_->dev_index << "] : " << agent_info_->name
|
||||
<< std::endl;
|
||||
|
||||
// Create an instance of Aql Queue
|
||||
if (hsa_queue_ == NULL) {
|
||||
@@ -116,8 +117,8 @@ bool TestHsa::Setup() {
|
||||
size_t size_info = 0;
|
||||
const hsa_status_t status = hsa_executable_symbol_get_info(
|
||||
kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info);
|
||||
TEST_ASSERT(status == HSA_STATUS_SUCCESS);
|
||||
size_info = kernarg_size;
|
||||
TEST_ASSERT(status == HSA_STATUS_SUCCESS);
|
||||
size_info = kernarg_size;
|
||||
const bool kernarg_missmatch = (kernarg_size > size_info);
|
||||
if (kernarg_missmatch) {
|
||||
std::cout << "kernarg_size = " << kernarg_size << ", size_info = " << size_info
|
||||
@@ -209,7 +210,8 @@ bool TestHsa::Run() {
|
||||
// Submit AQL packet to the queue
|
||||
const uint64_t que_idx = hsa_rsrc_->Submit(hsa_queue_, &aql);
|
||||
|
||||
std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl << std::flush;
|
||||
std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl
|
||||
<< std::flush;
|
||||
|
||||
// Wait on the dispatch signal until the kernel is finished.
|
||||
// Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling
|
||||
|
||||
@@ -33,10 +33,7 @@ class DummyKernel : public TestKernel {
|
||||
enum { KERNARG_BUF_ID, LOCAL_BUF_ID };
|
||||
|
||||
// Constructor
|
||||
DummyKernel() :
|
||||
width_(64),
|
||||
height_(64)
|
||||
{
|
||||
DummyKernel() : width_(64), height_(64) {
|
||||
SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, 0);
|
||||
SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, 0);
|
||||
}
|
||||
@@ -57,7 +54,9 @@ class DummyKernel : public TestKernel {
|
||||
// Reference CPU implementation
|
||||
bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask,
|
||||
const uint32_t width, const uint32_t height,
|
||||
const uint32_t maskWidth, const uint32_t maskHeight) { return true; }
|
||||
const uint32_t maskWidth, const uint32_t maskHeight) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Width of the Input array
|
||||
const uint32_t width_;
|
||||
|
||||
@@ -28,9 +28,8 @@
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
template <class evt_id_t, class evt_weight_t>
|
||||
class EvtStatsT {
|
||||
public:
|
||||
template <class evt_id_t, class evt_weight_t> class EvtStatsT {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
typedef uint64_t evt_count_t;
|
||||
typedef double evt_avr_t;
|
||||
@@ -51,7 +50,7 @@ class EvtStatsT {
|
||||
|
||||
inline void add_event(evt_id_t id, evt_weight_t weight) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
//printf("EvtStats %p ::add_event %u %lu\n", this, id, weight); fflush(stdout);
|
||||
// printf("EvtStats %p ::add_event %u %lu\n", this, id, weight); fflush(stdout);
|
||||
|
||||
evt_record_t& rec = map_[id];
|
||||
const evt_count_t prev_count = rec.count;
|
||||
@@ -65,7 +64,8 @@ class EvtStatsT {
|
||||
|
||||
void dump() {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
fprintf(stdout, "Dumping %s\n", path_); fflush(stdout);
|
||||
fprintf(stdout, "Dumping %s\n", path_);
|
||||
fflush(stdout);
|
||||
|
||||
typedef typename std::set<std::pair<evt_id_t, evt_record_t>, cmpfun> set_t;
|
||||
set_t s_(map_.begin(), map_.end());
|
||||
@@ -75,7 +75,8 @@ class EvtStatsT {
|
||||
const evt_id_t id = e.first;
|
||||
const char* label = get_label(id);
|
||||
std::ostringstream oss;
|
||||
oss << index << ",\"" << label << "\"," << e.second.count << "," << (uint64_t)(e.second.avr) << "," << (uint64_t)(e.second.count * e.second.avr);
|
||||
oss << index << ",\"" << label << "\"," << e.second.count << "," << (uint64_t)(e.second.avr)
|
||||
<< "," << (uint64_t)(e.second.count * e.second.avr);
|
||||
fprintf(fdes_, "%s\n", oss.str().c_str());
|
||||
index += 1;
|
||||
}
|
||||
@@ -88,24 +89,20 @@ class EvtStatsT {
|
||||
const char* label = ret.first->second;
|
||||
return label;
|
||||
}
|
||||
const char* get_label(const char* id) {
|
||||
return id;
|
||||
}
|
||||
const char* get_label(const std::string& id) {
|
||||
return id.c_str();
|
||||
}
|
||||
const char* get_label(const char* id) { return id; }
|
||||
const char* get_label(const std::string& id) { return id.c_str(); }
|
||||
|
||||
void set_label(evt_id_t id, const char* label) {
|
||||
//printf("EvtStats %p ::set_label %u %s\n", this, id, label); fflush(stdout);
|
||||
// printf("EvtStats %p ::set_label %u %s\n", this, id, label); fflush(stdout);
|
||||
labels_[id] = label;
|
||||
}
|
||||
|
||||
EvtStatsT(FILE* f, const char* path) : fdes_(f), path_(path) {
|
||||
//printf("EvtStats %p ::EvtStatsT()\n", this); fflush(stdout);
|
||||
// printf("EvtStats %p ::EvtStatsT()\n", this); fflush(stdout);
|
||||
fprintf(fdes_, "Index,Name,Count,Avr,Total\n");
|
||||
}
|
||||
|
||||
private:
|
||||
private:
|
||||
mutex_t mutex_;
|
||||
map_t map_;
|
||||
labels_t labels_;
|
||||
@@ -115,4 +112,4 @@ class EvtStatsT {
|
||||
|
||||
typedef EvtStatsT<uint32_t, uint64_t> EvtStats;
|
||||
|
||||
#endif // EVT_STATS_H_
|
||||
#endif // EVT_STATS_H_
|
||||
|
||||
@@ -212,7 +212,8 @@ class Xml {
|
||||
buf[size - 1] = '\0';
|
||||
|
||||
if (strncmp(buf, "#include \"", 10) == 0) {
|
||||
for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) {}
|
||||
for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) {
|
||||
}
|
||||
if (ind == size) {
|
||||
fprintf(stderr, "XML PreProcess failed, line size limit %zu\n", kBufSize);
|
||||
error = true;
|
||||
@@ -222,7 +223,8 @@ class Xml {
|
||||
size = ind;
|
||||
lseek(fd_, pos + ind + 1, SEEK_SET);
|
||||
|
||||
for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) {}
|
||||
for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) {
|
||||
}
|
||||
if (ind == size) {
|
||||
error = true;
|
||||
break;
|
||||
|
||||
+211
-191
@@ -21,15 +21,15 @@
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include <cxxabi.h> /* names denangle */
|
||||
#include <cxxabi.h> /* names denangle */
|
||||
#include <dirent.h>
|
||||
#include <pthread.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/syscall.h> /* SYS_xxx definitions */
|
||||
#include <sys/syscall.h> /* SYS_xxx definitions */
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h> /* usleep */
|
||||
#include <unistd.h> /* usleep */
|
||||
|
||||
#include <roctracer_ext.h>
|
||||
#include "src/util/exception.h"
|
||||
@@ -70,10 +70,12 @@
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ONLOAD_TRACE(str) \
|
||||
if (getenv("ROCP_ONLOAD_TRACE")) do { \
|
||||
std::cout << "PID(" << GetPid() << "): TRACER_TOOL::" << __FUNCTION__ << " " << str << std::endl << std::flush; \
|
||||
} while(0);
|
||||
#define ONLOAD_TRACE(str) \
|
||||
if (getenv("ROCP_ONLOAD_TRACE")) do { \
|
||||
std::cout << "PID(" << GetPid() << "): TRACER_TOOL::" << __FUNCTION__ << " " << str \
|
||||
<< std::endl \
|
||||
<< std::flush; \
|
||||
} while (0);
|
||||
#define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin")
|
||||
#define ONLOAD_TRACE_END() ONLOAD_TRACE("end")
|
||||
|
||||
@@ -88,7 +90,8 @@ inline static void DEBUG_TRACE(const char* fmt, ...) {
|
||||
va_list valist;
|
||||
va_start(valist, fmt);
|
||||
vsnprintf(buf, size, fmt, valist);
|
||||
printf("%u:%u %s", GetPid(), GetTid(), buf); fflush(stdout);
|
||||
printf("%u:%u %s", GetPid(), GetTid(), buf);
|
||||
fflush(stdout);
|
||||
va_end(valist);
|
||||
}
|
||||
#else
|
||||
@@ -107,18 +110,18 @@ bool trace_hip_activity = false;
|
||||
bool trace_pcs = false;
|
||||
|
||||
// The below getter functions have been written intentionally to fix an issue
|
||||
// with constructor ordering. Previously when hip_api_vec and hsa_api_vec
|
||||
// with constructor ordering. Previously when hip_api_vec and hsa_api_vec
|
||||
// were left as simple global variables, whenever the tool_load() function
|
||||
// was called from "extern "C" CONSTRUCTOR_API void constructor()" of libtracer_tool.so
|
||||
// the ordering of std::vector constructor becomes undefined. This meant that you could assign
|
||||
// hip_api_vec and hsa_api_vec with a value in tool_load() and once the function returns, the std::vector
|
||||
// default constructor would execute later, causing the values to be lost.
|
||||
// hip_api_vec and hsa_api_vec with a value in tool_load() and once the function returns, the
|
||||
// std::vector default constructor would execute later, causing the values to be lost.
|
||||
|
||||
static std::vector<std::string> &hsa_api_vec() {
|
||||
static std::vector<std::string>& hsa_api_vec() {
|
||||
static std::vector<std::string> hsa_api_vec;
|
||||
return hsa_api_vec;
|
||||
}
|
||||
static std::vector<std::string> &hip_api_vec() {
|
||||
static std::vector<std::string>& hip_api_vec() {
|
||||
static std::vector<std::string> hip_api_vec;
|
||||
return hip_api_vec;
|
||||
}
|
||||
@@ -167,7 +170,8 @@ void fatal(const std::string msg) {
|
||||
static inline const char* cxx_demangle(const char* symbol) {
|
||||
size_t funcnamesize;
|
||||
int status;
|
||||
const char* ret = (symbol != NULL) ? abi::__cxa_demangle(symbol, NULL, &funcnamesize, &status) : symbol;
|
||||
const char* ret =
|
||||
(symbol != NULL) ? abi::__cxa_demangle(symbol, NULL, &funcnamesize, &status) : symbol;
|
||||
return (ret != NULL) ? ret : strdup(symbol);
|
||||
}
|
||||
|
||||
@@ -208,7 +212,8 @@ void* control_thr_fun(void*) {
|
||||
uint32_t control_flush_us = 0;
|
||||
pthread_t flush_thread;
|
||||
bool flush_thread_started = false;
|
||||
std::mutex flush_thread_mutex;;
|
||||
std::mutex flush_thread_mutex;
|
||||
;
|
||||
|
||||
void* flush_thr_fun(void*) {
|
||||
const uint32_t dist_sec = control_flush_us / 1000000;
|
||||
@@ -218,7 +223,8 @@ void* flush_thr_fun(void*) {
|
||||
sleep(dist_sec);
|
||||
usleep(dist_us);
|
||||
std::lock_guard<std::mutex> lock(flush_thread_mutex);
|
||||
if (!flush_thread_started) while(1) sleep(1);
|
||||
if (!flush_thread_started)
|
||||
while (1) sleep(1);
|
||||
ROCTRACER_CALL(roctracer_flush_activity());
|
||||
roctracer::TraceBufferBase::FlushAll();
|
||||
}
|
||||
@@ -241,17 +247,13 @@ struct roctx_trace_entry_t {
|
||||
};
|
||||
|
||||
void roctx_flush_cb(roctx_trace_entry_t* entry);
|
||||
constexpr roctracer::TraceBuffer<roctx_trace_entry_t>::flush_prm_t roctx_flush_prm = {roctracer::DFLT_ENTRY_TYPE, roctx_flush_cb};
|
||||
constexpr roctracer::TraceBuffer<roctx_trace_entry_t>::flush_prm_t roctx_flush_prm = {
|
||||
roctracer::DFLT_ENTRY_TYPE, roctx_flush_cb};
|
||||
roctracer::TraceBuffer<roctx_trace_entry_t>* roctx_trace_buffer = NULL;
|
||||
|
||||
// rocTX callback function
|
||||
static inline void roctx_callback_fun(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
uint32_t tid,
|
||||
roctx_range_id_t rid,
|
||||
const char* message)
|
||||
{
|
||||
static inline void roctx_callback_fun(uint32_t domain, uint32_t cid, uint32_t tid,
|
||||
roctx_range_id_t rid, const char* message) {
|
||||
#if ROCTX_CLOCK_TIME
|
||||
const timestamp_t time = HsaTimer::clocktime_ns(HsaTimer::TIME_ID_CLOCK_MONOTONIC);
|
||||
#else
|
||||
@@ -267,12 +269,7 @@ static inline void roctx_callback_fun(
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
|
||||
}
|
||||
|
||||
void roctx_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
void roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
|
||||
(void)arg;
|
||||
const roctx_api_data_t* data = reinterpret_cast<const roctx_api_data_t*>(callback_data);
|
||||
roctx_callback_fun(domain, cid, GetTid(), data->args.id, data->args.message);
|
||||
@@ -280,27 +277,37 @@ void roctx_api_callback(
|
||||
|
||||
// rocTX Start/Stop callbacks
|
||||
void roctx_range_start_callback(const roctx_range_data_t* data, void* arg) {
|
||||
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, data->tid, 0, data->message);
|
||||
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, data->tid, 0,
|
||||
data->message);
|
||||
}
|
||||
void roctx_range_stop_callback(const roctx_range_data_t* data, void* arg) {
|
||||
roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, data->tid, 0, NULL);
|
||||
}
|
||||
void start_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_start_callback, NULL); }
|
||||
void stop_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stop_callback, NULL); }
|
||||
void start_callback() {
|
||||
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_start_callback, NULL);
|
||||
}
|
||||
void stop_callback() {
|
||||
roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stop_callback, NULL);
|
||||
}
|
||||
|
||||
// rocTX buffer flush function
|
||||
void roctx_flush_cb(roctx_trace_entry_t* entry) {
|
||||
#if ROCTX_CLOCK_TIME
|
||||
timestamp_t timestamp = 0;
|
||||
HsaRsrcFactory::Instance().GetTimestamp(HsaTimer::TIME_ID_CLOCK_MONOTONIC, entry->time, ×tamp);
|
||||
HsaRsrcFactory::Instance().GetTimestamp(HsaTimer::TIME_ID_CLOCK_MONOTONIC, entry->time,
|
||||
×tamp);
|
||||
#else
|
||||
const timestamp_t timestamp = entry->time;
|
||||
#endif
|
||||
std::ostringstream os;
|
||||
os << timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid << ":" << entry->rid;
|
||||
if (entry->message != NULL) os << ":\"" << entry->message << "\"";
|
||||
else os << ":\"\"";
|
||||
fprintf(roctx_file_handle, "%s\n", os.str().c_str()); fflush(roctx_file_handle);
|
||||
os << timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid << ":"
|
||||
<< entry->rid;
|
||||
if (entry->message != NULL)
|
||||
os << ":\"" << entry->message << "\"";
|
||||
else
|
||||
os << ":\"\"";
|
||||
fprintf(roctx_file_handle, "%s\n", os.str().c_str());
|
||||
fflush(roctx_file_handle);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@@ -318,24 +325,20 @@ struct hsa_api_trace_entry_t {
|
||||
};
|
||||
|
||||
void hsa_api_flush_cb(hsa_api_trace_entry_t* entry);
|
||||
constexpr roctracer::TraceBuffer<hsa_api_trace_entry_t>::flush_prm_t hsa_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hsa_api_flush_cb};
|
||||
constexpr roctracer::TraceBuffer<hsa_api_trace_entry_t>::flush_prm_t hsa_flush_prm = {
|
||||
roctracer::DFLT_ENTRY_TYPE, hsa_api_flush_cb};
|
||||
roctracer::TraceBuffer<hsa_api_trace_entry_t>* hsa_api_trace_buffer = NULL;
|
||||
|
||||
// HSA API callback function
|
||||
|
||||
void hsa_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
|
||||
(void)arg;
|
||||
const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
hsa_begin_timestamp = timer->timestamp_fn_ns();
|
||||
} else {
|
||||
|
||||
const timestamp_t end_timestamp = (cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : timer->timestamp_fn_ns();
|
||||
const timestamp_t end_timestamp =
|
||||
(cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : timer->timestamp_fn_ns();
|
||||
hsa_api_trace_entry_t* entry = hsa_api_trace_buffer->GetEntry();
|
||||
entry->cid = cid;
|
||||
entry->begin = hsa_begin_timestamp;
|
||||
@@ -349,17 +352,17 @@ void hsa_api_callback(
|
||||
|
||||
void hsa_api_flush_cb(hsa_api_trace_entry_t* entry) {
|
||||
std::ostringstream os;
|
||||
os << entry->begin << ":" << entry->end << " " << entry->pid << ":" << entry->tid << " " << hsa_api_data_pair_t(entry->cid, entry->data);
|
||||
fprintf(hsa_api_file_handle, "%s\n", os.str().c_str()); fflush(hsa_api_file_handle);
|
||||
os << entry->begin << ":" << entry->end << " " << entry->pid << ":" << entry->tid << " "
|
||||
<< hsa_api_data_pair_t(entry->cid, entry->data);
|
||||
fprintf(hsa_api_file_handle, "%s\n", os.str().c_str());
|
||||
fflush(hsa_api_file_handle);
|
||||
}
|
||||
|
||||
void hsa_activity_callback(
|
||||
uint32_t op,
|
||||
activity_record_t* record,
|
||||
void* arg)
|
||||
{
|
||||
void hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg) {
|
||||
static uint64_t index = 0;
|
||||
fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns, record->end_ns, index, my_pid); fflush(hsa_async_copy_file_handle);
|
||||
fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns,
|
||||
record->end_ns, index, my_pid);
|
||||
fflush(hsa_async_copy_file_handle);
|
||||
index++;
|
||||
}
|
||||
|
||||
@@ -381,28 +384,21 @@ struct hip_api_trace_entry_t {
|
||||
};
|
||||
|
||||
void hip_api_flush_cb(hip_api_trace_entry_t* entry);
|
||||
constexpr roctracer::TraceBuffer<hip_api_trace_entry_t>::flush_prm_t hip_api_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hip_api_flush_cb};
|
||||
constexpr roctracer::TraceBuffer<hip_api_trace_entry_t>::flush_prm_t hip_api_flush_prm = {
|
||||
roctracer::DFLT_ENTRY_TYPE, hip_api_flush_cb};
|
||||
roctracer::TraceBuffer<hip_api_trace_entry_t>* hip_api_trace_buffer = NULL;
|
||||
|
||||
static inline bool is_hip_kernel_launch_api(const uint32_t& cid) {
|
||||
bool ret =
|
||||
(cid == HIP_API_ID_hipLaunchKernel) ||
|
||||
(cid == HIP_API_ID_hipExtLaunchKernel) ||
|
||||
(cid == HIP_API_ID_hipLaunchCooperativeKernel) ||
|
||||
(cid == HIP_API_ID_hipLaunchCooperativeKernelMultiDevice) ||
|
||||
(cid == HIP_API_ID_hipExtLaunchMultiKernelMultiDevice) ||
|
||||
(cid == HIP_API_ID_hipModuleLaunchKernel) ||
|
||||
(cid == HIP_API_ID_hipExtModuleLaunchKernel) ||
|
||||
(cid == HIP_API_ID_hipHccModuleLaunchKernel);
|
||||
bool ret = (cid == HIP_API_ID_hipLaunchKernel) || (cid == HIP_API_ID_hipExtLaunchKernel) ||
|
||||
(cid == HIP_API_ID_hipLaunchCooperativeKernel) ||
|
||||
(cid == HIP_API_ID_hipLaunchCooperativeKernelMultiDevice) ||
|
||||
(cid == HIP_API_ID_hipExtLaunchMultiKernelMultiDevice) ||
|
||||
(cid == HIP_API_ID_hipModuleLaunchKernel) || (cid == HIP_API_ID_hipExtModuleLaunchKernel) ||
|
||||
(cid == HIP_API_ID_hipHccModuleLaunchKernel);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void hip_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
|
||||
(void)arg;
|
||||
const hip_api_data_t* data = reinterpret_cast<const hip_api_data_t*>(callback_data);
|
||||
const timestamp_t timestamp = timer->timestamp_fn_ns();
|
||||
@@ -428,16 +424,17 @@ void hip_api_callback(
|
||||
if (cid == HIP_API_ID_hipMalloc) {
|
||||
entry->ptr = *(data->args.hipMalloc.ptr);
|
||||
} else if (is_hip_kernel_launch_api(cid)) {
|
||||
switch(cid) {
|
||||
switch (cid) {
|
||||
case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice:
|
||||
case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice:
|
||||
{
|
||||
const hipLaunchParams* listKernels = data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList;
|
||||
case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: {
|
||||
const hipLaunchParams* listKernels =
|
||||
data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList;
|
||||
std::string name_str = "";
|
||||
for (int i = 0; i < data->args.hipLaunchCooperativeKernelMultiDevice.numDevices; ++i) {
|
||||
const hipLaunchParams& lp = listKernels[i];
|
||||
if (lp.func != NULL) {
|
||||
const char* kernel_name = roctracer::HipLoader::Instance().KernelNameRefByPtr(lp.func, lp.stream);
|
||||
const char* kernel_name =
|
||||
roctracer::HipLoader::Instance().KernelNameRefByPtr(lp.func, lp.stream);
|
||||
const int device_id = roctracer::HipLoader::Instance().GetStreamDeviceId(lp.stream);
|
||||
name_str += std::string(kernel_name) + ":" + std::to_string(device_id) + ";";
|
||||
}
|
||||
@@ -446,22 +443,21 @@ void hip_api_callback(
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipLaunchKernel:
|
||||
case HIP_API_ID_hipLaunchCooperativeKernel:
|
||||
{
|
||||
case HIP_API_ID_hipLaunchCooperativeKernel: {
|
||||
const void* f = data->args.hipLaunchKernel.function_address;
|
||||
hipStream_t stream = data->args.hipLaunchKernel.stream;
|
||||
if (f != NULL) entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream));
|
||||
if (f != NULL)
|
||||
entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream));
|
||||
break;
|
||||
}
|
||||
case HIP_API_ID_hipExtLaunchKernel:
|
||||
{
|
||||
case HIP_API_ID_hipExtLaunchKernel: {
|
||||
const void* f = data->args.hipExtLaunchKernel.function_address;
|
||||
hipStream_t stream = data->args.hipExtLaunchKernel.stream;
|
||||
if (f != NULL) entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream));
|
||||
if (f != NULL)
|
||||
entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
default: {
|
||||
const hipFunction_t f = data->args.hipModuleLaunchKernel.f;
|
||||
if (f != NULL) entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRef(f));
|
||||
}
|
||||
@@ -471,17 +467,15 @@ void hip_api_callback(
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
|
||||
}
|
||||
|
||||
const char * name = roctracer_op_string(domain, cid, 0);
|
||||
DEBUG_TRACE("hip_api_callback(\"%s\") phase(%d): cid(%u) data(%p) entry(%p) name(\"%s\") correlation_id(%lu) timestamp(%lu)\n",
|
||||
name, data->phase, cid, data, entry, (entry) ? entry->name : NULL, data->correlation_id, timestamp);
|
||||
const char* name = roctracer_op_string(domain, cid, 0);
|
||||
DEBUG_TRACE(
|
||||
"hip_api_callback(\"%s\") phase(%d): cid(%u) data(%p) entry(%p) name(\"%s\") "
|
||||
"correlation_id(%lu) timestamp(%lu)\n",
|
||||
name, data->phase, cid, data, entry, (entry) ? entry->name : NULL, data->correlation_id,
|
||||
timestamp);
|
||||
}
|
||||
|
||||
void mark_api_callback(
|
||||
uint32_t domain,
|
||||
uint32_t cid,
|
||||
const void* callback_data,
|
||||
void* arg)
|
||||
{
|
||||
void mark_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) {
|
||||
(void)arg;
|
||||
const char* name = reinterpret_cast<const char*>(callback_data);
|
||||
|
||||
@@ -513,22 +507,27 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
|
||||
std::ostringstream rec_ss;
|
||||
std::ostringstream oss;
|
||||
|
||||
const char* str = (domain != ACTIVITY_DOMAIN_EXT_API) ? roctracer_op_string(domain, cid, 0) : strdup("MARK");
|
||||
rec_ss << std::dec << begin_timestamp << ":" << end_timestamp << " " << entry->pid << ":" << entry->tid;
|
||||
const char* str =
|
||||
(domain != ACTIVITY_DOMAIN_EXT_API) ? roctracer_op_string(domain, cid, 0) : strdup("MARK");
|
||||
rec_ss << std::dec << begin_timestamp << ":" << end_timestamp << " " << entry->pid << ":"
|
||||
<< entry->tid;
|
||||
oss << std::dec << rec_ss.str() << " " << str;
|
||||
|
||||
const char * name = roctracer_op_string(entry->domain, entry->cid, 0);
|
||||
DEBUG_TRACE("hip_api_flush_cb(\"%s\"): domain(%u) cid(%u) entry(%p) name(\"%s\" correlation_id(%lu) beg(%lu) end(%lu))\n",
|
||||
name, entry->domain, entry->cid, entry, entry->name, correlation_id, begin_timestamp, end_timestamp);
|
||||
const char* name = roctracer_op_string(entry->domain, entry->cid, 0);
|
||||
DEBUG_TRACE(
|
||||
"hip_api_flush_cb(\"%s\"): domain(%u) cid(%u) entry(%p) name(\"%s\" correlation_id(%lu) "
|
||||
"beg(%lu) end(%lu))\n",
|
||||
name, entry->domain, entry->cid, entry, entry->name, correlation_id, begin_timestamp,
|
||||
end_timestamp);
|
||||
|
||||
if (domain == ACTIVITY_DOMAIN_HIP_API) {
|
||||
#if HIP_PROF_HIP_API_STRING
|
||||
if (hip_api_stats != NULL) {
|
||||
hip_api_stats->add_event(cid, end_timestamp - begin_timestamp);
|
||||
if (is_hip_kernel_launch_api(cid)) {
|
||||
hip_kernel_mutex.lock();
|
||||
hip_kernel_mutex.lock();
|
||||
(*hip_kernel_map)[correlation_id] = entry->name;
|
||||
hip_kernel_mutex.unlock();
|
||||
hip_kernel_mutex.unlock();
|
||||
}
|
||||
} else {
|
||||
const char* str = hipApiString((hip_api_id_t)cid, data);
|
||||
@@ -537,50 +536,36 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) {
|
||||
const char* kernel_name = cxx_demangle(entry->name);
|
||||
rec_ss << " kernel=" << kernel_name;
|
||||
}
|
||||
rec_ss<< " :" << correlation_id;
|
||||
rec_ss << " :" << correlation_id;
|
||||
fprintf(hip_api_file_handle, "%s\n", rec_ss.str().c_str());
|
||||
}
|
||||
#else // !HIP_PROF_HIP_API_STRING
|
||||
#else // !HIP_PROF_HIP_API_STRING
|
||||
switch (cid) {
|
||||
case HIP_API_ID_hipMemcpy:
|
||||
fprintf(hip_api_file_handle, "%s(dst(%p) src(%p) size(0x%x) kind(%u))\n",
|
||||
oss.str().c_str(),
|
||||
data->args.hipMemcpy.dst,
|
||||
data->args.hipMemcpy.src,
|
||||
(uint32_t)(data->args.hipMemcpy.sizeBytes),
|
||||
(uint32_t)(data->args.hipMemcpy.kind));
|
||||
fprintf(hip_api_file_handle, "%s(dst(%p) src(%p) size(0x%x) kind(%u))\n", oss.str().c_str(),
|
||||
data->args.hipMemcpy.dst, data->args.hipMemcpy.src,
|
||||
(uint32_t)(data->args.hipMemcpy.sizeBytes), (uint32_t)(data->args.hipMemcpy.kind));
|
||||
break;
|
||||
case HIP_API_ID_hipMemcpyAsync:
|
||||
fprintf(hip_api_file_handle, "%s(dst(%p) src(%p) size(0x%x) kind(%u) stream(%p))\n",
|
||||
oss.str().c_str(),
|
||||
data->args.hipMemcpyAsync.dst,
|
||||
data->args.hipMemcpyAsync.src,
|
||||
(uint32_t)(data->args.hipMemcpyAsync.sizeBytes),
|
||||
(uint32_t)(data->args.hipMemcpyAsync.kind),
|
||||
data->args.hipMemcpyAsync.stream);
|
||||
oss.str().c_str(), data->args.hipMemcpyAsync.dst, data->args.hipMemcpyAsync.src,
|
||||
(uint32_t)(data->args.hipMemcpyAsync.sizeBytes),
|
||||
(uint32_t)(data->args.hipMemcpyAsync.kind), data->args.hipMemcpyAsync.stream);
|
||||
break;
|
||||
case HIP_API_ID_hipMalloc:
|
||||
fprintf(hip_api_file_handle, "%s(ptr(%p) size(0x%x))\n",
|
||||
oss.str().c_str(),
|
||||
entry->ptr,
|
||||
(uint32_t)(data->args.hipMalloc.size));
|
||||
fprintf(hip_api_file_handle, "%s(ptr(%p) size(0x%x))\n", oss.str().c_str(), entry->ptr,
|
||||
(uint32_t)(data->args.hipMalloc.size));
|
||||
break;
|
||||
case HIP_API_ID_hipFree:
|
||||
fprintf(hip_api_file_handle, "%s(ptr(%p))\n",
|
||||
oss.str().c_str(),
|
||||
data->args.hipFree.ptr);
|
||||
fprintf(hip_api_file_handle, "%s(ptr(%p))\n", oss.str().c_str(), data->args.hipFree.ptr);
|
||||
break;
|
||||
case HIP_API_ID_hipModuleLaunchKernel:
|
||||
fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n",
|
||||
oss.str().c_str(),
|
||||
cxx_demangle(entry->name),
|
||||
data->args.hipModuleLaunchKernel.stream);
|
||||
fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n", oss.str().c_str(),
|
||||
cxx_demangle(entry->name), data->args.hipModuleLaunchKernel.stream);
|
||||
break;
|
||||
case HIP_API_ID_hipExtModuleLaunchKernel:
|
||||
fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n",
|
||||
oss.str().c_str(),
|
||||
cxx_demangle(entry->name),
|
||||
data->args.hipExtModuleLaunchKernel.hStream);
|
||||
fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n", oss.str().c_str(),
|
||||
cxx_demangle(entry->name), data->args.hipExtModuleLaunchKernel.hStream);
|
||||
break;
|
||||
default:
|
||||
fprintf(hip_api_file_handle, "%s()\n", oss.str().c_str());
|
||||
@@ -605,23 +590,26 @@ struct hip_act_trace_entry_t {
|
||||
};
|
||||
|
||||
void hip_act_flush_cb(hip_act_trace_entry_t* entry);
|
||||
constexpr roctracer::TraceBuffer<hip_act_trace_entry_t>::flush_prm_t hip_act_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hip_act_flush_cb};
|
||||
constexpr roctracer::TraceBuffer<hip_act_trace_entry_t>::flush_prm_t hip_act_flush_prm = {
|
||||
roctracer::DFLT_ENTRY_TYPE, hip_act_flush_cb};
|
||||
roctracer::TraceBuffer<hip_act_trace_entry_t>* hip_act_trace_buffer = NULL;
|
||||
|
||||
// HIP ACT trace buffer flush callback
|
||||
void hip_act_flush_cb(hip_act_trace_entry_t* entry) {
|
||||
const uint32_t domain = ACTIVITY_DOMAIN_HCC_OPS;
|
||||
const uint32_t op = 0;
|
||||
const char * name = roctracer_op_string(domain, op, entry->kind);
|
||||
const char* name = roctracer_op_string(domain, op, entry->kind);
|
||||
if (name == NULL) {
|
||||
printf("hip_act_flush_cb name is NULL\n"); fflush(stdout);
|
||||
printf("hip_act_flush_cb name is NULL\n");
|
||||
fflush(stdout);
|
||||
abort();
|
||||
}
|
||||
|
||||
if (strncmp("Kernel", name, 6) == 0) {
|
||||
hip_kernel_mutex.lock();
|
||||
if (hip_kernel_stats == NULL) {
|
||||
printf("hip_act_flush_cb hip_kernel_stats is NULL\n"); fflush(stdout);
|
||||
printf("hip_act_flush_cb hip_kernel_stats is NULL\n");
|
||||
fflush(stdout);
|
||||
abort();
|
||||
}
|
||||
name = (*hip_kernel_map)[entry->correlation_id];
|
||||
@@ -640,11 +628,14 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
const roctracer_record_t* end_record = reinterpret_cast<const roctracer_record_t*>(end);
|
||||
|
||||
while (record < end_record) {
|
||||
const char * name = roctracer_op_string(record->domain, record->op, record->kind);
|
||||
DEBUG_TRACE("pool_activity_callback(\"%s\"): domain(%u) op(%u) kind(%u) record(%p) correlation_id(%lu) beg(%lu) end(%lu)\n",
|
||||
name, record->domain, record->op, record->kind, record, record->correlation_id, record->begin_ns, record->end_ns);
|
||||
const char* name = roctracer_op_string(record->domain, record->op, record->kind);
|
||||
DEBUG_TRACE(
|
||||
"pool_activity_callback(\"%s\"): domain(%u) op(%u) kind(%u) record(%p) correlation_id(%lu) "
|
||||
"beg(%lu) end(%lu)\n",
|
||||
name, record->domain, record->op, record->kind, record, record->correlation_id,
|
||||
record->begin_ns, record->end_ns);
|
||||
|
||||
switch(record->domain) {
|
||||
switch (record->domain) {
|
||||
case ACTIVITY_DOMAIN_HCC_OPS:
|
||||
if (hip_memcpy_stats != NULL) {
|
||||
hip_act_trace_entry_t* entry = hip_act_trace_buffer->GetEntry();
|
||||
@@ -653,17 +644,16 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
entry->correlation_id = record->correlation_id;
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
|
||||
} else {
|
||||
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n",
|
||||
record->begin_ns, record->end_ns,
|
||||
record->device_id, record->queue_id,
|
||||
name, record->correlation_id, my_pid);
|
||||
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n", record->begin_ns,
|
||||
record->end_ns, record->device_id, record->queue_id, name, record->correlation_id,
|
||||
my_pid);
|
||||
fflush(hcc_activity_file_handle);
|
||||
}
|
||||
break;
|
||||
case ACTIVITY_DOMAIN_HSA_OPS:
|
||||
if (record->op == HSA_OP_ID_RESERVED1) {
|
||||
fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n",
|
||||
record->pc_sample.se, record->pc_sample.cycle, record->pc_sample.pc, name);
|
||||
fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n", record->pc_sample.se,
|
||||
record->pc_sample.cycle, record->pc_sample.pc, name);
|
||||
fflush(pc_sample_file_handle);
|
||||
}
|
||||
break;
|
||||
@@ -682,11 +672,14 @@ std::string normalize_token(const std::string& token, bool not_empty, const std:
|
||||
std::string error_str = "none";
|
||||
if (first_pos != std::string::npos) {
|
||||
const size_t last_pos = token.find_last_not_of(space_chars_set);
|
||||
if (last_pos == std::string::npos) error_str = "token string error: \"" + token + "\"";
|
||||
if (last_pos == std::string::npos)
|
||||
error_str = "token string error: \"" + token + "\"";
|
||||
else {
|
||||
const size_t end_pos = last_pos + 1;
|
||||
if (end_pos <= first_pos) error_str = "token string error: \"" + token + "\"";
|
||||
else norm_len = end_pos - first_pos;
|
||||
if (end_pos <= first_pos)
|
||||
error_str = "token string error: \"" + token + "\"";
|
||||
else
|
||||
norm_len = end_pos - first_pos;
|
||||
}
|
||||
}
|
||||
if (((first_pos != std::string::npos) && (norm_len == 0)) ||
|
||||
@@ -696,7 +689,8 @@ std::string normalize_token(const std::string& token, bool not_empty, const std:
|
||||
return (norm_len != 0) ? token.substr(first_pos, norm_len) : std::string("");
|
||||
}
|
||||
|
||||
int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const std::string& delim, std::vector<std::string>* vec, const char* label = NULL) {
|
||||
int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const std::string& delim,
|
||||
std::vector<std::string>* vec, const char* label = NULL) {
|
||||
int parse_iter = 0;
|
||||
const auto& opts = node->opts;
|
||||
auto it = opts.find(field);
|
||||
@@ -706,7 +700,7 @@ int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const
|
||||
size_t pos1 = 0;
|
||||
const size_t string_len = array_string.length();
|
||||
while (pos1 < string_len) {
|
||||
// set pos2 such that it also handles case of multiple delimiter options.
|
||||
// set pos2 such that it also handles case of multiple delimiter options.
|
||||
// For example- "hipLaunchKernel, hipExtModuleLaunchKernel, hipMemsetAsync"
|
||||
// in this example delimiters are ' ' and also ','
|
||||
const size_t pos2 = array_string.find_first_of(delim, pos1);
|
||||
@@ -716,9 +710,9 @@ int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const
|
||||
const std::string norm_str = normalize_token(token, found, "get_xml_array");
|
||||
if (norm_str.length() != 0) vec->push_back(norm_str);
|
||||
if (!found) break;
|
||||
// update pos2 such that it represents the first non-delimiter character
|
||||
// in case multiple delimiters are specified in variable 'delim'
|
||||
pos1 = array_string.find_first_not_of(delim, pos2);
|
||||
// update pos2 such that it represents the first non-delimiter character
|
||||
// in case multiple delimiters are specified in variable 'delim'
|
||||
pos1 = array_string.find_first_not_of(delim, pos2);
|
||||
++parse_iter;
|
||||
}
|
||||
}
|
||||
@@ -742,7 +736,8 @@ FILE* open_output_file(const char* prefix, const char* name, const char** path =
|
||||
}
|
||||
|
||||
if (path != NULL) *path = strdup(oss.str().c_str());
|
||||
} else file_handle = stdout;
|
||||
} else
|
||||
file_handle = stdout;
|
||||
return file_handle;
|
||||
}
|
||||
|
||||
@@ -785,7 +780,7 @@ void tool_unload() {
|
||||
flush_thread_started = false;
|
||||
flush_thread_mutex.unlock();
|
||||
PTHREAD_CALL(pthread_cancel(flush_thread));
|
||||
void *res;
|
||||
void* res;
|
||||
PTHREAD_CALL(pthread_join(flush_thread, &res));
|
||||
if (res != PTHREAD_CANCELED) FATAL("flush thread wasn't stopped correctly");
|
||||
}
|
||||
@@ -860,7 +855,8 @@ void tool_load() {
|
||||
}
|
||||
}
|
||||
|
||||
printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout);
|
||||
printf("ROCTracer (pid=%d): ", (int)GetPid());
|
||||
fflush(stdout);
|
||||
|
||||
// XML input
|
||||
const char* xml_name = getenv("ROCP_INPUT");
|
||||
@@ -879,8 +875,10 @@ void tool_load() {
|
||||
|
||||
std::vector<std::string> api_vec;
|
||||
for (const auto* node : entry->nodes) {
|
||||
if (node->tag != "parameters") fatal("ROCTracer: trace node is not supported '" + name + ":" + node->tag + "'");
|
||||
get_xml_array(node, "api", ", ", &api_vec); // delimiter options given as both spaces and commas (' ' and ',')
|
||||
if (node->tag != "parameters")
|
||||
fatal("ROCTracer: trace node is not supported '" + name + ":" + node->tag + "'");
|
||||
get_xml_array(node, "api", ", ",
|
||||
&api_vec); // delimiter options given as both spaces and commas (' ' and ',')
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -917,14 +915,13 @@ void tool_load() {
|
||||
roctx_file_handle = open_output_file(output_prefix, "roctx_trace.txt");
|
||||
|
||||
// initialize HSA tracing
|
||||
roctracer_ext_properties_t properties {
|
||||
start_callback,
|
||||
stop_callback
|
||||
};
|
||||
roctracer_ext_properties_t properties{start_callback, stop_callback};
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_EXT_API, &properties);
|
||||
|
||||
fprintf(stdout, " rocTX-trace()\n"); fflush(stdout);
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL));
|
||||
fprintf(stdout, " rocTX-trace()\n");
|
||||
fflush(stdout);
|
||||
ROCTRACER_CALL(
|
||||
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL));
|
||||
}
|
||||
|
||||
const char* ctrl_str = getenv("ROCP_CTRL_RATE");
|
||||
@@ -934,10 +931,13 @@ void tool_load() {
|
||||
uint32_t ctrl_rate = 0;
|
||||
|
||||
if (sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate) != 3) {
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR, "Invalid ROCP_CTRL_RATE var(" << ctrl_str << "), expected ctrl_delay:ctrl_len:ctrl_rate");
|
||||
EXC_RAISING(
|
||||
ROCTRACER_STATUS_ERROR,
|
||||
"Invalid ROCP_CTRL_RATE var(" << ctrl_str << "), expected ctrl_delay:ctrl_len:ctrl_rate");
|
||||
}
|
||||
if (ctrl_len > ctrl_rate) {
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR, "Control length value " << ctrl_len << " > rate value " << ctrl_rate);
|
||||
EXC_RAISING(ROCTRACER_STATUS_ERROR,
|
||||
"Control length value " << ctrl_len << " > rate value " << ctrl_rate);
|
||||
}
|
||||
control_dist_us = ctrl_rate - ctrl_len;
|
||||
control_len_us = ctrl_len;
|
||||
@@ -946,14 +946,21 @@ void tool_load() {
|
||||
roctracer_stop();
|
||||
|
||||
if (ctrl_delay != UINT32_MAX) {
|
||||
fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout);
|
||||
fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n",
|
||||
ctrl_delay, ctrl_len, ctrl_rate);
|
||||
fflush(stdout);
|
||||
pthread_t thread;
|
||||
pthread_attr_t attr;
|
||||
int err = pthread_attr_init(&attr);
|
||||
if (err) { errno = err; perror("pthread_attr_init"); abort(); }
|
||||
if (err) {
|
||||
errno = err;
|
||||
perror("pthread_attr_init");
|
||||
abort();
|
||||
}
|
||||
err = pthread_create(&thread, &attr, control_thr_fun, NULL);
|
||||
} else {
|
||||
fprintf(stdout, "ROCTracer: trace start disabled\n"); fflush(stdout);
|
||||
fprintf(stdout, "ROCTracer: trace start disabled\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -965,10 +972,15 @@ void tool_load() {
|
||||
abort();
|
||||
}
|
||||
|
||||
fprintf(stdout, "ROCTracer: trace control flush rate(%uus)\n", control_flush_us); fflush(stdout);
|
||||
fprintf(stdout, "ROCTracer: trace control flush rate(%uus)\n", control_flush_us);
|
||||
fflush(stdout);
|
||||
pthread_attr_t attr;
|
||||
int err = pthread_attr_init(&attr);
|
||||
if (err) { errno = err; perror("pthread_attr_init"); abort(); }
|
||||
if (err) {
|
||||
errno = err;
|
||||
perror("pthread_attr_init");
|
||||
abort();
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(flush_thread_mutex);
|
||||
PTHREAD_CALL(pthread_create(&flush_thread, &attr, flush_thr_fun, NULL));
|
||||
flush_thread_started = true;
|
||||
@@ -978,7 +990,8 @@ void tool_load() {
|
||||
}
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
ONLOAD_TRACE_BEG();
|
||||
|
||||
@@ -998,17 +1011,20 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
// initialize HSA tracing
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*)table);
|
||||
|
||||
fprintf(stdout, " HSA-trace("); fflush(stdout);
|
||||
fprintf(stdout, " HSA-trace(");
|
||||
fflush(stdout);
|
||||
if (hsa_api_vec().size() != 0) {
|
||||
for (unsigned i = 0; i < hsa_api_vec().size(); ++i) {
|
||||
uint32_t cid = HSA_API_ID_NUMBER;
|
||||
const char* api = hsa_api_vec()[i].c_str();
|
||||
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, NULL));
|
||||
ROCTRACER_CALL(
|
||||
roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, NULL));
|
||||
printf(" %s", api);
|
||||
}
|
||||
} else {
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, NULL));
|
||||
ROCTRACER_CALL(
|
||||
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, NULL));
|
||||
}
|
||||
printf(")\n");
|
||||
}
|
||||
@@ -1018,21 +1034,20 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt");
|
||||
|
||||
// initialize HSA tracing
|
||||
roctracer::hsa_ops_properties_t ops_properties {
|
||||
table,
|
||||
reinterpret_cast<activity_async_callback_t>(hsa_activity_callback),
|
||||
NULL,
|
||||
output_prefix
|
||||
};
|
||||
roctracer::hsa_ops_properties_t ops_properties{
|
||||
table, reinterpret_cast<activity_async_callback_t>(hsa_activity_callback), NULL,
|
||||
output_prefix};
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
|
||||
|
||||
fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout);
|
||||
fprintf(stdout, " HSA-activity-trace()\n");
|
||||
fflush(stdout);
|
||||
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY));
|
||||
}
|
||||
|
||||
// Enable HIP API callbacks/activity
|
||||
if (trace_hip_api || trace_hip_activity) {
|
||||
fprintf(stdout, " HIP-trace()\n"); fflush(stdout);
|
||||
fprintf(stdout, " HIP-trace()\n");
|
||||
fflush(stdout);
|
||||
// roctracer properties
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback);
|
||||
// Allocating tracing pool
|
||||
@@ -1052,11 +1067,13 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
uint32_t cid = HIP_API_ID_NONE;
|
||||
const char* api = hip_api_vec()[i].c_str();
|
||||
ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, api, &cid, NULL));
|
||||
ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, cid, hip_api_callback, NULL));
|
||||
ROCTRACER_CALL(
|
||||
roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, cid, hip_api_callback, NULL));
|
||||
printf(" %s", api);
|
||||
}
|
||||
} else {
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL));
|
||||
ROCTRACER_CALL(
|
||||
roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL));
|
||||
}
|
||||
|
||||
if (is_stats_opt) {
|
||||
@@ -1087,7 +1104,8 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
|
||||
// Enable PC sampling
|
||||
if (trace_pcs) {
|
||||
fprintf(stdout, " PCS-trace()\n"); fflush(stdout);
|
||||
fprintf(stdout, " PCS-trace()\n");
|
||||
fflush(stdout);
|
||||
open_tracing_pool();
|
||||
pc_sample_file_handle = open_output_file(output_prefix, "pcs_trace.txt");
|
||||
ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_RESERVED1));
|
||||
@@ -1103,17 +1121,19 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
}
|
||||
|
||||
// HSA-runtime on-unload method
|
||||
extern "C" PUBLIC_API void OnUnload() {
|
||||
ONLOAD_TRACE("");
|
||||
}
|
||||
extern "C" PUBLIC_API void OnUnload() { ONLOAD_TRACE(""); }
|
||||
|
||||
extern "C" CONSTRUCTOR_API void constructor() {
|
||||
ONLOAD_TRACE_BEG();
|
||||
roctracer::hip_support::HIP_depth_max = 0;
|
||||
roctx_trace_buffer = new roctracer::TraceBuffer<roctx_trace_entry_t>("rocTX API", 0x200000, &roctx_flush_prm, 1);
|
||||
hip_api_trace_buffer = new roctracer::TraceBuffer<hip_api_trace_entry_t>("HIP API", 0x200000, &hip_api_flush_prm, 1);
|
||||
hip_act_trace_buffer = new roctracer::TraceBuffer<hip_act_trace_entry_t>("HIP ACT", 0x200000, &hip_act_flush_prm, 1, 1);
|
||||
hsa_api_trace_buffer = new roctracer::TraceBuffer<hsa_api_trace_entry_t>("HSA API", 0x200000, &hsa_flush_prm, 1);
|
||||
roctx_trace_buffer =
|
||||
new roctracer::TraceBuffer<roctx_trace_entry_t>("rocTX API", 0x200000, &roctx_flush_prm, 1);
|
||||
hip_api_trace_buffer =
|
||||
new roctracer::TraceBuffer<hip_api_trace_entry_t>("HIP API", 0x200000, &hip_api_flush_prm, 1);
|
||||
hip_act_trace_buffer = new roctracer::TraceBuffer<hip_act_trace_entry_t>(
|
||||
"HIP ACT", 0x200000, &hip_act_flush_prm, 1, 1);
|
||||
hsa_api_trace_buffer =
|
||||
new roctracer::TraceBuffer<hsa_api_trace_entry_t>("HSA API", 0x200000, &hsa_flush_prm, 1);
|
||||
roctracer_load();
|
||||
tool_load();
|
||||
ONLOAD_TRACE_END();
|
||||
|
||||
Reference in New Issue
Block a user