diff --git a/inc/ext/prof_protocol.h b/inc/ext/prof_protocol.h index 07fa6d09a7..f75e7c0dc2 100644 --- a/inc/ext/prof_protocol.h +++ b/inc/ext/prof_protocol.h @@ -25,35 +25,30 @@ // Traced API domains typedef enum { - ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain - ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain - ACTIVITY_DOMAIN_HIP_OPS = 2, // HIP async activity domain - ACTIVITY_DOMAIN_HCC_OPS = ACTIVITY_DOMAIN_HIP_OPS, // HCC async activity domain - ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HIP_OPS, // HIP VDI async activity domain - ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain - ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain - ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain - ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain - ACTIVITY_DOMAIN_HSA_EVT = 7, // HSA events + ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain + ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain + ACTIVITY_DOMAIN_HIP_OPS = 2, // HIP async activity domain + ACTIVITY_DOMAIN_HCC_OPS = ACTIVITY_DOMAIN_HIP_OPS, // HCC async activity domain + ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HIP_OPS, // HIP VDI async activity domain + ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain + ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain + ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain + ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain + ACTIVITY_DOMAIN_HSA_EVT = 7, // HSA events ACTIVITY_DOMAIN_NUMBER } activity_domain_t; // Extension API opcodes -typedef enum { - ACTIVITY_EXT_OP_MARK = 0, - ACTIVITY_EXT_OP_EXTERN_ID = 1 -} activity_ext_op_t; +typedef enum { ACTIVITY_EXT_OP_MARK = 0, ACTIVITY_EXT_OP_EXTERN_ID = 1 } activity_ext_op_t; // API calback type -typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid, const void* data, void* arg); +typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid, const void* data, + void* arg); typedef uint32_t activity_kind_t; typedef uint32_t activity_op_t; // API callback phase -typedef enum { - ACTIVITY_API_PHASE_ENTER = 0, - ACTIVITY_API_PHASE_EXIT = 1 -} activity_api_phase_t; +typedef enum { ACTIVITY_API_PHASE_ENTER = 0, ACTIVITY_API_PHASE_EXIT = 1 } activity_api_phase_t; // Trace record types // Correlation id @@ -61,39 +56,40 @@ typedef uint64_t activity_correlation_id_t; // Activity record type typedef struct activity_record_s { - uint32_t domain; // activity domain id - activity_kind_t kind; // activity kind - activity_op_t op; // activity op - union { - struct { - activity_correlation_id_t correlation_id; // activity ID - uint64_t begin_ns; // host begin timestamp - uint64_t end_ns; // host end timestamp - }; - struct { - uint32_t se; // sampled SE - uint64_t cycle; // sample cycle - uint64_t pc; // sample PC - } pc_sample; + uint32_t domain; // activity domain id + activity_kind_t kind; // activity kind + activity_op_t op; // activity op + union { + struct { + activity_correlation_id_t correlation_id; // activity ID + uint64_t begin_ns; // host begin timestamp + uint64_t end_ns; // host end timestamp }; - union { - struct { - int device_id; // device id - uint64_t queue_id; // queue id - }; - struct { - uint32_t process_id; // device id - uint32_t thread_id; // thread id - }; - struct { - activity_correlation_id_t external_id; // external correlatino id - }; + struct { + uint32_t se; // sampled SE + uint64_t cycle; // sample cycle + uint64_t pc; // sample PC + } pc_sample; + }; + union { + struct { + int device_id; // device id + uint64_t queue_id; // queue id }; - size_t bytes; // data size bytes + struct { + uint32_t process_id; // device id + uint32_t thread_id; // thread id + }; + struct { + activity_correlation_id_t external_id; // external correlatino id + }; + }; + size_t bytes; // data size bytes } activity_record_t; // Activity sync calback type -typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); +typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, + void* arg); // Activity async calback type typedef void (*activity_id_callback_t)(activity_correlation_id_t id); typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg); diff --git a/inc/roctracer.h b/inc/roctracer.h index ddb3bfb9aa..168bef0cd2 100644 --- a/inc/roctracer.h +++ b/inc/roctracer.h @@ -81,17 +81,15 @@ typedef activity_domain_t roctracer_domain_t; // Return Op string by given domain and Op code // NULL returned on the error and the library errno is set -const char* roctracer_op_string( - uint32_t domain, // tracing domain - uint32_t op, // activity op ID - uint32_t kind); // activity kind +const char* roctracer_op_string(uint32_t domain, // tracing domain + uint32_t op, // activity op ID + uint32_t kind); // activity kind // Return Op code and kind by given string -roctracer_status_t roctracer_op_code( - uint32_t domain, // tracing domain - const char* str, // [in] op string - uint32_t* op, // [out] op code - uint32_t* kind); // [out] op kind code if not NULL +roctracer_status_t roctracer_op_code(uint32_t domain, // tracing domain + const char* str, // [in] op string + uint32_t* op, // [out] op code + uint32_t* kind); // [out] op kind code if not NULL //////////////////////////////////////////////////////////////////////////////// // Callback API @@ -106,24 +104,22 @@ typedef activity_rtapi_callback_t roctracer_rtapi_callback_t; // Enable runtime API callbacks roctracer_status_t roctracer_enable_op_callback( - activity_domain_t domain, // tracing domain - uint32_t op, // API call ID - activity_rtapi_callback_t callback, // callback function pointer - void* arg); // [in/out] callback arg + activity_domain_t domain, // tracing domain + uint32_t op, // API call ID + activity_rtapi_callback_t callback, // callback function pointer + void* arg); // [in/out] callback arg roctracer_status_t roctracer_enable_domain_callback( - activity_domain_t domain, // tracing domain - activity_rtapi_callback_t callback, // callback function pointer - void* arg); // [in/out] callback arg + activity_domain_t domain, // tracing domain + activity_rtapi_callback_t callback, // callback function pointer + void* arg); // [in/out] callback arg roctracer_status_t roctracer_enable_callback( - activity_rtapi_callback_t callback, // callback function pointer - void* arg); // [in/out] callback arg + activity_rtapi_callback_t callback, // callback function pointer + void* arg); // [in/out] callback arg // Disable runtime API callbacks -roctracer_status_t roctracer_disable_op_callback( - activity_domain_t domain, // tracing domain - uint32_t op); // API call ID -roctracer_status_t roctracer_disable_domain_callback( - activity_domain_t domain); // tracing domain +roctracer_status_t roctracer_disable_op_callback(activity_domain_t domain, // tracing domain + uint32_t op); // API call ID +roctracer_status_t roctracer_disable_domain_callback(activity_domain_t domain); // tracing domain roctracer_status_t roctracer_disable_callback(); //////////////////////////////////////////////////////////////////////////////// @@ -140,33 +136,32 @@ typedef activity_record_t roctracer_record_t; // Return next record static inline roctracer_status_t roctracer_next_record( - const activity_record_t* record, // [in] record ptr - const activity_record_t** next) // [out] next record ptr + const activity_record_t* record, // [in] record ptr + const activity_record_t** next) // [out] next record ptr { *next = record + 1; return ROCTRACER_STATUS_SUCCESS; } // Tracer allocator type -typedef void (*roctracer_allocator_t)( - char** ptr, // memory pointer - size_t size, // memory size - void* arg); // allocator arg +typedef void (*roctracer_allocator_t)(char** ptr, // memory pointer + size_t size, // memory size + void* arg); // allocator arg // Pool callback type typedef void (*roctracer_buffer_callback_t)( - const char* begin, // [in] available buffered trace records - const char* end, // [in] end of buffered trace records - void* arg); // [in/out] callback arg + const char* begin, // [in] available buffered trace records + const char* end, // [in] end of buffered trace records + void* arg); // [in/out] callback arg // Tracer properties typedef struct { - uint32_t mode; // roctracer mode - size_t buffer_size; // buffer size - roctracer_allocator_t alloc_fun; // memory alocator function pointer - void* alloc_arg; // memory alocator function pointer - roctracer_buffer_callback_t buffer_callback_fun; // tracer record callback function - void* buffer_callback_arg; // tracer record callback arg + uint32_t mode; // roctracer mode + size_t buffer_size; // buffer size + roctracer_allocator_t alloc_fun; // memory alocator function pointer + void* alloc_arg; // memory alocator function pointer + roctracer_buffer_callback_t buffer_callback_fun; // tracer record callback function + void* buffer_callback_arg; // tracer record callback arg } roctracer_properties_t; // Tracer memory pool type @@ -175,78 +170,69 @@ typedef void roctracer_pool_t; // Create tracer memory pool // The first invocation sets the default pool roctracer_status_t roctracer_open_pool_expl( - const roctracer_properties_t* properties, // tracer pool properties - roctracer_pool_t** pool); // [out] returns tracer pool if not NULL, - // otherwise sets the default one if it is not set yet + const roctracer_properties_t* properties, // tracer pool properties + roctracer_pool_t** pool); // [out] returns tracer pool if not NULL, + // otherwise sets the default one if it is not set yet static inline roctracer_status_t roctracer_open_pool( - const roctracer_properties_t* properties) // tracer pool properties + const roctracer_properties_t* properties) // tracer pool properties { - return roctracer_open_pool_expl(properties, NULL); + return roctracer_open_pool_expl(properties, NULL); } - // otherwise the error is generated +// otherwise the error is generated // Close tracer memory pool roctracer_status_t roctracer_close_pool_expl( - roctracer_pool_t* pool); // [in] memory pool, NULL is a default one -static inline roctracer_status_t roctracer_close_pool() -{ - return roctracer_close_pool_expl(NULL); -} + roctracer_pool_t* pool); // [in] memory pool, NULL is a default one +static inline roctracer_status_t roctracer_close_pool() { return roctracer_close_pool_expl(NULL); } // Return current default pool // Set new default pool if the argument is not NULL roctracer_pool_t* roctracer_default_pool_expl( - roctracer_pool_t* pool); // [in] new default pool if not NULL -static inline roctracer_pool_t* roctracer_default_pool() -{ - return roctracer_default_pool_expl(NULL); + roctracer_pool_t* pool); // [in] new default pool if not NULL +static inline roctracer_pool_t* roctracer_default_pool() { + return roctracer_default_pool_expl(NULL); } // Enable activity records logging roctracer_status_t roctracer_enable_op_activity_expl( - activity_domain_t domain, // tracing domain - uint32_t op, // activity op ID - roctracer_pool_t* pool); // memory pool, NULL is a default one + activity_domain_t domain, // tracing domain + uint32_t op, // activity op ID + roctracer_pool_t* pool); // memory pool, NULL is a default one static inline roctracer_status_t roctracer_enable_op_activity( - activity_domain_t domain, // tracing domain - uint32_t op) // activity op ID + activity_domain_t domain, // tracing domain + uint32_t op) // activity op ID { - return roctracer_enable_op_activity_expl(domain, op, NULL); + return roctracer_enable_op_activity_expl(domain, op, NULL); } roctracer_status_t roctracer_enable_domain_activity_expl( - activity_domain_t domain, // tracing domain - roctracer_pool_t* pool); // memory pool, NULL is a default one + activity_domain_t domain, // tracing domain + roctracer_pool_t* pool); // memory pool, NULL is a default one static inline roctracer_status_t roctracer_enable_domain_activity( - activity_domain_t domain) // tracing domain + activity_domain_t domain) // tracing domain { - return roctracer_enable_domain_activity_expl(domain, NULL); + return roctracer_enable_domain_activity_expl(domain, NULL); } roctracer_status_t roctracer_enable_activity_expl( - roctracer_pool_t* pool); // memory pool, NULL is a default one -static inline roctracer_status_t roctracer_enable_activity() -{ - return roctracer_enable_activity_expl(NULL); + roctracer_pool_t* pool); // memory pool, NULL is a default one +static inline roctracer_status_t roctracer_enable_activity() { + return roctracer_enable_activity_expl(NULL); } // Disable activity records logging -roctracer_status_t roctracer_disable_op_activity( - activity_domain_t domain, // tracing domain - uint32_t op); // activity op ID -roctracer_status_t roctracer_disable_domain_activity( - activity_domain_t domain); // tracing domain +roctracer_status_t roctracer_disable_op_activity(activity_domain_t domain, // tracing domain + uint32_t op); // activity op ID +roctracer_status_t roctracer_disable_domain_activity(activity_domain_t domain); // tracing domain roctracer_status_t roctracer_disable_activity(); // Flush available activity records roctracer_status_t roctracer_flush_activity_expl( - roctracer_pool_t* pool); // memory pool, NULL is a default one -static inline roctracer_status_t roctracer_flush_activity() -{ - return roctracer_flush_activity_expl(NULL); + roctracer_pool_t* pool); // memory pool, NULL is a default one +static inline roctracer_status_t roctracer_flush_activity() { + return roctracer_flush_activity_expl(NULL); } // Get system timestamp -roctracer_status_t roctracer_get_timestamp( - uint64_t* timestamp); // [out] return timestamp +roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp); // [out] return timestamp // Load/Unload methods bool roctracer_load(); @@ -254,9 +240,8 @@ void roctracer_unload(); void roctracer_flush_buf(); // Set properties -roctracer_status_t roctracer_set_properties( - roctracer_domain_t domain, // tracing domain - void* propertes); // tracing properties +roctracer_status_t roctracer_set_properties(roctracer_domain_t domain, // tracing domain + void* propertes); // tracing properties #ifdef __cplusplus } // extern "C" block diff --git a/inc/roctracer_cb_table.h b/inc/roctracer_cb_table.h index 0cf65fbfea..def96b3488 100644 --- a/inc/roctracer_cb_table.h +++ b/inc/roctracer_cb_table.h @@ -28,9 +28,8 @@ namespace roctracer { // Generic callbacks table -template -class CbTable { - public: +template class CbTable { + public: typedef std::mutex mutex_t; CbTable() { @@ -63,7 +62,7 @@ class CbTable { return ret; } - private: + private: activity_rtapi_callback_t callback_[N]; void* arg_[N]; mutex_t mutex_; diff --git a/inc/roctracer_ext.h b/inc/roctracer_ext.h index e41bd09402..ccd56617a3 100644 --- a/inc/roctracer_ext.h +++ b/inc/roctracer_ext.h @@ -62,7 +62,8 @@ roctracer_status_t roctracer_activity_push_external_correlation_id(activity_corr // Notifies that the calling thread is leaving an external API region. // Pop an external correlation id for the calling thread. // 'lastId' returns the last external correlation if not NULL -roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id); +roctracer_status_t roctracer_activity_pop_external_correlation_id( + activity_correlation_id_t* last_id); #ifdef __cplusplus } // extern "C" block diff --git a/inc/roctracer_hcc.h b/inc/roctracer_hcc.h index 566061b6d0..16800bd11c 100644 --- a/inc/roctracer_hcc.h +++ b/inc/roctracer_hcc.h @@ -21,19 +21,14 @@ #ifndef INC_ROCTRACER_HCC_H_ #define INC_ROCTRACER_HCC_H_ -enum { - HIP_OP_ID_DISPATCH = 0, - HIP_OP_ID_COPY = 1, - HIP_OP_ID_BARRIER = 2, - HIP_OP_ID_NUMBER = 3 -}; +enum { HIP_OP_ID_DISPATCH = 0, HIP_OP_ID_COPY = 1, HIP_OP_ID_BARRIER = 2, HIP_OP_ID_NUMBER = 3 }; #ifdef __cplusplus extern "C" { #endif -typedef void (hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg); -typedef bool (hipEnableAsyncActivityCallback_t)(unsigned op, bool enable); -typedef const char* (hipGetOpName_t)(unsigned op); +typedef void(hipInitAsyncActivityCallback_t)(void* id_callback, void* op_callback, void* arg); +typedef bool(hipEnableAsyncActivityCallback_t)(unsigned op, bool enable); +typedef const char*(hipGetOpName_t)(unsigned op); #ifdef __cplusplus } #endif diff --git a/inc/roctracer_hip.h b/inc/roctracer_hip.h index 3a8de0ccb1..fc12e6770b 100644 --- a/inc/roctracer_hip.h +++ b/inc/roctracer_hip.h @@ -25,12 +25,12 @@ #include inline static std::ostream& operator<<(std::ostream& out, const unsigned char& v) { - out << (unsigned int)v; + out << (unsigned int)v; return out; } inline static std::ostream& operator<<(std::ostream& out, const char& v) { - out << (unsigned char)v; + out << (unsigned char)v; return out; } #endif // __cplusplus diff --git a/inc/roctracer_hsa.h b/inc/roctracer_hsa.h index 46de976a40..1166e7a889 100644 --- a/inc/roctracer_hsa.h +++ b/inc/roctracer_hsa.h @@ -42,9 +42,7 @@ enum hsa_op_id_t { namespace roctracer { namespace hsa_support { -enum { - HSA_OP_ID_async_copy = 0 -}; +enum { HSA_OP_ID_async_copy = 0 }; extern CoreApiTable CoreApiTable_saved; extern AmdExtTable AmdExtTable_saved; @@ -57,18 +55,18 @@ struct ops_properties_t { const char* output_prefix; }; -}; // namespace hsa_support +}; // namespace hsa_support typedef hsa_support::ops_properties_t hsa_ops_properties_t; -}; // namespace roctracer +}; // namespace roctracer #include "hsa_ostream_ops.h" -#else // !__cplusplus +#else // !__cplusplus typedef void* hsa_amd_queue_intercept_handler; typedef void* hsa_amd_runtime_queue_notifier; -#endif //! __cplusplus +#endif //! __cplusplus #include -#endif // INC_ROCTRACER_HSA_H_ +#endif // INC_ROCTRACER_HSA_H_ diff --git a/inc/roctracer_roctx.h b/inc/roctracer_roctx.h index 456e6242dc..0c08dafc21 100644 --- a/inc/roctracer_roctx.h +++ b/inc/roctracer_roctx.h @@ -47,8 +47,8 @@ enum roctx_api_id_t { typedef struct roctx_api_data_s { union { struct { - const char* message; - roctx_range_id_t id; + const char* message; + roctx_range_id_t id; }; struct { const char* message; diff --git a/inc/roctx.h b/inc/roctx.h index 76bbd0cf66..1f7f6f74b4 100644 --- a/inc/roctx.h +++ b/inc/roctx.h @@ -59,8 +59,8 @@ void roctxMarkA(const char* message); //////////////////////////////////////////////////////////////////////////////// // Ranges annotating API -// Returns the 0 based level of a nested range being started by given message associated to this range. -// A negative value is returned on the error. +// Returns the 0 based level of a nested range being started by given message associated to this +// range. A negative value is returned on the error. int roctxRangePushA(const char* message); #define roctxRangePush(message) roctxRangePushA(message) diff --git a/src/core/hip_act_cb_tracker.h b/src/core/hip_act_cb_tracker.h index f6835fe241..a3ed2c6f13 100644 --- a/src/core/hip_act_cb_tracker.h +++ b/src/core/hip_act_cb_tracker.h @@ -27,7 +27,7 @@ namespace roctracer { enum { API_CB_MASK = 0x1, ACT_CB_MASK = 0x2 }; class hip_act_cb_tracker_t { - private: + private: std::map data; public: diff --git a/src/core/loader.h b/src/core/loader.h index 4980fbabee..68906ff1e1 100644 --- a/src/core/loader.h +++ b/src/core/loader.h @@ -25,29 +25,29 @@ #include #include -#define ONLD_TRACE(str) \ - if (getenv("ROCP_ONLOAD_TRACE")) do { \ - std::cout << "PID(" << GetPid() << "): TRACER_LOADER::" << __FUNCTION__ << " " << str << std::endl << std::flush; \ - } while(0); +#define ONLD_TRACE(str) \ + if (getenv("ROCP_ONLOAD_TRACE")) do { \ + std::cout << "PID(" << GetPid() << "): TRACER_LOADER::" << __FUNCTION__ << " " << str \ + << std::endl \ + << std::flush; \ + } while (0); namespace roctracer { // Base runtime loader class -template -class BaseLoader : public T { +template class BaseLoader : public T { static uint32_t GetPid() { return syscall(__NR_getpid); } - public: + public: typedef std::mutex mutex_t; typedef BaseLoader loader_t; bool Enabled() const { return (handle_ != NULL); } - template - fun_t* GetFun(const char* fun_name) { + template fun_t* GetFun(const char* fun_name) { if (handle_ == NULL) return NULL; - fun_t *f = (fun_t*) dlsym(handle_, fun_name); + fun_t* f = (fun_t*)dlsym(handle_, fun_name); if ((to_check_symb_ == true) && (f == NULL)) { fprintf(stderr, "roctracer: symbol lookup '%s' failed: \"%s\"\n", fun_name, dlerror()); abort(); @@ -68,11 +68,11 @@ class BaseLoader : public T { } static loader_t* GetRef() { return instance_; } - static void SetLibName(const char *name) { lib_name_ = name; } + static void SetLibName(const char* name) { lib_name_ = name; } - private: + private: BaseLoader() { - const int flags = (to_load_ == true) ? RTLD_LAZY : RTLD_LAZY|RTLD_NOLOAD; + const int flags = (to_load_ == true) ? RTLD_LAZY : RTLD_LAZY | RTLD_NOLOAD; handle_ = dlopen(lib_name_, flags); ONLD_TRACE("(" << lib_name_ << " = " << handle_ << ")"); if ((to_check_open_ == true) && (handle_ == NULL)) { @@ -99,14 +99,14 @@ class BaseLoader : public T { // 'rocprofiler' library loader class class RocpApi { - public: + public: typedef BaseLoader Loader; - typedef bool (RegisterCallback_t)(uint32_t op, void* callback, void* arg); - typedef bool (OperateCallback_t)(uint32_t op); - typedef bool (InitCallback_t)(void* callback, void* arg); - typedef bool (EnableCallback_t)(uint32_t op, bool enable); - typedef const char* (NameCallback_t)(uint32_t op); + typedef bool(RegisterCallback_t)(uint32_t op, void* callback, void* arg); + typedef bool(OperateCallback_t)(uint32_t op); + typedef bool(InitCallback_t)(void* callback, void* arg); + typedef bool(EnableCallback_t)(uint32_t op, bool enable); + typedef const char*(NameCallback_t)(uint32_t op); RegisterCallback_t* RegisterApiCallback; OperateCallback_t* RemoveApiCallback; @@ -118,7 +118,7 @@ class RocpApi { OperateCallback_t* RemoveEvtCallback; NameCallback_t* GetEvtName; - protected: + protected: void init(Loader* loader) { RegisterApiCallback = loader->GetFun("RegisterApiCallback"); RemoveApiCallback = loader->GetFun("RemoveApiCallback"); @@ -134,29 +134,36 @@ class RocpApi { // HIP runtime library loader class #if STATIC_BUILD -__attribute__((weak)) hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) { return hipErrorUnknown; } +__attribute__((weak)) hipError_t hipRegisterApiCallback(uint32_t id, void* fun, void* arg) { + return hipErrorUnknown; +} __attribute__((weak)) hipError_t hipRemoveApiCallback(uint32_t id) { return hipErrorUnknown; } -__attribute__((weak)) hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) { return hipErrorUnknown; } +__attribute__((weak)) hipError_t hipRegisterActivityCallback(uint32_t id, void* fun, void* arg) { + return hipErrorUnknown; +} __attribute__((weak)) hipError_t hipRemoveActivityCallback(uint32_t id) { return hipErrorUnknown; } __attribute__((weak)) const char* hipKernelNameRef(const hipFunction_t f) { return NULL; } -__attribute__((weak)) const char* hipKernelNameRefByPtr(const void* hostFunction, hipStream_t stream) { return NULL; } +__attribute__((weak)) const char* hipKernelNameRefByPtr(const void* hostFunction, + hipStream_t stream) { + return NULL; +} __attribute__((weak)) int hipGetStreamDeviceId(hipStream_t stream) { return 0; } __attribute__((weak)) const char* hipApiName(uint32_t id) { return NULL; } class HipLoaderStatic { - public: + public: typedef std::mutex mutex_t; typedef HipLoaderStatic loader_t; typedef std::atomic instance_t; - typedef hipError_t (RegisterApiCallback_t)(uint32_t id, void* fun, void* arg); - typedef hipError_t (RemoveApiCallback_t)(uint32_t id); - typedef hipError_t (RegisterActivityCallback_t)(uint32_t id, void* fun, void* arg); - typedef hipError_t (RemoveActivityCallback_t)(uint32_t id); - typedef const char* (KernelNameRef_t)(const hipFunction_t f); - typedef const char* (KernelNameRefByPtr_t)(const void* hostFunction, hipStream_t stream); - typedef int (GetStreamDeviceId_t)(hipStream_t stream); - typedef const char* (ApiName_t)(uint32_t id); + typedef hipError_t(RegisterApiCallback_t)(uint32_t id, void* fun, void* arg); + typedef hipError_t(RemoveApiCallback_t)(uint32_t id); + typedef hipError_t(RegisterActivityCallback_t)(uint32_t id, void* fun, void* arg); + typedef hipError_t(RemoveActivityCallback_t)(uint32_t id); + typedef const char*(KernelNameRef_t)(const hipFunction_t f); + typedef const char*(KernelNameRefByPtr_t)(const void* hostFunction, hipStream_t stream); + typedef int(GetStreamDeviceId_t)(hipStream_t stream); + typedef const char*(ApiName_t)(uint32_t id); RegisterApiCallback_t* RegisterApiCallback; RemoveApiCallback_t* RemoveApiCallback; @@ -181,7 +188,7 @@ class HipLoaderStatic { bool Enabled() const { return true; } - private: + private: HipLoaderStatic() { RegisterApiCallback = hipRegisterApiCallback; RemoveApiCallback = hipRemoveApiCallback; @@ -189,7 +196,7 @@ class HipLoaderStatic { RemoveActivityCallback = hipRemoveActivityCallback; KernelNameRef = hipKernelNameRef; KernelNameRefByPtr = hipKernelNameRefByPtr; - GetStreamDeviceId = hipGetStreamDeviceId; + GetStreamDeviceId = hipGetStreamDeviceId; ApiName = hipApiName; } @@ -198,7 +205,7 @@ class HipLoaderStatic { }; #else class HipApi { - public: + public: typedef BaseLoader Loader; typedef decltype(hipRegisterApiCallback) RegisterApiCallback_t; @@ -219,15 +226,16 @@ class HipApi { GetStreamDeviceId_t* GetStreamDeviceId; ApiName_t* ApiName; - protected: + protected: void init(Loader* loader) { RegisterApiCallback = loader->GetFun("hipRegisterApiCallback"); RemoveApiCallback = loader->GetFun("hipRemoveApiCallback"); - RegisterActivityCallback = loader->GetFun("hipRegisterActivityCallback"); + RegisterActivityCallback = + loader->GetFun("hipRegisterActivityCallback"); RemoveActivityCallback = loader->GetFun("hipRemoveActivityCallback"); KernelNameRef = loader->GetFun("hipKernelNameRef"); KernelNameRefByPtr = loader->GetFun("hipKernelNameRefByPtr"); - GetStreamDeviceId = loader->GetFun("hipGetStreamDeviceId"); + GetStreamDeviceId = loader->GetFun("hipGetStreamDeviceId"); ApiName = loader->GetFun("hipApiName"); } }; @@ -236,17 +244,19 @@ class HipApi { // HCC runtime library loader class #include "inc/roctracer_hcc.h" class HccApi { - public: + public: typedef BaseLoader Loader; hipInitAsyncActivityCallback_t* InitActivityCallback; hipEnableAsyncActivityCallback_t* EnableActivityCallback; hipGetOpName_t* GetOpName; - protected: + protected: void init(Loader* loader) { - InitActivityCallback = loader->GetFun("hipInitActivityCallback"); - EnableActivityCallback = loader->GetFun("hipEnableActivityCallback"); + InitActivityCallback = + loader->GetFun("hipInitActivityCallback"); + EnableActivityCallback = + loader->GetFun("hipEnableActivityCallback"); GetOpName = loader->GetFun("hipGetCmdName"); } }; @@ -254,7 +264,7 @@ class HccApi { // rocTX runtime library loader class #include "inc/roctracer_roctx.h" class RocTxApi { - public: + public: typedef BaseLoader Loader; typedef decltype(RegisterApiCallback) RegisterApiCallback_t; @@ -265,7 +275,7 @@ class RocTxApi { RemoveApiCallback_t* RemoveApiCallback; RangeStackIterate_t* RangeStackIterate; - protected: + protected: void init(Loader* loader) { RegisterApiCallback = loader->GetFun("RegisterApiCallback"); RemoveApiCallback = loader->GetFun("RemoveApiCallback"); @@ -284,31 +294,31 @@ typedef BaseLoader HipLoaderShared; typedef HipLoaderShared HipLoader; #endif -} // namespace roctracer +} // namespace roctracer -#define LOADER_INSTANTIATE_2() \ - template typename roctracer::BaseLoader::mutex_t roctracer::BaseLoader::mutex_; \ - template std::atomic*> roctracer::BaseLoader::instance_{}; \ - template bool roctracer::BaseLoader::to_load_ = false; \ - template bool roctracer::BaseLoader::to_check_open_ = true; \ - template bool roctracer::BaseLoader::to_check_symb_ = true; \ - template<> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \ - template<> bool roctracer::RocpLoader::to_load_ = true; \ - template<> const char* roctracer::HccLoader::lib_name_ = "libamdhip64.so"; \ - template<> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \ - template<> bool roctracer::RocTxLoader::to_load_ = true; +#define LOADER_INSTANTIATE_2() \ + template typename roctracer::BaseLoader::mutex_t roctracer::BaseLoader::mutex_; \ + template std::atomic*> roctracer::BaseLoader::instance_{}; \ + template bool roctracer::BaseLoader::to_load_ = false; \ + template bool roctracer::BaseLoader::to_check_open_ = true; \ + template bool roctracer::BaseLoader::to_check_symb_ = true; \ + template <> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \ + template <> bool roctracer::RocpLoader::to_load_ = true; \ + template <> const char* roctracer::HccLoader::lib_name_ = "libamdhip64.so"; \ + template <> const char* roctracer::RocTxLoader::lib_name_ = "libroctx64.so"; \ + template <> bool roctracer::RocTxLoader::to_load_ = true; #if STATIC_BUILD -#define LOADER_INSTANTIATE_HIP() \ - roctracer::HipLoaderStatic::mutex_t roctracer::HipLoaderStatic::mutex_; \ +#define LOADER_INSTANTIATE_HIP() \ + roctracer::HipLoaderStatic::mutex_t roctracer::HipLoaderStatic::mutex_; \ roctracer::HipLoaderStatic::instance_t roctracer::HipLoaderStatic::instance_{}; #else -#define LOADER_INSTANTIATE_HIP() \ - template<> const char* roctracer::HipLoaderShared::lib_name_ = "libamdhip64.so"; +#define LOADER_INSTANTIATE_HIP() \ + template <> const char* roctracer::HipLoaderShared::lib_name_ = "libamdhip64.so"; #endif -#define LOADER_INSTANTIATE() \ - LOADER_INSTANTIATE_2(); \ +#define LOADER_INSTANTIATE() \ + LOADER_INSTANTIATE_2(); \ LOADER_INSTANTIATE_HIP(); -#endif // SRC_CORE_LOADER_H_ +#endif // SRC_CORE_LOADER_H_ diff --git a/src/core/memory_pool.h b/src/core/memory_pool.h index 5b10d1f31f..784565cec5 100644 --- a/src/core/memory_pool.h +++ b/src/core/memory_pool.h @@ -42,7 +42,7 @@ namespace roctracer { class MemoryPool { - public: + public: typedef std::mutex mutex_t; static void allocator_default(char** ptr, size_t size, void* arg) { @@ -52,12 +52,12 @@ class MemoryPool { } else if (size != 0) { *ptr = reinterpret_cast(realloc(*ptr, size)); } else { - free(*ptr); + free(*ptr); *ptr = NULL; } } - MemoryPool(const roctracer_properties_t& properties) { + MemoryPool(const roctracer_properties_t& properties) { // Assigning pool allocator alloc_fun_ = allocator_default; alloc_arg_ = NULL; @@ -89,14 +89,14 @@ class MemoryPool { ~MemoryPool() { Flush(); PTHREAD_CALL(pthread_cancel(consumer_thread_)); - void *res; + void* res; PTHREAD_CALL(pthread_join(consumer_thread_, &res)); - if (res != PTHREAD_CANCELED) EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly"); + if (res != PTHREAD_CANCELED) + EXC_ABORT(ROCTRACER_STATUS_ERROR, "consumer thread wasn't stopped correctly"); allocator_default(&pool_begin_, 0, alloc_arg_); } - template - void Write(const Record& record) { + template void Write(const Record& record) { std::lock_guard lock(write_mutex_); getRecord(record); } @@ -112,7 +112,7 @@ class MemoryPool { } } - private: + private: struct consumer_arg_t { MemoryPool* obj; const char* begin; @@ -126,11 +126,13 @@ class MemoryPool { } }; - template - Record* getRecord(const Record& init) { + template Record* getRecord(const Record& init) { char* next = write_ptr_ + sizeof(Record); if (next > buffer_end_) { - if (write_ptr_ == buffer_begin_) EXC_ABORT(ROCTRACER_STATUS_ERROR, "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) << ")"); + if (write_ptr_ == buffer_begin_) + EXC_ABORT(ROCTRACER_STATUS_ERROR, + "buffer size(" << buffer_size_ << ") is less then the record(" << sizeof(Record) + << ")"); spawn_reader(buffer_begin_, write_ptr_); buffer_begin_ = (buffer_end_ == pool_end_) ? pool_begin_ : buffer_end_; buffer_end_ = buffer_begin_ + buffer_size_; @@ -145,12 +147,10 @@ class MemoryPool { return ptr; } - static void reset_reader(consumer_arg_t* arg) { - arg->valid.store(false); - } + static void reset_reader(consumer_arg_t* arg) { arg->valid.store(false); } static void sync_reader(const consumer_arg_t* arg) { - while(arg->valid.load() == true) PTHREAD_CALL(sched_yield()); + while (arg->valid.load() == true) PTHREAD_CALL(sched_yield()); } static void* reader_fun(void* consumer_arg) { diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp index ce8fff3df1..4464b1404e 100644 --- a/src/core/roctracer.cpp +++ b/src/core/roctracer.cpp @@ -66,7 +66,6 @@ #define API_METHOD_PREFIX \ roctracer_status_t err = ROCTRACER_STATUS_SUCCESS; \ try { - #define API_METHOD_SUFFIX \ } \ catch (std::exception & e) { \ @@ -83,10 +82,12 @@ (void)err; \ return X; -#define ONLOAD_TRACE(str) \ - if (getenv("ROCP_ONLOAD_TRACE")) do { \ - std::cout << "PID(" << GetPid() << "): TRACER_LIB::" << __FUNCTION__ << " " << str << std::endl << std::flush; \ - } while(0); +#define ONLOAD_TRACE(str) \ + if (getenv("ROCP_ONLOAD_TRACE")) do { \ + std::cout << "PID(" << GetPid() << "): TRACER_LIB::" << __FUNCTION__ << " " << str \ + << std::endl \ + << std::flush; \ + } while (0); #define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin") #define ONLOAD_TRACE_END() ONLOAD_TRACE("end") @@ -95,7 +96,8 @@ static inline uint32_t GetPid() { return syscall(__NR_getpid); } /////////////////////////////////////////////////////////////////////////////////////////////////// // Mark callback // -typedef void (mark_api_callback_t)(uint32_t domain, uint32_t cid, const void* callback_data, void* arg); +typedef void(mark_api_callback_t)(uint32_t domain, uint32_t cid, const void* callback_data, + void* arg); mark_api_callback_t* mark_api_callback_ptr = NULL; /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -195,7 +197,8 @@ ActJournal* act_journal; template struct journal_functor_t { Functor func_; journal_functor_t(Functor&& f) : func_(std::forward(f)) {} - template bool operator ()(activity_domain_t domain, uint32_t op, Data&& /* data */) const { + template + bool operator()(activity_domain_t domain, uint32_t op, Data&& /* data */) const { func_(domain, op); return true; } @@ -208,13 +211,13 @@ using act_dis_functor_t = journal_functor_t; template <> template -bool cb_en_functor_t::operator ()(activity_domain_t domain, uint32_t op, Data&& data) const { +bool cb_en_functor_t::operator()(activity_domain_t domain, uint32_t op, Data&& data) const { func_(domain, op, data.callback, data.user_data); return true; } template <> template -bool act_en_functor_t::operator ()(activity_domain_t domain, uint32_t op, Data&& data) const { +bool act_en_functor_t::operator()(activity_domain_t domain, uint32_t op, Data&& data) const { func_(domain, op, data.pool); return true; } @@ -222,9 +225,7 @@ bool act_en_functor_t::operator ()(activity_domain_t domain, uint32_t op, Data&& void hsa_async_copy_handler(::proxy::Tracker::entry_t* entry); void hsa_kernel_handler(::proxy::Tracker::entry_t* entry); constexpr TraceBuffer::flush_prm_t trace_buffer_prm[] = { - {COPY_ENTRY_TYPE, hsa_async_copy_handler}, - {KERNEL_ENTRY_TYPE, hsa_kernel_handler} -}; + {COPY_ENTRY_TYPE, hsa_async_copy_handler}, {KERNEL_ENTRY_TYPE, hsa_kernel_handler}}; TraceBuffer* trace_buffer = NULL; namespace hsa_support { @@ -250,18 +251,19 @@ roctracer_stop_cb_t roctracer_stop_cb = NULL; roctracer_status_t GetExcStatus(const std::exception& e) { const util::exception* roctracer_exc_ptr = dynamic_cast(&e); - return (roctracer_exc_ptr) ? static_cast(roctracer_exc_ptr->status()) : ROCTRACER_STATUS_ERROR; + return (roctracer_exc_ptr) ? static_cast(roctracer_exc_ptr->status()) + : ROCTRACER_STATUS_ERROR; } class GlobalCounter { - public: + public: typedef std::mutex mutex_t; typedef uint64_t counter_t; typedef std::atomic atomic_counter_t; static counter_t Increment() { return counter_.fetch_add(1, std::memory_order_relaxed); } - private: + private: static mutex_t mutex_; static atomic_counter_t counter_; }; @@ -273,12 +275,12 @@ struct roctracer_api_data_t { union { hip_api_data_t hip; }; - roctracer_api_data_t() {}; + roctracer_api_data_t(){}; }; struct record_pair_t { roctracer_record_t record; roctracer_api_data_t data; - record_pair_t() {}; + record_pair_t(){}; }; typedef std::stack record_pair_stack_t; static thread_local record_pair_stack_t* record_pair_stack = NULL; @@ -295,15 +297,18 @@ static thread_local std::stack external_id_stack; static inline void CorrelationIdRegistr(const activity_correlation_id_t& correlation_id) { std::lock_guard lck(correlation_id_mutex); const auto ret = correlation_id_map.insert({correlation_id, correlation_id_tls}); - if (ret.second == false) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id is not unique(" << correlation_id << ")"); + if (ret.second == false) + EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id is not unique(" << correlation_id << ")"); DEBUG_TRACE("CorrelationIdRegistr id(%lu) id_tls(%lu)\n", correlation_id, correlation_id_tls); } -static inline activity_correlation_id_t CorrelationIdLookup(const activity_correlation_id_t& correlation_id) { +static inline activity_correlation_id_t CorrelationIdLookup( + const activity_correlation_id_t& correlation_id) { std::lock_guard lck(correlation_id_mutex); auto it = correlation_id_map.find(correlation_id); - if (it == correlation_id_map.end()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id lookup failed(" << correlation_id << ")"); + if (it == correlation_id_map.end()) + EXC_ABORT(ROCTRACER_STATUS_ERROR, "HCC activity id lookup failed(" << correlation_id << ")"); const activity_correlation_id_t ret_val = it->second; correlation_id_map.erase(it); @@ -320,14 +325,14 @@ hip_act_cb_tracker_t* hip_act_cb_tracker = NULL; inline uint32_t HipApiActivityEnableCheck(uint32_t op) { if (hip_act_cb_tracker == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "hip_act_cb_tracker is NULL"); const uint32_t mask = hip_act_cb_tracker->enable_check(op, API_CB_MASK); - const uint32_t ret = (mask & ACT_CB_MASK); + const uint32_t ret = (mask & ACT_CB_MASK); return ret; } inline uint32_t HipApiActivityDisableCheck(uint32_t op) { if (hip_act_cb_tracker == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "hip_act_cb_tracker is NULL"); const uint32_t mask = hip_act_cb_tracker->disable_check(op, API_CB_MASK); - const uint32_t ret = (mask & ACT_CB_MASK); + const uint32_t ret = (mask & ACT_CB_MASK); return ret; } @@ -340,16 +345,12 @@ inline uint32_t HipActActivityEnableCheck(uint32_t op) { inline uint32_t HipActActivityDisableCheck(uint32_t op) { if (hip_act_cb_tracker == NULL) EXC_ABORT(ROCTRACER_STATUS_ERROR, "hip_act_cb_tracker is NULL"); const uint32_t mask = hip_act_cb_tracker->disable_check(op, ACT_CB_MASK); - const uint32_t ret = (mask & API_CB_MASK); + const uint32_t ret = (mask & API_CB_MASK); return ret; } -void* HIP_SyncApiDataCallback( - uint32_t op_id, - roctracer_record_t* record, - const void* callback_data, - void* arg) -{ +void* HIP_SyncApiDataCallback(uint32_t op_id, roctracer_record_t* record, const void* callback_data, + void* arg) { static hsa_rt_utils::Timer timer; if (record_pair_stack == NULL) record_pair_stack = new record_pair_stack_t; @@ -397,19 +398,18 @@ void* HIP_SyncApiDataCallback( correlation_id_tls = 0; } - const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0); - DEBUG_TRACE("HIP_SyncApiDataCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) correlation_id(%lu) time_ns(%lu)\n", - name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0, timer.timestamp_ns()); + const char* name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0); + DEBUG_TRACE( + "HIP_SyncApiDataCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) " + "correlation_id(%lu) time_ns(%lu)\n", + name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), + (data_ptr) ? data_ptr->correlation_id : 0, timer.timestamp_ns()); return ret; } -void* HIP_SyncActivityCallback( - uint32_t op_id, - roctracer_record_t* record, - const void* callback_data, - void* arg) -{ +void* HIP_SyncActivityCallback(uint32_t op_id, roctracer_record_t* record, + const void* callback_data, void* arg) { static hsa_rt_utils::Timer timer; const timestamp_t timestamp_ns = timer.timestamp_ns(); if (record_pair_stack == NULL) record_pair_stack = new record_pair_stack_t; @@ -462,8 +462,9 @@ void* HIP_SyncActivityCallback( // Getting record of stacked if (record == NULL) { - if (record_pair_stack->empty()) EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: record stack is empty"); - auto& top = record_pair_stack->top(); + if (record_pair_stack->empty()) + EXC_ABORT(ROCTRACER_STATUS_ERROR, "ActivityCallback exit: record stack is empty"); + auto& top = record_pair_stack->top(); record = &(top.record); } @@ -491,9 +492,12 @@ void* HIP_SyncActivityCallback( correlation_id_tls = 0; } - const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0); - DEBUG_TRACE("HIP_SyncActivityCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) correlation_id(%lu) beg_ns(%lu) end_ns(%lu)\n", - name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), (data_ptr) ? data_ptr->correlation_id : 0, timestamp_ns); + const char* name = roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, op_id, 0); + DEBUG_TRACE( + "HIP_SyncActivityCallback(\"%s\") phase(%d): op(%u) record(%p) data(%p) pool(%p) depth(%d) " + "correlation_id(%lu) beg_ns(%lu) end_ns(%lu)\n", + name, phase, op_id, record, data, pool, (int)(record_pair_stack->size()), + (data_ptr) ? data_ptr->correlation_id : 0, timestamp_ns); return ret; } @@ -510,9 +514,12 @@ void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) { if (record_ptr->correlation_id == 0) return; pool->Write(*record_ptr); - const char * name = roctracer_op_string(ACTIVITY_DOMAIN_HCC_OPS, record_ptr->op, record_ptr->kind); - DEBUG_TRACE("HCC_AsyncActivityCallback(\"%s\"): op(%u) kind(%u) record(%p) pool(%p) correlation_id(%d) beg_ns(%lu) end_ns(%lu)\n", - name, record_ptr->op, record_ptr->kind, record, pool, record_ptr->correlation_id, record_ptr->begin_ns, record_ptr->end_ns); + const char* name = roctracer_op_string(ACTIVITY_DOMAIN_HCC_OPS, record_ptr->op, record_ptr->kind); + DEBUG_TRACE( + "HCC_AsyncActivityCallback(\"%s\"): op(%u) kind(%u) record(%p) pool(%p) correlation_id(%d) " + "beg_ns(%lu) end_ns(%lu)\n", + name, record_ptr->op, record_ptr->kind, record, pool, record_ptr->correlation_id, + record_ptr->begin_ns, record_ptr->end_ns); } // Open output file @@ -528,7 +535,8 @@ FILE* open_output_file(const char* prefix, const char* name) { perror(errmsg.str().c_str()); abort(); } - } else file_handle = stdout; + } else + file_handle = stdout; return file_handle; } @@ -542,16 +550,11 @@ void hsa_kernel_handler(::proxy::Tracker::entry_t* entry) { if (index == 0) { kernel_file_handle = open_output_file(hsa_support::output_prefix, "results.txt"); } - fprintf(kernel_file_handle, "dispatch[%lu], gpu-id(%u), tid(%u), kernel-name(\"%s\"), time(%lu,%lu,%lu,%lu)\n", - index, - //::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index, - entry->dev_index, - entry->kernel.tid, - entry->kernel.name, - entry->dispatch, - entry->begin, - entry->end, - entry->complete); + fprintf(kernel_file_handle, + "dispatch[%lu], gpu-id(%u), tid(%u), kernel-name(\"%s\"), time(%lu,%lu,%lu,%lu)\n", index, + //::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index, + entry->dev_index, entry->kernel.tid, entry->kernel.name, entry->dispatch, entry->begin, + entry->end, entry->complete); #if 0 fprintf(file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), tid(%lu), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), kernel-name(\"%s\")", index, @@ -581,32 +584,29 @@ void hsa_kernel_handler(::proxy::Tracker::entry_t* entry) { void hsa_async_copy_handler(::proxy::Tracker::entry_t* entry) { activity_record_t record{}; - record.domain = ACTIVITY_DOMAIN_HSA_OPS; // activity domain id - record.begin_ns = entry->begin; // host begin timestamp - record.end_ns = entry->end; // host end timestamp - record.device_id = 0; // device id + record.domain = ACTIVITY_DOMAIN_HSA_OPS; // activity domain id + record.begin_ns = entry->begin; // host begin timestamp + record.end_ns = entry->end; // host end timestamp + record.device_id = 0; // device id - hsa_support::async_copy_callback_fun(hsa_support::HSA_OP_ID_async_copy, &record, hsa_support::async_copy_callback_arg); + hsa_support::async_copy_callback_fun(hsa_support::HSA_OP_ID_async_copy, &record, + hsa_support::async_copy_callback_arg); } -hsa_status_t hsa_amd_memory_async_copy_interceptor( - void* dst, hsa_agent_t dst_agent, const void* src, - hsa_agent_t src_agent, size_t size, uint32_t num_dep_signals, - const hsa_signal_t* dep_signals, hsa_signal_t completion_signal) -{ +hsa_status_t hsa_amd_memory_async_copy_interceptor(void* dst, hsa_agent_t dst_agent, + const void* src, hsa_agent_t src_agent, + size_t size, uint32_t num_dep_signals, + const hsa_signal_t* dep_signals, + hsa_signal_t completion_signal) { hsa_status_t status = HSA_STATUS_SUCCESS; if (hsa_support::async_copy_callback_enabled) { trace_entry_t* entry = trace_buffer->GetEntry(); ::proxy::Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); - status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, - src_agent, size, num_dep_signals, + status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, entry->signal); if (status != HSA_STATUS_SUCCESS) ::proxy::Tracker::Disable(entry); - } - else - { - status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, - src_agent, size, num_dep_signals, + } else { + status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src, src_agent, size, num_dep_signals, dep_signals, completion_signal); } return status; @@ -616,24 +616,18 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor( const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset, const hsa_dim3_t* range, hsa_agent_t copy_agent, hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, const hsa_signal_t* dep_signals, - hsa_signal_t completion_signal) -{ + hsa_signal_t completion_signal) { hsa_status_t status = HSA_STATUS_SUCCESS; if (hsa_support::async_copy_callback_enabled) { trace_entry_t* entry = trace_buffer->GetEntry(); ::proxy::Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry); - status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, - src_offset, range, copy_agent, - dir, num_dep_signals, dep_signals, - entry->signal); + status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent, + dir, num_dep_signals, dep_signals, entry->signal); if (status != HSA_STATUS_SUCCESS) ::proxy::Tracker::Disable(entry); - } - else - { - status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, - src_offset, range, copy_agent, - dir, num_dep_signals, dep_signals, - completion_signal); + } else { + status = + hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src, src_offset, range, copy_agent, dir, + num_dep_signals, dep_signals, completion_signal); } return status; } @@ -685,11 +679,7 @@ PUBLIC_API const char* roctracer_error_string() { // Return Op string by given domain and activity/API codes // NULL returned on the error and the library errno is set -PUBLIC_API const char* roctracer_op_string( - uint32_t domain, - uint32_t op, - uint32_t kind) -{ +PUBLIC_API const char* roctracer_op_string(uint32_t domain, uint32_t op, uint32_t kind) { API_METHOD_PREFIX switch (domain) { case ACTIVITY_DOMAIN_HSA_API: @@ -711,18 +701,15 @@ PUBLIC_API const char* roctracer_op_string( } // Return Op code and kind by given string -PUBLIC_API roctracer_status_t roctracer_op_code( - uint32_t domain, - const char* str, - uint32_t* op, - uint32_t* kind) -{ +PUBLIC_API roctracer_status_t roctracer_op_code(uint32_t domain, const char* str, uint32_t* op, + uint32_t* kind) { API_METHOD_PREFIX switch (domain) { case ACTIVITY_DOMAIN_HSA_API: { *op = roctracer::hsa_support::GetApiCode(str); if (*op == HSA_API_ID_NUMBER) { - EXC_RAISING(ROCTRACER_STATUS_BAD_PARAMETER, "Invalid API name \"" << str << "\", domain ID(" << domain << ")"); + EXC_RAISING(ROCTRACER_STATUS_BAD_PARAMETER, + "Invalid API name \"" << str << "\", domain ID(" << domain << ")"); } if (kind != NULL) *kind = 0; break; @@ -730,7 +717,8 @@ PUBLIC_API roctracer_status_t roctracer_op_code( case ACTIVITY_DOMAIN_HIP_API: { *op = hipApiIdByName(str); if (*op == HIP_API_ID_NONE) { - EXC_RAISING(ROCTRACER_STATUS_BAD_PARAMETER, "Invalid API name \"" << str << "\", domain ID(" << domain << ")"); + EXC_RAISING(ROCTRACER_STATUS_BAD_PARAMETER, + "Invalid API name \"" << str << "\", domain ID(" << domain << ")"); } if (kind != NULL) *kind = 0; break; @@ -743,13 +731,20 @@ PUBLIC_API roctracer_status_t roctracer_op_code( static inline uint32_t get_op_begin(uint32_t domain) { switch (domain) { - case ACTIVITY_DOMAIN_HSA_OPS: return 0; - case ACTIVITY_DOMAIN_HSA_API: return 0; - case ACTIVITY_DOMAIN_HSA_EVT: return 0; - case ACTIVITY_DOMAIN_HCC_OPS: return 0; - case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_FIRST; - case ACTIVITY_DOMAIN_EXT_API: return 0; - case ACTIVITY_DOMAIN_ROCTX: return 0; + case ACTIVITY_DOMAIN_HSA_OPS: + return 0; + case ACTIVITY_DOMAIN_HSA_API: + return 0; + case ACTIVITY_DOMAIN_HSA_EVT: + return 0; + case ACTIVITY_DOMAIN_HCC_OPS: + return 0; + case ACTIVITY_DOMAIN_HIP_API: + return HIP_API_ID_FIRST; + case ACTIVITY_DOMAIN_EXT_API: + return 0; + case ACTIVITY_DOMAIN_ROCTX: + return 0; default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } @@ -758,13 +753,21 @@ static inline uint32_t get_op_begin(uint32_t domain) { static inline uint32_t get_op_end(uint32_t domain) { switch (domain) { - case ACTIVITY_DOMAIN_HSA_OPS: return HSA_OP_ID_NUMBER; - case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER; - case ACTIVITY_DOMAIN_HSA_EVT: return HSA_EVT_ID_NUMBER; - case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER; - case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_LAST + 1;; - case ACTIVITY_DOMAIN_EXT_API: return 0; - case ACTIVITY_DOMAIN_ROCTX: return ROCTX_API_ID_NUMBER; + case ACTIVITY_DOMAIN_HSA_OPS: + return HSA_OP_ID_NUMBER; + case ACTIVITY_DOMAIN_HSA_API: + return HSA_API_ID_NUMBER; + case ACTIVITY_DOMAIN_HSA_EVT: + return HSA_EVT_ID_NUMBER; + case ACTIVITY_DOMAIN_HCC_OPS: + return HIP_OP_ID_NUMBER; + case ACTIVITY_DOMAIN_HIP_API: + return HIP_API_ID_LAST + 1; + ; + case ACTIVITY_DOMAIN_EXT_API: + return 0; + case ACTIVITY_DOMAIN_ROCTX: + return ROCTX_API_ID_NUMBER; default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } @@ -772,14 +775,12 @@ static inline uint32_t get_op_end(uint32_t domain) { } // Enable runtime API callbacks -static roctracer_status_t roctracer_enable_callback_fun( - roctracer_domain_t domain, - uint32_t op, - roctracer_rtapi_callback_t callback, - void* user_data) -{ +static roctracer_status_t roctracer_enable_callback_fun(roctracer_domain_t domain, uint32_t op, + roctracer_rtapi_callback_t callback, + void* user_data) { switch (domain) { - case ACTIVITY_DOMAIN_HSA_OPS: break; + case ACTIVITY_DOMAIN_HSA_OPS: + break; case ACTIVITY_DOMAIN_HSA_API: { #if 0 if (op == HSA_API_ID_DISPATCH) { @@ -792,28 +793,42 @@ static roctracer_status_t roctracer_enable_callback_fun( break; } case ACTIVITY_DOMAIN_HSA_EVT: { - const bool succ = roctracer::RocpLoader::Instance().RegisterEvtCallback(op, (void*)callback, user_data); - if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RegisterEvtCallback error(" << op << ") failed"); + const bool succ = + roctracer::RocpLoader::Instance().RegisterEvtCallback(op, (void*)callback, user_data); + if (succ == false) + HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, + "HSA::RegisterEvtCallback error(" << op << ") failed"); break; } - case ACTIVITY_DOMAIN_HCC_OPS: break; + case ACTIVITY_DOMAIN_HCC_OPS: + break; case ACTIVITY_DOMAIN_HIP_API: { if (roctracer::HipLoader::Instance().Enabled() == false) break; std::lock_guard lock(roctracer::hip_activity_mutex); - hipError_t hip_err = roctracer::HipLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RegisterApiCallback(" << op << ") error(" << hip_err << ")"); + hipError_t hip_err = + roctracer::HipLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); + if (hip_err != hipSuccess) + HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, + "HIP::RegisterApiCallback(" << op << ") error(" << hip_err << ")"); if (roctracer::HipApiActivityEnableCheck(op) == 0) { - hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(op, (void*)roctracer::HIP_SyncApiDataCallback, (void*)1); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIPAPI: HIP::RegisterActivityCallback(" << op << ") error(" << hip_err << ")"); + hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback( + op, (void*)roctracer::HIP_SyncApiDataCallback, (void*)1); + if (hip_err != hipSuccess) + HIP_EXC_RAISING( + ROCTRACER_STATUS_HIP_API_ERR, + "HIPAPI: HIP::RegisterActivityCallback(" << op << ") error(" << hip_err << ")"); } break; } case ACTIVITY_DOMAIN_ROCTX: { if (roctracer::RocTxLoader::Instance().Enabled()) { - const bool suc = roctracer::RocTxLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); - if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RegisterApiCallback(" << op << ") failed"); + const bool suc = + roctracer::RocTxLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data); + if (suc == false) + EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, + "ROCTX::RegisterApiCallback(" << op << ") failed"); } break; } @@ -823,32 +838,23 @@ static roctracer_status_t roctracer_enable_callback_fun( return ROCTRACER_STATUS_SUCCESS; } -static void roctracer_enable_callback_impl( - roctracer_domain_t domain, - uint32_t op, - roctracer_rtapi_callback_t callback, - void* user_data) -{ +static void roctracer_enable_callback_impl(roctracer_domain_t domain, uint32_t op, + roctracer_rtapi_callback_t callback, void* user_data) { roctracer::cb_journal->Insert(domain, op, {callback, user_data}); roctracer_enable_callback_fun(domain, op, callback, user_data); } -PUBLIC_API roctracer_status_t roctracer_enable_op_callback( - roctracer_domain_t domain, - uint32_t op, - roctracer_rtapi_callback_t callback, - void* user_data) -{ +PUBLIC_API roctracer_status_t roctracer_enable_op_callback(roctracer_domain_t domain, uint32_t op, + roctracer_rtapi_callback_t callback, + void* user_data) { API_METHOD_PREFIX roctracer_enable_callback_impl(domain, op, callback, user_data); API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_enable_domain_callback( - roctracer_domain_t domain, - roctracer_rtapi_callback_t callback, - void* user_data) -{ +PUBLIC_API roctracer_status_t roctracer_enable_domain_callback(roctracer_domain_t domain, + roctracer_rtapi_callback_t callback, + void* user_data) { API_METHOD_PREFIX const uint32_t op_end = get_op_end(domain); for (uint32_t op = get_op_begin(domain); op < op_end; ++op) @@ -856,10 +862,8 @@ PUBLIC_API roctracer_status_t roctracer_enable_domain_callback( API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_enable_callback( - roctracer_rtapi_callback_t callback, - void* user_data) -{ +PUBLIC_API roctracer_status_t roctracer_enable_callback(roctracer_rtapi_callback_t callback, + void* user_data) { API_METHOD_PREFIX for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) { const uint32_t op_end = get_op_end(domain); @@ -870,12 +874,10 @@ PUBLIC_API roctracer_status_t roctracer_enable_callback( } // Disable runtime API callbacks -static roctracer_status_t roctracer_disable_callback_fun( - roctracer_domain_t domain, - uint32_t op) -{ +static roctracer_status_t roctracer_disable_callback_fun(roctracer_domain_t domain, uint32_t op) { switch (domain) { - case ACTIVITY_DOMAIN_HSA_OPS: break; + case ACTIVITY_DOMAIN_HSA_OPS: + break; case ACTIVITY_DOMAIN_HSA_API: { #if 0 if (op == HSA_API_ID_DISPATCH) { @@ -887,29 +889,38 @@ static roctracer_status_t roctracer_disable_callback_fun( roctracer::hsa_support::cb_table.set(op, NULL, NULL); break; } - case ACTIVITY_DOMAIN_HCC_OPS: break; + case ACTIVITY_DOMAIN_HCC_OPS: + break; case ACTIVITY_DOMAIN_HIP_API: { if (roctracer::HipLoader::Instance().Enabled() == false) break; std::lock_guard lock(roctracer::hip_activity_mutex); const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveApiCallback(" << op << "), error(" << hip_err << ")"); + if (hip_err != hipSuccess) + HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, + "HIP::RemoveApiCallback(" << op << "), error(" << hip_err << ")"); if (roctracer::HipApiActivityDisableCheck(op) == 0) { const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIPAPI: HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")"); + if (hip_err != hipSuccess) + HIP_EXC_RAISING( + ROCTRACER_STATUS_HIP_API_ERR, + "HIPAPI: HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")"); } break; } case ACTIVITY_DOMAIN_HSA_EVT: { const bool succ = roctracer::RocpLoader::Instance().RemoveEvtCallback(op); - if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RemoveEvtCallback error(" << op << ") failed"); + if (succ == false) + HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, + "HSA::RemoveEvtCallback error(" << op << ") failed"); break; } case ACTIVITY_DOMAIN_ROCTX: { if (roctracer::RocTxLoader::Instance().Enabled()) { const bool suc = roctracer::RocTxLoader::Instance().RemoveApiCallback(op); - if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RemoveApiCallback(" << op << ") failed"); + if (suc == false) + EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RemoveApiCallback(" << op << ") failed"); } break; } @@ -919,26 +930,19 @@ static roctracer_status_t roctracer_disable_callback_fun( return ROCTRACER_STATUS_SUCCESS; } -static void roctracer_disable_callback_impl( - roctracer_domain_t domain, - uint32_t op) -{ - roctracer::cb_journal->Remove(domain, op); - roctracer_disable_callback_fun(domain, op); +static void roctracer_disable_callback_impl(roctracer_domain_t domain, uint32_t op) { + roctracer::cb_journal->Remove(domain, op); + roctracer_disable_callback_fun(domain, op); } -PUBLIC_API roctracer_status_t roctracer_disable_op_callback( - roctracer_domain_t domain, - uint32_t op) -{ +PUBLIC_API roctracer_status_t roctracer_disable_op_callback(roctracer_domain_t domain, + uint32_t op) { API_METHOD_PREFIX roctracer_disable_callback_impl(domain, op); API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_disable_domain_callback( - roctracer_domain_t domain) -{ +PUBLIC_API roctracer_status_t roctracer_disable_domain_callback(roctracer_domain_t domain) { API_METHOD_PREFIX const uint32_t op_end = get_op_end(domain); for (uint32_t op = get_op_begin(domain); op < op_end; ++op) @@ -946,8 +950,7 @@ PUBLIC_API roctracer_status_t roctracer_disable_domain_callback( API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_disable_callback() -{ +PUBLIC_API roctracer_status_t roctracer_disable_callback() { API_METHOD_PREFIX for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) { const uint32_t op_end = get_op_end(domain); @@ -966,10 +969,8 @@ PUBLIC_API roctracer_pool_t* roctracer_default_pool_expl(roctracer_pool_t* pool) } // Open memory pool -PUBLIC_API roctracer_status_t roctracer_open_pool_expl( - const roctracer_properties_t* properties, - roctracer_pool_t** pool) -{ +PUBLIC_API roctracer_status_t roctracer_open_pool_expl(const roctracer_properties_t* properties, + roctracer_pool_t** pool) { API_METHOD_PREFIX std::lock_guard lock(roctracer::memory_pool_mutex); if ((pool == NULL) && (roctracer::memory_pool != NULL)) { @@ -977,8 +978,10 @@ PUBLIC_API roctracer_status_t roctracer_open_pool_expl( } roctracer::MemoryPool* p = new roctracer::MemoryPool(*properties); if (p == NULL) EXC_RAISING(ROCTRACER_STATUS_ERROR, "MemoryPool() error"); - if (pool != NULL) *pool = p; - else roctracer::memory_pool = p; + if (pool != NULL) + *pool = p; + else + roctracer::memory_pool = p; API_METHOD_SUFFIX } @@ -988,17 +991,14 @@ PUBLIC_API roctracer_status_t roctracer_close_pool_expl(roctracer_pool_t* pool) std::lock_guard lock(roctracer::memory_pool_mutex); roctracer_pool_t* ptr = (pool == NULL) ? roctracer_default_pool() : pool; roctracer::MemoryPool* memory_pool = reinterpret_cast(ptr); - delete(memory_pool); + delete (memory_pool); if (pool == NULL) roctracer::memory_pool = NULL; API_METHOD_SUFFIX } // Enable activity records logging -static roctracer_status_t roctracer_enable_activity_fun( - roctracer_domain_t domain, - uint32_t op, - roctracer_pool_t* pool) -{ +static roctracer_status_t roctracer_enable_activity_fun(roctracer_domain_t domain, uint32_t op, + roctracer_pool_t* pool) { if (pool == NULL) pool = roctracer_default_pool(); switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { @@ -1008,27 +1008,31 @@ static roctracer_status_t roctracer_enable_activity_fun( const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL); if (roctracer::RocpLoader::GetRef() == NULL) break; if (init_phase == true) { - roctracer::RocpLoader::Instance().InitActivityCallback((void*)roctracer::HSA_AsyncActivityCallback, - (void*)pool); + roctracer::RocpLoader::Instance().InitActivityCallback( + (void*)roctracer::HSA_AsyncActivityCallback, (void*)pool); } const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, true); - if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error"); + if (succ == false) + HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error"); } break; } - case ACTIVITY_DOMAIN_HSA_API: break; - case ACTIVITY_DOMAIN_HSA_EVT: break; + case ACTIVITY_DOMAIN_HSA_API: + break; + case ACTIVITY_DOMAIN_HSA_EVT: + break; case ACTIVITY_DOMAIN_HCC_OPS: { const bool init_phase = (roctracer::HccLoader::GetRef() == NULL); if (roctracer::HccLoader::Instance().Enabled() == false) break; if (init_phase == true) { - roctracer::HccLoader::Instance().InitActivityCallback((void*)roctracer::HCC_ActivityIdCallback, - (void*)roctracer::HCC_AsyncActivityCallback, - (void*)pool); + roctracer::HccLoader::Instance().InitActivityCallback( + (void*)roctracer::HCC_ActivityIdCallback, (void*)roctracer::HCC_AsyncActivityCallback, + (void*)pool); } const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, true); - if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback error"); + if (succ == false) + HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback error"); break; } case ACTIVITY_DOMAIN_HIP_API: { @@ -1036,41 +1040,38 @@ static roctracer_status_t roctracer_enable_activity_fun( std::lock_guard lock(roctracer::hip_activity_mutex); if (roctracer::HipActActivityEnableCheck(op) == 0) { - const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(op, (void*)roctracer::HIP_SyncActivityCallback, (void*)pool); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RegisterActivityCallback(" << op << " error(" << hip_err << ")"); + const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback( + op, (void*)roctracer::HIP_SyncActivityCallback, (void*)pool); + if (hip_err != hipSuccess) + HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, + "HIP::RegisterActivityCallback(" << op << " error(" << hip_err << ")"); } break; } - case ACTIVITY_DOMAIN_ROCTX: break; + case ACTIVITY_DOMAIN_ROCTX: + break; default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } return ROCTRACER_STATUS_SUCCESS; } -static void roctracer_enable_activity_impl( - roctracer_domain_t domain, - uint32_t op, - roctracer_pool_t* pool) -{ - roctracer::act_journal->Insert(domain, op, {pool}); - roctracer_enable_activity_fun(domain, op, pool); +static void roctracer_enable_activity_impl(roctracer_domain_t domain, uint32_t op, + roctracer_pool_t* pool) { + roctracer::act_journal->Insert(domain, op, {pool}); + roctracer_enable_activity_fun(domain, op, pool); } -PUBLIC_API roctracer_status_t roctracer_enable_op_activity_expl( - roctracer_domain_t domain, - uint32_t op, - roctracer_pool_t* pool) -{ +PUBLIC_API roctracer_status_t roctracer_enable_op_activity_expl(roctracer_domain_t domain, + uint32_t op, + roctracer_pool_t* pool) { API_METHOD_PREFIX roctracer_enable_activity_impl(domain, op, pool); API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_enable_domain_activity_expl( - roctracer_domain_t domain, - roctracer_pool_t* pool) -{ +PUBLIC_API roctracer_status_t roctracer_enable_domain_activity_expl(roctracer_domain_t domain, + roctracer_pool_t* pool) { API_METHOD_PREFIX const uint32_t op_end = get_op_end(domain); for (uint32_t op = get_op_begin(domain); op < op_end; ++op) @@ -1078,9 +1079,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_domain_activity_expl( API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_enable_activity_expl( - roctracer_pool_t* pool) -{ +PUBLIC_API roctracer_status_t roctracer_enable_activity_expl(roctracer_pool_t* pool) { API_METHOD_PREFIX for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) { const uint32_t op_end = get_op_end(domain); @@ -1091,10 +1090,7 @@ PUBLIC_API roctracer_status_t roctracer_enable_activity_expl( } // Disable activity records logging -static roctracer_status_t roctracer_disable_activity_fun( - roctracer_domain_t domain, - uint32_t op) -{ +static roctracer_status_t roctracer_disable_activity_fun(roctracer_domain_t domain, uint32_t op) { switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { if (op == HSA_OP_ID_COPY) { @@ -1102,17 +1098,23 @@ static roctracer_status_t roctracer_disable_activity_fun( } else { if (roctracer::RocpLoader::GetRef() == NULL) break; const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false); - if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback(false) error, op(" << op << ")"); + if (succ == false) + HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, + "HSA::EnableActivityCallback(false) error, op(" << op << ")"); } break; } - case ACTIVITY_DOMAIN_HSA_API: break; - case ACTIVITY_DOMAIN_HSA_EVT: break; + case ACTIVITY_DOMAIN_HSA_API: + break; + case ACTIVITY_DOMAIN_HSA_EVT: + break; case ACTIVITY_DOMAIN_HCC_OPS: { if (roctracer::HccLoader::Instance().Enabled() == false) break; const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false); - if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error, op(" << op << ")"); + if (succ == false) + HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, + "HCC::EnableActivityCallback(NULL) error, op(" << op << ")"); break; } case ACTIVITY_DOMAIN_HIP_API: { @@ -1121,40 +1123,40 @@ static roctracer_status_t roctracer_disable_activity_fun( if (roctracer::HipActActivityDisableCheck(op) == 0) { const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")"); + if (hip_err != hipSuccess) + HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, + "HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")"); } else { - const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback(op, (void*)roctracer::HIP_SyncApiDataCallback, (void*)1); - if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIPACT: HIP::RegisterActivityCallback(" << op << ") error(" << hip_err << ")"); + const hipError_t hip_err = roctracer::HipLoader::Instance().RegisterActivityCallback( + op, (void*)roctracer::HIP_SyncApiDataCallback, (void*)1); + if (hip_err != hipSuccess) + HIP_EXC_RAISING( + ROCTRACER_STATUS_HIP_API_ERR, + "HIPACT: HIP::RegisterActivityCallback(" << op << ") error(" << hip_err << ")"); } break; } - case ACTIVITY_DOMAIN_ROCTX: break; + case ACTIVITY_DOMAIN_ROCTX: + break; default: EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")"); } return ROCTRACER_STATUS_SUCCESS; } -static void roctracer_disable_activity_impl( - roctracer_domain_t domain, - uint32_t op) -{ +static void roctracer_disable_activity_impl(roctracer_domain_t domain, uint32_t op) { roctracer::act_journal->Remove(domain, op); roctracer_disable_activity_fun(domain, op); } -PUBLIC_API roctracer_status_t roctracer_disable_op_activity( - roctracer_domain_t domain, - uint32_t op) -{ +PUBLIC_API roctracer_status_t roctracer_disable_op_activity(roctracer_domain_t domain, + uint32_t op) { API_METHOD_PREFIX roctracer_disable_activity_impl(domain, op); API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_disable_domain_activity( - roctracer_domain_t domain) -{ +PUBLIC_API roctracer_status_t roctracer_disable_domain_activity(roctracer_domain_t domain) { API_METHOD_PREFIX const uint32_t op_end = get_op_end(domain); for (uint32_t op = get_op_begin(domain); op < op_end; ++op) @@ -1162,8 +1164,7 @@ PUBLIC_API roctracer_status_t roctracer_disable_domain_activity( API_METHOD_SUFFIX } -PUBLIC_API roctracer_status_t roctracer_disable_activity() -{ +PUBLIC_API roctracer_status_t roctracer_disable_activity() { API_METHOD_PREFIX for (uint32_t domain = 0; domain < ACTIVITY_DOMAIN_NUMBER; ++domain) { const uint32_t op_end = get_op_end(domain); @@ -1185,7 +1186,8 @@ PUBLIC_API roctracer_status_t roctracer_flush_activity_expl(roctracer_pool_t* po // Notifies that the calling thread is entering an external API region. // Push an external correlation id for the calling thread. -PUBLIC_API roctracer_status_t roctracer_activity_push_external_correlation_id(activity_correlation_id_t id) { +PUBLIC_API roctracer_status_t +roctracer_activity_push_external_correlation_id(activity_correlation_id_t id) { API_METHOD_PREFIX roctracer::external_id_stack.push(id); API_METHOD_SUFFIX @@ -1194,7 +1196,8 @@ PUBLIC_API roctracer_status_t roctracer_activity_push_external_correlation_id(ac // Notifies that the calling thread is leaving an external API region. // Pop an external correlation id for the calling thread. // 'lastId' returns the last external correlation -PUBLIC_API roctracer_status_t roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id) { +PUBLIC_API roctracer_status_t +roctracer_activity_pop_external_correlation_id(activity_correlation_id_t* last_id) { API_METHOD_PREFIX if (last_id != NULL) *last_id = 0; @@ -1214,7 +1217,8 @@ PUBLIC_API roctracer_status_t roctracer_activity_pop_external_correlation_id(act PUBLIC_API void roctracer_mark(const char* str) { if (mark_api_callback_ptr) { mark_api_callback_ptr(ACTIVITY_DOMAIN_EXT_API, ACTIVITY_EXT_OP_MARK, str, NULL); - roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking correlation id + roctracer::GlobalCounter::Increment(); // account for user-defined markers when tracking + // correlation id } } @@ -1245,15 +1249,14 @@ PUBLIC_API roctracer_status_t roctracer_get_timestamp(uint64_t* timestamp) { } // Set properties -PUBLIC_API roctracer_status_t roctracer_set_properties( - roctracer_domain_t domain, - void* properties) -{ +PUBLIC_API roctracer_status_t roctracer_set_properties(roctracer_domain_t domain, + void* properties) { API_METHOD_PREFIX switch (domain) { case ACTIVITY_DOMAIN_HSA_OPS: { // HSA OPS properties - roctracer::hsa_ops_properties_t* ops_properties = reinterpret_cast(properties); + roctracer::hsa_ops_properties_t* ops_properties = + reinterpret_cast(properties); HsaApiTable* table = reinterpret_cast(ops_properties->table); roctracer::hsa_support::async_copy_callback_fun = ops_properties->async_copy_callback_fun; roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg; @@ -1271,9 +1274,12 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( hsa_status_t status = hsa_amd_profiling_async_copy_enable(true); if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_async_copy_enable"); roctracer::hsa_amd_memory_async_copy_fn = table->amd_ext_->hsa_amd_memory_async_copy_fn; - roctracer::hsa_amd_memory_async_copy_rect_fn = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn; - table->amd_ext_->hsa_amd_memory_async_copy_fn = roctracer::hsa_amd_memory_async_copy_interceptor; - table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = roctracer::hsa_amd_memory_async_copy_rect_interceptor; + roctracer::hsa_amd_memory_async_copy_rect_fn = + table->amd_ext_->hsa_amd_memory_async_copy_rect_fn; + table->amd_ext_->hsa_amd_memory_async_copy_fn = + roctracer::hsa_amd_memory_async_copy_interceptor; + table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = + roctracer::hsa_amd_memory_async_copy_rect_interceptor; break; } @@ -1291,11 +1297,13 @@ PUBLIC_API roctracer_status_t roctracer_set_properties( case ACTIVITY_DOMAIN_HCC_OPS: case ACTIVITY_DOMAIN_HIP_API: { mark_api_callback_ptr = reinterpret_cast(properties); - if (roctracer::hip_act_cb_tracker == NULL) roctracer::hip_act_cb_tracker = new roctracer::hip_act_cb_tracker_t; + if (roctracer::hip_act_cb_tracker == NULL) + roctracer::hip_act_cb_tracker = new roctracer::hip_act_cb_tracker_t; break; } case ACTIVITY_DOMAIN_EXT_API: { - roctracer_ext_properties_t* ops_properties = reinterpret_cast(properties); + roctracer_ext_properties_t* ops_properties = + reinterpret_cast(properties); roctracer::ext_support::roctracer_start_cb = ops_properties->start_cb; roctracer::ext_support::roctracer_stop_cb = ops_properties->stop_cb; break; @@ -1349,7 +1357,8 @@ PUBLIC_API void roctracer_flush_buf() { CONSTRUCTOR_API void constructor() { ONLOAD_TRACE_BEG(); roctracer::util::Logger::Create(); - roctracer::trace_buffer = new roctracer::TraceBuffer("HSA GPU", 0x200000, roctracer::trace_buffer_prm, 2); + roctracer::trace_buffer = new roctracer::TraceBuffer( + "HSA GPU", 0x200000, roctracer::trace_buffer_prm, 2); roctracer_load(); ONLOAD_TRACE_END(); } diff --git a/src/core/trace_buffer.h b/src/core/trace_buffer.h index a5c2619921..d6061970ae 100644 --- a/src/core/trace_buffer.h +++ b/src/core/trace_buffer.h @@ -50,11 +50,7 @@ } while (0) namespace roctracer { -enum { - TRACE_ENTRY_INV = 0, - TRACE_ENTRY_INIT = 1, - TRACE_ENTRY_COMPL = 2 -}; +enum { TRACE_ENTRY_INV = 0, TRACE_ENTRY_INIT = 1, TRACE_ENTRY_COMPL = 2 }; enum entry_type_t { DFLT_ENTRY_TYPE = 0, @@ -68,8 +64,8 @@ struct trace_entry_t { std::atomic valid; entry_type_t type; uint64_t dispatch; - uint64_t begin; // kernel begin timestamp, ns - uint64_t end; // kernel end timestamp, ns + uint64_t begin; // kernel begin timestamp, ns + uint64_t end; // kernel end timestamp, ns uint64_t complete; hsa_agent_t agent; uint32_t dev_index; @@ -86,8 +82,7 @@ struct trace_entry_t { }; }; -template -struct push_element_fun { +template struct push_element_fun { T* const elem_; T** prev_; bool fun(T* node) { @@ -105,10 +100,12 @@ struct push_element_fun { push_element_fun(T* elem, T** prev) : elem_(elem), prev_(prev) {} }; -template -struct call_element_fun { +template struct call_element_fun { void (T::*fptr_)(); - bool fun(T* node) const { (node->*fptr_)(); return false; } + bool fun(T* node) const { + (node->*fptr_)(); + return false; + } call_element_fun(void (T::*f)()) : fptr_(f) {} }; @@ -118,18 +115,26 @@ struct TraceBufferBase { virtual void StartWorkerThread() = 0; virtual void Flush() = 0; - static void StartWorkerThreadAll() { foreach(call_element_fun(&TraceBufferBase::StartWorkerThread)); } - static void FlushAll() { foreach(call_element_fun(&TraceBufferBase::Flush)); } + static void StartWorkerThreadAll() { + foreach (call_element_fun(&TraceBufferBase::StartWorkerThread)) + ; + } + static void FlushAll() { + foreach (call_element_fun(&TraceBufferBase::Flush)) + ; + } static void Push(TraceBufferBase* elem) { - if (head_elem_ == NULL) head_elem_ = elem; - else foreach(push_element_fun(elem, &head_elem_)); + if (head_elem_ == NULL) + head_elem_ = elem; + else + foreach (push_element_fun(elem, &head_elem_)) + ; } TraceBufferBase(const uint32_t& prior) : priority_(prior), next_elem_(NULL) {} - template - static void foreach(const F& f_in) { + template static void foreach (const F& f_in) { std::lock_guard lck(mutex_); F f = f_in; TraceBufferBase* p = head_elem_; @@ -146,9 +151,8 @@ struct TraceBufferBase { static mutex_t mutex_; }; -template -class TraceBuffer : protected TraceBufferBase { - public: +template class TraceBuffer : protected TraceBufferBase { + public: typedef void (*callback_t)(Entry*); typedef TraceBuffer Obj; typedef uint64_t pointer_t; @@ -161,11 +165,9 @@ class TraceBuffer : protected TraceBufferBase { callback_t fun; }; - TraceBuffer(const char* name, uint32_t size, const flush_prm_t* flush_prm_arr, uint32_t flush_prm_count, uint32_t prior = 0) : - TraceBufferBase(prior), - size_(size), - work_thread_started_(false) - { + TraceBuffer(const char* name, uint32_t size, const flush_prm_t* flush_prm_arr, + uint32_t flush_prm_count, uint32_t prior = 0) + : TraceBufferBase(prior), size_(size), work_thread_started_(false) { name_ = strdup(name); data_ = allocate_fun(); next_ = allocate_fun(); @@ -204,7 +206,7 @@ class TraceBuffer : protected TraceBufferBase { std::lock_guard lck(mutex_); if (work_thread_started_ == true) { PTHREAD_CALL(pthread_cancel(work_thread_)); - void *res; + void* res; PTHREAD_CALL(pthread_join(work_thread_, &res)); if (res != PTHREAD_CANCELED) FATAL("consumer thread wasn't stopped correctly"); work_thread_started_ = false; @@ -223,7 +225,7 @@ class TraceBuffer : protected TraceBufferBase { void Flush() { flush_buf(); } - private: + private: void flush_buf() { std::lock_guard lck(mutex_); @@ -231,7 +233,7 @@ class TraceBuffer : protected TraceBufferBase { pointer_t curr_pointer = write_pointer_.load(std::memory_order_relaxed); buf_list_it_t it = buf_list_.begin(); buf_list_it_t end_it = buf_list_.end(); - while(it != end_it) { + while (it != end_it) { Entry* buf = *it; Entry* ptr = buf + (pointer % size_); Entry* end_ptr = buf + size_; @@ -261,15 +263,13 @@ class TraceBuffer : protected TraceBufferBase { } inline Entry* allocate_fun() { - Entry* ptr = (Entry*) malloc(size_ * sizeof(Entry)); + Entry* ptr = (Entry*)malloc(size_ * sizeof(Entry)); if (ptr == NULL) FATAL("malloc failed"); - //memset(ptr, 0, size_ * sizeof(Entry)); + // memset(ptr, 0, size_ * sizeof(Entry)); return ptr; } - inline void free_fun(void* ptr) { - free(ptr); - } + inline void free_fun(void* ptr) { free(ptr); } static void* allocate_worker(void* arg) { Obj* obj = (Obj*)arg; @@ -321,8 +321,8 @@ class TraceBuffer : protected TraceBufferBase { }; } // namespace roctracer -#define TRACE_BUFFER_INSTANTIATE() \ - roctracer::TraceBufferBase* roctracer::TraceBufferBase::head_elem_ = NULL; \ +#define TRACE_BUFFER_INSTANTIATE() \ + roctracer::TraceBufferBase* roctracer::TraceBufferBase::head_elem_ = NULL; \ roctracer::TraceBufferBase::mutex_t roctracer::TraceBufferBase::mutex_; #endif // SRC_CORE_TRACE_BUFFER_H_ diff --git a/src/proxy/hsa_queue.h b/src/proxy/hsa_queue.h index 09b55949db..d8086615e6 100644 --- a/src/proxy/hsa_queue.h +++ b/src/proxy/hsa_queue.h @@ -31,9 +31,7 @@ class HsaQueue : public Queue { public: HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) : queue_(queue) {} - void Submit(const packet_t* packet) { - util::HsaRsrcFactory::Instance().Submit(queue_, packet); - } + void Submit(const packet_t* packet) { util::HsaRsrcFactory::Instance().Submit(queue_, packet); } private: hsa_queue_t* queue_; diff --git a/src/proxy/intercept_queue.cpp b/src/proxy/intercept_queue.cpp index 062b84e2b0..f67dfc5ea0 100644 --- a/src/proxy/intercept_queue.cpp +++ b/src/proxy/intercept_queue.cpp @@ -27,10 +27,10 @@ void InterceptQueue::HsaIntercept(HsaApiTable* table) { } InterceptQueue::mutex_t InterceptQueue::mutex_; -//rocprofiler_callback_t InterceptQueue::dispatch_callback_ = NULL; -//InterceptQueue::queue_callback_t InterceptQueue::create_callback_ = NULL; -//InterceptQueue::queue_callback_t InterceptQueue::destroy_callback_ = NULL; -//void* InterceptQueue::callback_data_ = NULL; +// rocprofiler_callback_t InterceptQueue::dispatch_callback_ = NULL; +// InterceptQueue::queue_callback_t InterceptQueue::create_callback_ = NULL; +// InterceptQueue::queue_callback_t InterceptQueue::destroy_callback_ = NULL; +// void* InterceptQueue::callback_data_ = NULL; InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL; const char* InterceptQueue::kernel_none_ = ""; bool InterceptQueue::in_create_call_ = false; diff --git a/src/proxy/intercept_queue.h b/src/proxy/intercept_queue.h index cc74bcaf12..0cc49de1e2 100644 --- a/src/proxy/intercept_queue.h +++ b/src/proxy/intercept_queue.h @@ -37,7 +37,9 @@ #include "util/hsa_rsrc_factory.h" #include "util/exception.h" -namespace roctracer { extern TraceBuffer* trace_buffer; } +namespace roctracer { +extern TraceBuffer* trace_buffer; +} namespace rocprofiler { extern decltype(hsa_queue_create)* hsa_queue_create_fn; @@ -48,25 +50,25 @@ class InterceptQueue { typedef std::recursive_mutex mutex_t; typedef std::map obj_map_t; typedef hsa_status_t (*queue_callback_t)(hsa_queue_t*, void* data); - typedef void (*queue_event_callback_t)(hsa_status_t status, hsa_queue_t *queue, void *arg); + typedef void (*queue_event_callback_t)(hsa_status_t status, hsa_queue_t* queue, void* arg); typedef uint32_t queue_id_t; static void HsaIntercept(HsaApiTable* table); - static hsa_status_t InterceptQueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t* source, - void* data), - void* data, uint32_t private_segment_size, - uint32_t group_segment_size, hsa_queue_t** queue, - const bool& tracker_on) { + static hsa_status_t InterceptQueueCreate( + hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data, + uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue, + const bool& tracker_on) { std::lock_guard lck(mutex_); hsa_status_t status = HSA_STATUS_ERROR; if (in_create_call_) EXC_ABORT(status, "recursive InterceptQueueCreate()"); in_create_call_ = true; - ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, queue_event_callback, data, private_segment_size, - group_segment_size, queue, &status); + ProxyQueue* proxy = + ProxyQueue::Create(agent, size, type, queue_event_callback, data, private_segment_size, + group_segment_size, queue, &status); if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "ProxyQueue::Create()"); status = util::HsaRsrcFactory::HsaApi()->hsa_amd_profiling_set_profiler_enabled(*queue, true); @@ -78,7 +80,8 @@ class InterceptQueue { obj->queue_id = current_queue_id; (*obj_map_)[(uint64_t)(*queue)] = obj; - status = (is_enabled) ? proxy->SetInterceptCB(OnSubmitCB, obj) : proxy->SetInterceptCB(OnSubmitCB_dummy, obj); + status = (is_enabled) ? proxy->SetInterceptCB(OnSubmitCB, obj) + : proxy->SetInterceptCB(OnSubmitCB_dummy, obj); #if 0 if (create_callback_ != NULL) { @@ -96,15 +99,17 @@ class InterceptQueue { void* data), void* data, uint32_t private_segment_size, uint32_t group_segment_size, hsa_queue_t** queue) { - return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, false); + return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, + group_segment_size, queue, false); } static hsa_status_t QueueCreateTracked(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, - void (*callback)(hsa_status_t status, hsa_queue_t* source, - void* data), - void* data, uint32_t private_segment_size, - uint32_t group_segment_size, hsa_queue_t** queue) { - return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, true); + void (*callback)(hsa_status_t status, hsa_queue_t* source, + void* data), + void* data, uint32_t private_segment_size, + uint32_t group_segment_size, hsa_queue_t** queue) { + return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, + group_segment_size, queue, true); } static hsa_status_t QueueDestroy(hsa_queue_t* queue) { @@ -122,8 +127,8 @@ class InterceptQueue { return status; } - static void OnSubmitCB_dummy(const void* in_packets, uint64_t count, uint64_t user_que_idx, void* data, - hsa_amd_queue_intercept_packet_writer writer) { + static void OnSubmitCB_dummy(const void* in_packets, uint64_t count, uint64_t user_que_idx, + void* data, hsa_amd_queue_intercept_packet_writer writer) { const packet_t* packets_arr = reinterpret_cast(in_packets); // Submitting the original packets if profiling was not enabled @@ -161,8 +166,10 @@ class InterceptQueue { ::proxy::Tracker::entry_t* entry = roctracer::trace_buffer->GetEntry(); entry->kernel.tid = syscall(__NR_gettid); entry->kernel.name = kernel_name; - ::proxy::Tracker::Enable(roctracer::KERNEL_ENTRY_TYPE, obj->agent_info_->dev_id, completion_signal, entry); - const_cast(dispatch_packet)->completion_signal = entry->signal; + ::proxy::Tracker::Enable(roctracer::KERNEL_ENTRY_TYPE, obj->agent_info_->dev_id, + completion_signal, entry); + const_cast(dispatch_packet)->completion_signal = + entry->signal; } } @@ -190,7 +197,7 @@ class InterceptQueue { static void Enable(bool val) { is_enabled = val; } private: - static void queue_event_callback(hsa_status_t status, hsa_queue_t *queue, void *arg) { + static void queue_event_callback(hsa_status_t status, hsa_queue_t* queue, void* arg) { if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "queue error handling is not supported"); InterceptQueue* obj = GetObj(queue); if (obj->queue_event_callback_) obj->queue_event_callback_(status, obj->queue_, arg); @@ -201,7 +208,8 @@ class InterceptQueue { return static_cast((*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask); } - static const amd_kernel_code_t* GetKernelCode(const hsa_kernel_dispatch_packet_t* dispatch_packet) { + static const amd_kernel_code_t* GetKernelCode( + const hsa_kernel_dispatch_packet_t* dispatch_packet) { const amd_kernel_code_t* kernel_code = NULL; hsa_status_t status = util::HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address( @@ -256,22 +264,19 @@ class InterceptQueue { assert(queue == obj->queue_); delete obj; obj_map_->erase(it); - status = HSA_STATUS_SUCCESS;; + status = HSA_STATUS_SUCCESS; + ; } return status; } - InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) : - queue_(queue), - proxy_(proxy) - { + InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) + : queue_(queue), proxy_(proxy) { agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent); queue_event_callback_ = NULL; } - ~InterceptQueue() { - ProxyQueue::Destroy(proxy_); - } + ~InterceptQueue() { ProxyQueue::Destroy(proxy_); } static bool is_enabled; diff --git a/src/proxy/proxy_queue.cpp b/src/proxy/proxy_queue.cpp index 6905889343..3c14b530fa 100644 --- a/src/proxy/proxy_queue.cpp +++ b/src/proxy/proxy_queue.cpp @@ -36,10 +36,10 @@ ProxyQueue* ProxyQueue::Create(hsa_agent_t agent, uint32_t size, hsa_queue_type3 hsa_status_t* status) { hsa_status_t suc = HSA_STATUS_ERROR; ProxyQueue* instance = - (rocp_type_) ? (ProxyQueue*) new SimpleProxyQueue() : (ProxyQueue*) new HsaProxyQueue(); + (rocp_type_) ? (ProxyQueue*)new SimpleProxyQueue() : (ProxyQueue*)new HsaProxyQueue(); if (instance != NULL) { suc = instance->Init(agent, size, type, callback, data, private_segment_size, - group_segment_size, queue); + group_segment_size, queue); if (suc != HSA_STATUS_SUCCESS) { delete instance; instance = NULL; diff --git a/src/proxy/simple_proxy_queue.cpp b/src/proxy/simple_proxy_queue.cpp index a7cee79830..a24259eca9 100644 --- a/src/proxy/simple_proxy_queue.cpp +++ b/src/proxy/simple_proxy_queue.cpp @@ -25,13 +25,19 @@ void SimpleProxyQueue::HsaIntercept(HsaApiTable* table) { table->core_->hsa_signal_store_relaxed_fn = rocprofiler::SimpleProxyQueue::SignalStore; table->core_->hsa_signal_store_screlease_fn = rocprofiler::SimpleProxyQueue::SignalStore; - table->core_->hsa_queue_load_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::GetQueueIndex; - table->core_->hsa_queue_store_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::SetQueueIndex; - table->core_->hsa_queue_load_read_index_relaxed_fn = rocprofiler::SimpleProxyQueue::GetSubmitIndex; + table->core_->hsa_queue_load_write_index_relaxed_fn = + rocprofiler::SimpleProxyQueue::GetQueueIndex; + table->core_->hsa_queue_store_write_index_relaxed_fn = + rocprofiler::SimpleProxyQueue::SetQueueIndex; + table->core_->hsa_queue_load_read_index_relaxed_fn = + rocprofiler::SimpleProxyQueue::GetSubmitIndex; - table->core_->hsa_queue_load_write_index_scacquire_fn = rocprofiler::SimpleProxyQueue::GetQueueIndex; - table->core_->hsa_queue_store_write_index_screlease_fn = rocprofiler::SimpleProxyQueue::SetQueueIndex; - table->core_->hsa_queue_load_read_index_scacquire_fn = rocprofiler::SimpleProxyQueue::GetSubmitIndex; + table->core_->hsa_queue_load_write_index_scacquire_fn = + rocprofiler::SimpleProxyQueue::GetQueueIndex; + table->core_->hsa_queue_store_write_index_screlease_fn = + rocprofiler::SimpleProxyQueue::SetQueueIndex; + table->core_->hsa_queue_load_read_index_scacquire_fn = + rocprofiler::SimpleProxyQueue::GetSubmitIndex; } SimpleProxyQueue::queue_map_t* SimpleProxyQueue::queue_map_ = NULL; diff --git a/src/proxy/simple_proxy_queue.h b/src/proxy/simple_proxy_queue.h index 25cb67bab3..3b76c4bf30 100644 --- a/src/proxy/simple_proxy_queue.h +++ b/src/proxy/simple_proxy_queue.h @@ -30,7 +30,7 @@ #include "util/hsa_rsrc_factory.h" #ifndef ROCP_PROXY_LOCK -# define ROCP_PROXY_LOCK 1 +#define ROCP_PROXY_LOCK 1 #endif namespace rocprofiler { @@ -125,7 +125,8 @@ class SimpleProxyQueue : public ProxyQueue { const uint64_t que_idx = hsa_queue_load_write_index_relaxed_fn(queue_); // Waiting untill there is a free space in the queue - while (que_idx >= (hsa_queue_load_read_index_relaxed_fn(queue_) + size_)); + while (que_idx >= (hsa_queue_load_read_index_relaxed_fn(queue_) + size_)) + ; // Increment the write index hsa_queue_store_write_index_relaxed_fn(queue_, que_idx + 1); @@ -160,8 +161,7 @@ class SimpleProxyQueue : public ProxyQueue { queue_mask_(0), submit_index_(0), on_submit_cb_(NULL), - on_submit_cb_data_(NULL) - { + on_submit_cb_data_(NULL) { printf("ROCProfiler: SimpleProxyQueue is enabled\n"); fflush(stdout); } @@ -200,8 +200,8 @@ class SimpleProxyQueue : public ProxyQueue { if (queue_map_ == NULL) queue_map_ = new queue_map_t; (*queue_map_)[queue_->doorbell_signal.handle] = this; - } - else abort(); + } else + abort(); } } if (status != HSA_STATUS_SUCCESS) abort(); diff --git a/src/proxy/tracker.h b/src/proxy/tracker.h index b1657707e7..1bf98ce32c 100644 --- a/src/proxy/tracker.h +++ b/src/proxy/tracker.h @@ -35,20 +35,21 @@ namespace proxy { class Tracker { - public: + public: typedef util::HsaRsrcFactory::timestamp_t timestamp_t; typedef roctracer::trace_entry_t entry_t; typedef roctracer::entry_type_t entry_type_t; // Add tracker entry - inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, entry_t* entry) { + inline static void Enable(entry_type_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, + entry_t* entry) { hsa_status_t status = HSA_STATUS_ERROR; util::HsaRsrcFactory* hsa_rsrc = &(util::HsaRsrcFactory::Instance()); // Creating a new tracker entry entry->type = type; entry->agent = agent; - entry->dev_index = 0; //hsa_rsrc->GetAgentInfo(agent)->dev_index; + entry->dev_index = 0; // hsa_rsrc->GetAgentInfo(agent)->dev_index; entry->orig = signal; entry->dispatch = hsa_rsrc->TimestampNs(); entry->valid.store(roctracer::TRACE_ENTRY_INIT, std::memory_order_release); @@ -56,7 +57,8 @@ class Tracker { // Creating a proxy signal status = hsa_signal_create(1, 0, NULL, &(entry->signal)); if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_signal_create"); - status = hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry); + status = + hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry); if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler"); } @@ -66,7 +68,7 @@ class Tracker { entry->valid.store(roctracer::TRACE_ENTRY_INV, std::memory_order_release); } - private: + private: // Entry completion inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) { // Query begin/end and complete timestamps @@ -74,12 +76,14 @@ class Tracker { if (entry->type == roctracer::COPY_ENTRY_TYPE) { hsa_amd_profiling_async_copy_time_t async_copy_time{}; hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time); - if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_async_copy_time"); + if (status != HSA_STATUS_SUCCESS) + EXC_RAISING(status, "hsa_amd_profiling_get_async_copy_time"); entry->begin = hsa_rsrc->SysclockToNs(async_copy_time.start); entry->end = hsa_rsrc->SysclockToNs(async_copy_time.end); } else { hsa_amd_profiling_dispatch_time_t dispatch_time{}; - hsa_status_t status = hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time); + hsa_status_t status = + hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time); if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_dispatch_time"); entry->begin = hsa_rsrc->SysclockToNs(dispatch_time.start); entry->end = hsa_rsrc->SysclockToNs(dispatch_time.end); @@ -101,7 +105,8 @@ class Tracker { orig_signal_ptr->end_ts = prof_signal_ptr->end_ts; const hsa_signal_value_t new_value = hsa_signal_load_relaxed(orig) - 1; - if (signal_value != new_value) EXC_ABORT(HSA_STATUS_ERROR, "Tracker::Complete bad signal value"); + if (signal_value != new_value) + EXC_ABORT(HSA_STATUS_ERROR, "Tracker::Complete bad signal value"); hsa_signal_store_screlease(orig, signal_value); } hsa_signal_destroy(signal); @@ -111,7 +116,8 @@ class Tracker { static bool Handler(hsa_signal_value_t signal_value, void* arg) { // Acquire entry entry_t* entry = reinterpret_cast(arg); - while (entry->valid.load(std::memory_order_acquire) != roctracer::TRACE_ENTRY_INIT) sched_yield(); + while (entry->valid.load(std::memory_order_acquire) != roctracer::TRACE_ENTRY_INIT) + sched_yield(); // Complete entry Tracker::Complete(signal_value, entry); @@ -119,6 +125,6 @@ class Tracker { } }; -} // namespace rocprofiler +} // namespace proxy -#endif // SRC_PROXY_TRACKER_H_ +#endif // SRC_PROXY_TRACKER_H_ diff --git a/src/proxy/types.h b/src/proxy/types.h index af08a83454..50231a55f3 100644 --- a/src/proxy/types.h +++ b/src/proxy/types.h @@ -34,11 +34,12 @@ typedef hsa_ext_amd_aql_pm4_packet_t packet_t; typedef uint32_t packet_word_t; typedef uint64_t timestamp_t; -inline std::ostream& operator<< (std::ostream& out, const event_t& event) { - out << "[block_name(" << event.block_name << "). block_index(" << event.block_index << "). counter_id(" << event.counter_id << ")]"; +inline std::ostream& operator<<(std::ostream& out, const event_t& event) { + out << "[block_name(" << event.block_name << "). block_index(" << event.block_index + << "). counter_id(" << event.counter_id << ")]"; return out; } -inline std::ostream& operator<< (std::ostream& out, const parameter_t& parameter) { +inline std::ostream& operator<<(std::ostream& out, const parameter_t& parameter) { out << "[parameter_name(" << parameter.parameter_name << "). value(" << parameter.value << ")]"; return out; } diff --git a/src/roctx/roctx.cpp b/src/roctx/roctx.cpp index ff77726f35..486e97b7b4 100644 --- a/src/roctx/roctx.cpp +++ b/src/roctx/roctx.cpp @@ -37,7 +37,6 @@ #define API_METHOD_PREFIX \ roctx_status_t err = ROCTX_STATUS_SUCCESS; \ try { - #define API_METHOD_SUFFIX \ } \ catch (std::exception & e) { \ @@ -52,7 +51,7 @@ ERR_LOGGING(__FUNCTION__ << "(), " << e.what()); \ err = roctx::GetExcStatus(e); \ } \ - (void)err; \ + (void)err; #define API_METHOD_CATCH(X) \ } \ @@ -84,8 +83,10 @@ thread_map_t thread_map; static thread_local message_stack_t* message_stack = NULL; roctx_status_t GetExcStatus(const std::exception& e) { - const roctracer::util::exception* roctx_exc_ptr = dynamic_cast(&e); - return (roctx_exc_ptr) ? static_cast(roctx_exc_ptr->status()) : ROCTX_STATUS_ERROR; + const roctracer::util::exception* roctx_exc_ptr = + dynamic_cast(&e); + return (roctx_exc_ptr) ? static_cast(roctx_exc_ptr->status()) + : ROCTX_STATUS_ERROR; } void thread_data_init() { @@ -124,7 +125,8 @@ PUBLIC_API void roctxMarkA(const char* message) { activity_rtapi_callback_t api_callback_fun = NULL; void* api_callback_arg = NULL; roctx::cb_table.get(ROCTX_API_ID_roctxMarkA, &api_callback_fun, &api_callback_arg); - if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxMarkA, &api_data, api_callback_arg); + if (api_callback_fun) + api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxMarkA, &api_data, api_callback_arg); API_METHOD_SUFFIX_NRET } @@ -137,7 +139,9 @@ PUBLIC_API int roctxRangePushA(const char* message) { activity_rtapi_callback_t api_callback_fun = NULL; void* api_callback_arg = NULL; roctx::cb_table.get(ROCTX_API_ID_roctxRangePushA, &api_callback_fun, &api_callback_arg); - if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data, api_callback_arg); + if (api_callback_fun) + api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, &api_data, + api_callback_arg); roctx::message_stack->push(strdup(message)); return roctx::message_stack->size() - 1; @@ -152,11 +156,13 @@ PUBLIC_API int roctxRangePop() { activity_rtapi_callback_t api_callback_fun = NULL; void* api_callback_arg = NULL; roctx::cb_table.get(ROCTX_API_ID_roctxRangePop, &api_callback_fun, &api_callback_arg); - if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, &api_data, api_callback_arg); + if (api_callback_fun) + api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, &api_data, + api_callback_arg); if (roctx::message_stack->empty()) { - EXC_ABORT(ROCTX_STATUS_ERROR, "Pop from empty stack!"); + EXC_ABORT(ROCTX_STATUS_ERROR, "Pop from empty stack!"); } else { - roctx::message_stack->pop(); + roctx::message_stack->pop(); } return roctx::message_stack->size(); @@ -173,7 +179,9 @@ PUBLIC_API roctx_range_id_t roctxRangeStartA(const char* message) { activity_rtapi_callback_t api_callback_fun = NULL; void* api_callback_arg = NULL; roctx::cb_table.get(ROCTX_API_ID_roctxRangeStartA, &api_callback_fun, &api_callback_arg); - if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStartA, &api_data, api_callback_arg); + if (api_callback_fun) + api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStartA, &api_data, + api_callback_arg); return roctx_range_counter; API_METHOD_CATCH(-1); @@ -186,14 +194,16 @@ PUBLIC_API void roctxRangeStop(roctx_range_id_t rangeId) { activity_rtapi_callback_t api_callback_fun = NULL; void* api_callback_arg = NULL; roctx::cb_table.get(ROCTX_API_ID_roctxRangeStop, &api_callback_fun, &api_callback_arg); - if (api_callback_fun) api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStop, &api_data, api_callback_arg); + if (api_callback_fun) + api_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangeStop, &api_data, + api_callback_arg); API_METHOD_SUFFIX_NRET } PUBLIC_API void RangeStackIterate(roctx_range_iterate_cb_t callback, void* arg) { for (const auto& entry : roctx::thread_map) { const auto tid = entry.first; - for (roctx::message_stack_t stack = *(entry.second); !stack.empty(); stack.pop()){ + for (roctx::message_stack_t stack = *(entry.second); !stack.empty(); stack.pop()) { std::string message = stack.top(); roctx_range_data_t data{}; data.message = message.c_str(); diff --git a/src/roctx/roctx_intercept.cpp b/src/roctx/roctx_intercept.cpp index f5cfdf7669..11de368da5 100644 --- a/src/roctx/roctx_intercept.cpp +++ b/src/roctx/roctx_intercept.cpp @@ -45,8 +45,6 @@ PUBLIC_API bool RegisterApiCallback(uint32_t op, void* callback, void* arg) { return roctx::cb_table.set(op, reinterpret_cast(callback), arg); } -PUBLIC_API bool RemoveApiCallback(uint32_t op) { - return roctx::cb_table.set(op, NULL, NULL); -} +PUBLIC_API bool RemoveApiCallback(uint32_t op) { return roctx::cb_table.set(op, NULL, NULL); } } // extern "C" diff --git a/src/util/exception.h b/src/util/exception.h index 98d931bf7c..1d4ba3e4d5 100644 --- a/src/util/exception.h +++ b/src/util/exception.h @@ -43,12 +43,12 @@ #define HCC_EXC_RAISING(error, stream) \ do { \ EXC_RAISING(error, "HCC error: " << stream); \ - } while(0) + } while (0) #define HIP_EXC_RAISING(error, stream) \ do { \ EXC_RAISING(error, "HIP error: " << stream); \ - } while(0) + } while (0) namespace roctracer { namespace util { diff --git a/src/util/hsa_rsrc_factory.cpp b/src/util/hsa_rsrc_factory.cpp index caea611496..0a70b07fc3 100644 --- a/src/util/hsa_rsrc_factory.cpp +++ b/src/util/hsa_rsrc_factory.cpp @@ -69,13 +69,15 @@ static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool return HSA_STATUS_ERROR_INVALID_ARGUMENT; } - err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); + err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info( + pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); if (HSA_AMD_SEGMENT_GLOBAL != segment) { return HSA_STATUS_SUCCESS; } - err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); + err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info( + pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; @@ -128,19 +130,22 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize #ifdef ROCP_LD_AQLPROFILE status = LoadAqlProfileLib(&aqlprofile_api_); #else - status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_); + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, + hsa_ven_amd_aqlprofile_VERSION_MAJOR, + sizeof(aqlprofile_api_), &aqlprofile_api_); #endif CHECK_STATUS("aqlprofile API table load failed", status); // Get Loader API table loader_api_ = {0}; - status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_); + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, + sizeof(loader_api_), &loader_api_); CHECK_STATUS("loader API table query failed", status); // Instantiate HSA timer timer_ = new HsaTimer(&hsa_api_); CHECK_STATUS("HSA timer allocation failed", - (timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); + (timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); // Time correlation const uint32_t corr_iters = 1000; @@ -148,7 +153,8 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters); // System timeout - timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_); + timeout_ = + (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_); } // Destructor of the class @@ -174,9 +180,12 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn; hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn; - hsa_api_.hsa_queue_load_write_index_relaxed = table->core_->hsa_queue_load_write_index_relaxed_fn; - hsa_api_.hsa_queue_store_write_index_relaxed = table->core_->hsa_queue_store_write_index_relaxed_fn; - hsa_api_.hsa_queue_load_read_index_relaxed = table->core_->hsa_queue_load_read_index_relaxed_fn; + hsa_api_.hsa_queue_load_write_index_relaxed = + table->core_->hsa_queue_load_write_index_relaxed_fn; + hsa_api_.hsa_queue_store_write_index_relaxed = + table->core_->hsa_queue_store_write_index_relaxed_fn; + hsa_api_.hsa_queue_load_read_index_relaxed = + table->core_->hsa_queue_load_read_index_relaxed_fn; hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn; hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn; @@ -185,18 +194,22 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn; hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn; - hsa_api_.hsa_code_object_reader_create_from_file = table->core_->hsa_code_object_reader_create_from_file_fn; + hsa_api_.hsa_code_object_reader_create_from_file = + table->core_->hsa_code_object_reader_create_from_file_fn; hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn; - hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn; + hsa_api_.hsa_executable_load_agent_code_object = + table->core_->hsa_executable_load_agent_code_object_fn; hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn; hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn; hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn; hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn; hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn; - hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn; + hsa_api_.hsa_system_get_major_extension_table = + table->core_->hsa_system_get_major_extension_table_fn; - hsa_api_.hsa_amd_agent_iterate_memory_pools = table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn; + hsa_api_.hsa_amd_agent_iterate_memory_pools = + table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn; hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn; hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn; hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn; @@ -204,9 +217,12 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_amd_memory_async_copy_rect = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn; hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn; - hsa_api_.hsa_amd_profiling_set_profiler_enabled = table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn; - hsa_api_.hsa_amd_profiling_get_async_copy_time = table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn; - hsa_api_.hsa_amd_profiling_get_dispatch_time = table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn; + hsa_api_.hsa_amd_profiling_set_profiler_enabled = + table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn; + hsa_api_.hsa_amd_profiling_get_async_copy_time = + table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn; + hsa_api_.hsa_amd_profiling_get_dispatch_time = + table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn; } else { hsa_api_.hsa_init = hsa_init; hsa_api_.hsa_shut_down = hsa_shut_down; @@ -302,10 +318,13 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { agent_info->dev_type = HSA_DEVICE_TYPE_CPU; agent_info->dev_index = cpu_list_.size(); - status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool); + status = + hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool); if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool; - status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool); - if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) kern_arg_pool_ = &agent_info->kern_arg_pool; + status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, + &agent_info->kern_arg_pool); + if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) + kern_arg_pool_ = &agent_info->kern_arg_pool; agent_info->gpu_pool = {}; cpu_list_.push_back(agent_info); @@ -317,28 +336,34 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { agent_info->dev_id = agent; agent_info->dev_type = HSA_DEVICE_TYPE_GPU; hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_info->name); - const int gfxip_label_len = std::min (strlen(agent_info->name) - 2, sizeof (agent_info->gfxip) - 1); + const int gfxip_label_len = + std::min(strlen(agent_info->name) - 2, sizeof(agent_info->gfxip) - 1); memcpy(agent_info->gfxip, agent_info->name, gfxip_label_len); agent_info->gfxip[gfxip_label_len] = '\0'; hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_info->max_wave_size); hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false; - hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), - &agent_info->cu_num); - hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), - &agent_info->waves_per_cu); - hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), - &agent_info->simds_per_cu); - hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), - &agent_info->se_num); + hsa_api_.hsa_agent_get_info( + agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), + &agent_info->cu_num); hsa_api_.hsa_agent_get_info(agent, - static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), - &agent_info->shader_arrays_per_se); + static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), + &agent_info->waves_per_cu); + hsa_api_.hsa_agent_get_info(agent, + static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), + &agent_info->simds_per_cu); + hsa_api_.hsa_agent_get_info( + agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), + &agent_info->se_num); + hsa_api_.hsa_agent_get_info( + agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), + &agent_info->shader_arrays_per_se); agent_info->cpu_pool = {}; agent_info->kern_arg_pool = {}; - status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); + status = + hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status); // GFX8 and GFX9 SGPR/VGPR block sizes @@ -435,7 +460,7 @@ bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue) { hsa_status_t status; status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL, - UINT32_MAX, UINT32_MAX, queue); + UINT32_MAX, UINT32_MAX, queue); return (status == HSA_STATUS_SUCCESS); } @@ -458,7 +483,8 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t hsa_status_t status = HSA_STATUS_ERROR; uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast(&buffer)); + status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, + reinterpret_cast(&buffer)); uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; return ptr; } @@ -473,7 +499,8 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size uint8_t* buffer = NULL; if (!cpu_agents_.empty()) { size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast(&buffer)); + status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, + reinterpret_cast(&buffer)); // Both the CPU and GPU can access the kernel arguments if (status == HSA_STATUS_SUCCESS) { hsa_agent_t ag_list[1] = {agent_info->dev_id}; @@ -493,7 +520,8 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; if (!cpu_agents_.empty()) { - status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast(&buffer)); + status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, + reinterpret_cast(&buffer)); // Both the CPU and GPU can access the memory if (status == HSA_STATUS_SUCCESS) { hsa_agent_t ag_list[1] = {agent_info->dev_id}; @@ -518,16 +546,18 @@ uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t s } // Wait signal -hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { +hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, + const hsa_signal_value_t& signal_value) const { const hsa_signal_value_t exp_value = signal_value - 1; hsa_signal_value_t ret_value = signal_value; while (1) { - ret_value = - hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, timeout_, HSA_WAIT_STATE_BLOCKED); + ret_value = hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, + timeout_, HSA_WAIT_STATE_BLOCKED); if (ret_value == exp_value) break; if (ret_value != signal_value) { std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value - << "), ret_value(" << ret_value << ")" << std::endl << std::flush; + << "), ret_value(" << ret_value << ")" << std::endl + << std::flush; abort(); } } @@ -535,7 +565,8 @@ hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const } // Wait signal with signal value restore -void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { +void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, + const hsa_signal_value_t& signal_value) const { SignalWait(signal, signal_value); hsa_api_.hsa_signal_store_relaxed(const_cast(signal), signal_value); } @@ -599,13 +630,13 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br } // Create executable. - status = hsa_api_.hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, - NULL, executable); + status = hsa_api_.hsa_executable_create_alt( + HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable); CHECK_STATUS("Error in creating executable object", status); // Load code object. - status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr, - NULL, NULL); + status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, + code_obj_rdr, NULL, NULL); CHECK_STATUS("Error in loading executable object", status); // Freeze executable. @@ -615,7 +646,7 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br // Get symbol handle. hsa_executable_symbol_t kernelSymbol; status = hsa_api_.hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0, - &kernelSymbol); + &kernelSymbol); CHECK_STATUS("Error in looking up kernel symbol", status); // Update output parameter @@ -659,7 +690,8 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { } uint32_t slot_idx = (uint32_t)(write_idx % queue->size); - uint32_t* queue_slot = reinterpret_cast((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); + uint32_t* queue_slot = + reinterpret_cast((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); const uint32_t* slot_data = reinterpret_cast(packet); // Copy buffered commands into the queue slot. @@ -709,18 +741,22 @@ void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) { table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; } -hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) { +hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, + hsa_executable_symbol_t symbol, void* data) { hsa_symbol_kind_t value = (hsa_symbol_kind_t)0; - hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value); + hsa_status_t status = + hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value); CHECK_STATUS("Error in getting symbol info", status); if (value == HSA_SYMBOL_KIND_KERNEL) { uint64_t addr = 0; uint32_t len = 0; - status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr); + status = hsa_api_.hsa_executable_symbol_get_info( + symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr); CHECK_STATUS("Error in getting kernel object", status); - status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len); + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, + &len); CHECK_STATUS("Error in getting name len", status); - char *name = new char[len + 1]; + char* name = new char[len + 1]; status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name); CHECK_STATUS("Error in getting kernel name", status); name[len] = 0; @@ -733,12 +769,15 @@ hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_ex return HSA_STATUS_SUCCESS; } -hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) { +hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, + const char* options) { std::lock_guard lck(mutex_); if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t; - hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL); + hsa_status_t status = + hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL); CHECK_STATUS("Error in iterating executable symbols", status); - return hsa_api_.hsa_executable_freeze(executable, options);; + return hsa_api_.hsa_executable_freeze(executable, options); + ; } void HsaRsrcFactory::DumpHandles(FILE* file) { @@ -746,10 +785,14 @@ void HsaRsrcFactory::DumpHandles(FILE* file) { auto end = agent_map_.end(); for (auto it = beg; it != end; ++it) { const AgentInfo* agent_info = it->second; - fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle, (agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu"); - if (agent_info->cpu_pool.handle != 0) fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle); - if (agent_info->kern_arg_pool.handle != 0) fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle); - if (agent_info->gpu_pool.handle != 0) fprintf(file, "0x%lx pool gpu%u\n", agent_info->gpu_pool.handle, agent_info->dev_index); + fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle, + (agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu"); + if (agent_info->cpu_pool.handle != 0) + fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle); + if (agent_info->kern_arg_pool.handle != 0) + fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle); + if (agent_info->gpu_pool.handle != 0) + fprintf(file, "0x%lx pool gpu%u\n", agent_info->gpu_pool.handle, agent_info->dev_index); } fflush(file); } diff --git a/src/util/hsa_rsrc_factory.h b/src/util/hsa_rsrc_factory.h index 9ed0025c59..325a3c9042 100644 --- a/src/util/hsa_rsrc_factory.h +++ b/src/util/hsa_rsrc_factory.h @@ -44,23 +44,25 @@ #define HSA_QUEUE_ALIGN_BYTES 64 #define HSA_PACKET_ALIGN_BYTES 64 -#define CHECK_STATUS(msg, status) do { \ - if ((status) != HSA_STATUS_SUCCESS) { \ - const char* emsg = 0; \ - hsa_status_string(status, &emsg); \ - printf("%s: %s\n", msg, emsg ? emsg : ""); \ - abort(); \ - } \ -} while (0) +#define CHECK_STATUS(msg, status) \ + do { \ + if ((status) != HSA_STATUS_SUCCESS) { \ + const char* emsg = 0; \ + hsa_status_string(status, &emsg); \ + printf("%s: %s\n", msg, emsg ? emsg : ""); \ + abort(); \ + } \ + } while (0) -#define CHECK_ITER_STATUS(msg, status) do { \ - if ((status) != HSA_STATUS_INFO_BREAK) { \ - const char* emsg = 0; \ - hsa_status_string(status, &emsg); \ - printf("%s: %s\n", msg, emsg ? emsg : ""); \ - abort(); \ - } \ -} while (0) +#define CHECK_ITER_STATUS(msg, status) \ + do { \ + if ((status) != HSA_STATUS_INFO_BREAK) { \ + const char* emsg = 0; \ + hsa_status_string(status, &emsg); \ + printf("%s: %s\n", msg, emsg ? emsg : ""); \ + abort(); \ + } \ + } while (0) namespace util { static const size_t MEM_PAGE_BYTES = 0x1000; @@ -174,15 +176,12 @@ class HsaTimer { static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; typedef long double freq_t; - enum time_id_t { - TIME_ID_CLOCK_REALTIME = 0, - TIME_ID_CLOCK_MONOTONIC = 1, - TIME_ID_NUMBER - }; + enum time_id_t { TIME_ID_CLOCK_REALTIME = 0, TIME_ID_CLOCK_MONOTONIC = 1, TIME_ID_NUMBER }; HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) { timestamp_t sysclock_hz = 0; - hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); + hsa_status_t status = + hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status); sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz; } @@ -217,8 +216,8 @@ class HsaTimer { // Return pair of correlated values of profiling timestamp and time with // correlation error for a given time ID and number of iterations - void correlated_pair_ns(time_id_t time_id, uint32_t iters, - timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) const { + void correlated_pair_ns(time_id_t time_id, uint32_t iters, timestamp_t* timestamp_v, + timestamp_t* time_v, timestamp_t* error_v) const { clockid_t clock_id = 0; switch (time_id) { case TIME_ID_CLOCK_REALTIME: @@ -357,7 +356,8 @@ class HsaRsrcFactory { uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size); // Wait signal - hsa_signal_value_t SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; + hsa_signal_value_t SignalWait(const hsa_signal_t& signal, + const hsa_signal_value_t& signal_value) const; // Wait signal with signal value restore void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; @@ -403,7 +403,9 @@ class HsaRsrcFactory { const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; } // Methods for system-clock/ns conversion and timestamp in 'ns' - timestamp_t SysclockToNs(const timestamp_t& sysclock) const { return timer_->sysclock_to_ns(sysclock); } + timestamp_t SysclockToNs(const timestamp_t& sysclock) const { + return timer_->sysclock_to_ns(sysclock); + } timestamp_t NsToSysclock(const timestamp_t& time) const { return timer_->ns_to_sysclock(time); } timestamp_t TimestampNs() const { return timer_->timestamp_ns(); } @@ -482,8 +484,10 @@ class HsaRsrcFactory { typedef std::map symbols_map_t; static symbols_map_t* symbols_map_; static bool executable_tracking_on_; - static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options); - static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data); + static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, + const char* options); + static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, + void* data); // HSA runtime API table static hsa_pfn_t hsa_api_; @@ -507,8 +511,8 @@ class HsaRsrcFactory { timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER]; // CPU/kern-arg memory pools - hsa_amd_memory_pool_t *cpu_pool_; - hsa_amd_memory_pool_t *kern_arg_pool_; + hsa_amd_memory_pool_t* cpu_pool_; + hsa_amd_memory_pool_t* kern_arg_pool_; }; } // namespace util diff --git a/src/util/logger.h b/src/util/logger.h index 37e1c3ec31..e4927df2f0 100644 --- a/src/util/logger.h +++ b/src/util/logger.h @@ -168,32 +168,32 @@ class Logger { } // namespace roctracer #define ERR_LOGGING(stream) \ - do { \ - roctracer::util::Logger::Instance() << "error: " << roctracer::util::Logger::begm \ - << stream << roctracer::util::Logger::endl; \ - } while(0) + do { \ + roctracer::util::Logger::Instance() \ + << "error: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \ + } while (0) #define INFO_LOGGING(stream) \ - do { \ - roctracer::util::Logger::Instance() << "info: " << roctracer::util::Logger::begm << stream \ - << roctracer::util::Logger::endl; \ - } while(0) + do { \ + roctracer::util::Logger::Instance() \ + << "info: " << roctracer::util::Logger::begm << stream << roctracer::util::Logger::endl; \ + } while (0) #define WARN_LOGGING(stream) \ - do { \ - std::cerr << "ROCProfiler: " << stream << std::endl; \ - roctracer::util::Logger::Instance() << "warning: " << roctracer::util::Logger::begm << stream \ - << roctracer::util::Logger::endl; \ - } while(0) + do { \ + std::cerr << "ROCProfiler: " << stream << std::endl; \ + roctracer::util::Logger::Instance() << "warning: " << roctracer::util::Logger::begm << stream \ + << roctracer::util::Logger::endl; \ + } while (0) #ifdef DEBUG #define DBG_LOGGING(stream) \ - do { \ - roctracer::util::Logger::Instance() << roctracer::util::Logger::begm << "debug: \"" \ - << stream << "\"" < < < < \ + do { \ + roctracer::util::Logger::Instance() \ + << roctracer::util::Logger::begm << "debug: \"" << stream << "\"" < < < < \ " in " << __FUNCTION__ << " at " << __FILE__ << " line " << __LINE__ \ - << roctracer::util::Logger::endl; \ - } while(0) + << roctracer::util::Logger::endl; \ + } while (0) #endif #if DEBUG_TRACE_ON @@ -204,8 +204,8 @@ inline static void DEBUG_TRACE(const char* fmt, ...) { va_list valist; va_start(valist, fmt); vsnprintf(buf, size, fmt, valist); - printf("%u:%u %s", - roctracer::util::Logger::GetPid(), roctracer::util::Logger::GetTid(), buf); fflush(stdout); + printf("%u:%u %s", roctracer::util::Logger::GetPid(), roctracer::util::Logger::GetTid(), buf); + fflush(stdout); va_end(valist); } #else diff --git a/test/MatrixTranspose/MatrixTranspose.cpp b/test/MatrixTranspose/MatrixTranspose.cpp index c50b67d2e2..95d699f341 100644 --- a/test/MatrixTranspose/MatrixTranspose.cpp +++ b/test/MatrixTranspose/MatrixTranspose.cpp @@ -37,57 +37,56 @@ #define THREADS_PER_BLOCK_Z 1 // Mark API -extern "C" -void roctracer_mark(const char* str); +extern "C" void roctracer_mark(const char* str); // Device (Kernel) function, it must be void __global__ void matrixTranspose(float* out, float* in, const int width) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; - out[y * width + x] = in[x * width + y]; + out[y * width + x] = in[x * width + y]; } // CPU implementation of matrix transpose void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } + for (unsigned int j = 0; j < width; j++) { + for (unsigned int i = 0; i < width; i++) { + output[i * width + j] = input[j * width + i]; } + } } int main() { - float* Matrix; - float* TransposeMatrix; - float* cpuTransposeMatrix; + float* Matrix; + float* TransposeMatrix; + float* cpuTransposeMatrix; - float* gpuMatrix; - float* gpuTransposeMatrix; + float* gpuMatrix; + float* gpuTransposeMatrix; - hipDeviceProp_t devProp; - hipGetDeviceProperties(&devProp, 0); + hipDeviceProp_t devProp; + hipGetDeviceProperties(&devProp, 0); - std::cout << "Device name " << devProp.name << std::endl; + std::cout << "Device name " << devProp.name << std::endl; - int i; - int errors; + int i; + int errors; - Matrix = (float*)malloc(NUM * sizeof(float)); - TransposeMatrix = (float*)malloc(NUM * sizeof(float)); - cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); + Matrix = (float*)malloc(NUM * sizeof(float)); + TransposeMatrix = (float*)malloc(NUM * sizeof(float)); + cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float)); - // initialize the input data - for (i = 0; i < NUM; i++) { - Matrix[i] = (float)i * 10.0f; - } + // initialize the input data + for (i = 0; i < NUM; i++) { + Matrix[i] = (float)i * 10.0f; + } - // allocate the memory on the device side - hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); - hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); + // allocate the memory on the device side + hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)); + hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)); - uint32_t iterations = 100; - while (iterations-- > 0) { + uint32_t iterations = 100; + while (iterations-- > 0) { std::cout << "## Iteration (" << iterations << ") #################" << std::endl; // Memory transfer from host to device @@ -98,9 +97,9 @@ int main() { int rangeId = roctxRangeStart("hipLaunchKernel range"); roctxRangePush("hipLaunchKernel"); // Lauching kernel from host - hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, - gpuMatrix, WIDTH); + hipLaunchKernelGGL( + matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), + dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH); roctracer_mark("after HIP LaunchKernel"); roctxMark("after hipLaunchKernel"); @@ -109,8 +108,8 @@ int main() { hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost); - roctxRangePop(); // for "hipMemcpy" - roctxRangePop(); // for "hipLaunchKernel" + roctxRangePop(); // for "hipMemcpy" + roctxRangePop(); // for "hipLaunchKernel" roctxRangeStop(rangeId); // CPU MatrixTranspose computation @@ -120,26 +119,25 @@ int main() { errors = 0; double eps = 1.0E-6; for (i = 0; i < NUM; i++) { - if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { - errors++; - } + if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { + errors++; + } } if (errors != 0) { - printf("FAILED: %d errors\n", errors); + printf("FAILED: %d errors\n", errors); } else { - printf("PASSED!\n"); + printf("PASSED!\n"); } + } - } + // free the resources on device side + hipFree(gpuMatrix); + hipFree(gpuTransposeMatrix); - // free the resources on device side - hipFree(gpuMatrix); - hipFree(gpuTransposeMatrix); + // free the resources on host side + free(Matrix); + free(TransposeMatrix); + free(cpuTransposeMatrix); - // free the resources on host side - free(Matrix); - free(TransposeMatrix); - free(cpuTransposeMatrix); - - return errors; + return errors; } diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp index 86f14e2e65..b3a607a32c 100644 --- a/test/MatrixTranspose_test/MatrixTranspose.cpp +++ b/test/MatrixTranspose_test/MatrixTranspose.cpp @@ -45,7 +45,7 @@ static char* message = NULL; #endif void SPRINT(const char* fmt, ...) { if (msg_buf == NULL) { - msg_buf = (char*) calloc(msg_size, 1); + msg_buf = (char*)calloc(msg_size, 1); message = msg_buf; } @@ -66,13 +66,18 @@ void SFLUSH() { // hip header file #include // Macro to call HIP API -#define HIP_CALL(call) do { call; } while(0) +#define HIP_CALL(call) \ + do { \ + call; \ + } while (0) #else -#define HIP_CALL(call) do {} while(0) +#define HIP_CALL(call) \ + do { \ + } while (0) #endif #ifndef ITERATIONS -# define ITERATIONS 101 +#define ITERATIONS 101 #endif #define WIDTH 1024 #define NUM (WIDTH * WIDTH) @@ -83,20 +88,20 @@ void SFLUSH() { #if HIP_TEST // Device (Kernel) function, it must be void __global__ void matrixTranspose(float* out, float* in, const int width) { - int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; - int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; + int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; + int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y; - out[y * width + x] = in[x * width + y]; + out[y * width + x] = in[x * width + y]; } #endif // CPU implementation of matrix transpose void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) { - for (unsigned int j = 0; j < width; j++) { - for (unsigned int i = 0; i < width; i++) { - output[i * width + j] = input[j * width + i]; - } + for (unsigned int j = 0; j < width; j++) { + for (unsigned int i = 0; i < width; i++) { + output[i * width + j] = input[j * width + i]; } + } } int iterations = ITERATIONS; @@ -105,28 +110,28 @@ void start_tracing(); void stop_tracing(); int main() { - float* Matrix; - float* TransposeMatrix; - float* cpuTransposeMatrix; + float* Matrix; + float* TransposeMatrix; + float* cpuTransposeMatrix; - float* gpuMatrix; - float* gpuTransposeMatrix; + float* gpuMatrix; + float* gpuTransposeMatrix; - int i; - int errors; + int i; + int errors; - init_tracing(); + init_tracing(); #if HIP_TEST - int gpuCount = 1; + int gpuCount = 1; #if MGPU_TEST - hipGetDeviceCount(&gpuCount); - printf("Number of GPUs: %d\n", gpuCount); + hipGetDeviceCount(&gpuCount); + printf("Number of GPUs: %d\n", gpuCount); #endif - iterations *= gpuCount; + iterations *= gpuCount; #endif - while (iterations-- > 0) { + while (iterations-- > 0) { start_tracing(); #if HIP_TEST @@ -145,7 +150,7 @@ int main() { // initialize the input data for (i = 0; i < NUM; i++) { - Matrix[i] = (float)i * 10.0f; + Matrix[i] = (float)i * 10.0f; } // allocate the memory on the device side @@ -167,9 +172,10 @@ int main() { roctxRangePush("hipLaunchKernel"); // Lauching kernel from host - HIP_CALL(hipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), - dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, - gpuMatrix, WIDTH)); + HIP_CALL(hipLaunchKernelGGL(matrixTranspose, + dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y), + dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, + gpuTransposeMatrix, gpuMatrix, WIDTH)); roctxMark("after hipLaunchKernel"); @@ -179,10 +185,11 @@ int main() { // Memory transfer from device to host roctxRangePush("hipMemcpy"); - HIP_CALL(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost)); + HIP_CALL( + hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost)); - roctxRangePop(); // for "hipMemcpy" - roctxRangePop(); // for "hipLaunchKernel" + roctxRangePop(); // for "hipMemcpy" + roctxRangePop(); // for "hipLaunchKernel" // correlation reagion end roctracer_activity_pop_external_correlation_id(NULL); @@ -194,15 +201,15 @@ int main() { errors = 0; double eps = 1.0E-6; for (i = 0; i < NUM; i++) { - if (abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { - errors++; - } + if (abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) { + errors++; + } } if ((HIP_TEST != 0) && (errors != 0)) { - printf("FAILED: %d errors\n", errors); + printf("FAILED: %d errors\n", errors); } else { - errors = 0; - printf("PASSED!\n"); + errors = 0; + printf("PASSED!\n"); } // free the resources on device side @@ -218,11 +225,11 @@ int main() { free(Matrix); free(TransposeMatrix); free(cpuTransposeMatrix); - } + } - stop_tracing(); + stop_tracing(); - return errors; + return errors; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -234,15 +241,15 @@ int main() { #include #include -#include -#include /* For SYS_xxx definitions */ +#include +#include /* For SYS_xxx definitions */ // Macro to check ROC-tracer calls status #define ROCTRACER_CALL(call) \ do { \ int err = call; \ if (err != 0) { \ - fprintf(stderr, "%s\n", roctracer_error_string()); \ + fprintf(stderr, "%s\n", roctracer_error_string()); \ abort(); \ } \ } while (0) @@ -252,12 +259,7 @@ static inline uint32_t GetPid() { return syscall(__NR_getpid); } // Runtime API callback function -void api_callback( - uint32_t domain, - uint32_t cid, - const void* callback_data, - void* arg) -{ +void api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) { (void)arg; if (domain == ACTIVITY_DOMAIN_ROCTX) { @@ -267,31 +269,25 @@ void api_callback( } const hip_api_data_t* data = (const hip_api_data_t*)(callback_data); SPRINT("<%s id(%u)\tcorrelation_id(%lu) %s pid(%d) tid(%d)> ", - roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), - cid, - data->correlation_id, - (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit", GetPid(), GetTid()); + roctracer_op_string(ACTIVITY_DOMAIN_HIP_API, cid, 0), cid, data->correlation_id, + (data->phase == ACTIVITY_API_PHASE_ENTER) ? "on-enter" : "on-exit", GetPid(), GetTid()); if (data->phase == ACTIVITY_API_PHASE_ENTER) { switch (cid) { case HIP_API_ID_hipMemcpy: - SPRINT("dst(%p) src(%p) size(0x%x) kind(%u)", - data->args.hipMemcpy.dst, - data->args.hipMemcpy.src, - (uint32_t)(data->args.hipMemcpy.sizeBytes), - (uint32_t)(data->args.hipMemcpy.kind)); + SPRINT("dst(%p) src(%p) size(0x%x) kind(%u)", data->args.hipMemcpy.dst, + data->args.hipMemcpy.src, (uint32_t)(data->args.hipMemcpy.sizeBytes), + (uint32_t)(data->args.hipMemcpy.kind)); break; case HIP_API_ID_hipMalloc: - SPRINT("ptr(%p) size(0x%x)", - data->args.hipMalloc.ptr, - (uint32_t)(data->args.hipMalloc.size)); + SPRINT("ptr(%p) size(0x%x)", data->args.hipMalloc.ptr, + (uint32_t)(data->args.hipMalloc.size)); break; case HIP_API_ID_hipFree: SPRINT("ptr(%p)", data->args.hipFree.ptr); break; case HIP_API_ID_hipModuleLaunchKernel: - SPRINT("kernel(\"%s\") stream(%p)", - hipKernelNameRef(data->args.hipModuleLaunchKernel.f), - data->args.hipModuleLaunchKernel.stream); + SPRINT("kernel(\"%s\") stream(%p)", hipKernelNameRef(data->args.hipModuleLaunchKernel.f), + data->args.hipModuleLaunchKernel.stream); break; default: break; @@ -316,26 +312,17 @@ void activity_callback(const char* begin, const char* end, void* arg) { SPRINT("\tActivity records:\n"); while (record < end_record) { - const char * name = roctracer_op_string(record->domain, record->op, record->kind); - SPRINT("\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)", - name, - record->correlation_id, - record->begin_ns, - record->end_ns); + const char* name = roctracer_op_string(record->domain, record->op, record->kind); + SPRINT("\t%s\tcorrelation_id(%lu) time_ns(%lu:%lu)", name, record->correlation_id, + record->begin_ns, record->end_ns); if (record->domain == ACTIVITY_DOMAIN_HIP_API) { - SPRINT(" process_id(%u) thread_id(%u)", - record->process_id, - record->thread_id); + SPRINT(" process_id(%u) thread_id(%u)", record->process_id, record->thread_id); } else if (record->domain == ACTIVITY_DOMAIN_HCC_OPS) { - SPRINT(" device_id(%d) queue_id(%lu)", - record->device_id, - record->queue_id); + SPRINT(" device_id(%d) queue_id(%lu)", record->device_id, record->queue_id); if (record->op == HIP_OP_ID_COPY) SPRINT(" bytes(0x%zx)", record->bytes); } else if (record->domain == ACTIVITY_DOMAIN_HSA_OPS) { - SPRINT(" se(%u) cycle(%lu) pc(%lx)", - record->pc_sample.se, - record->pc_sample.cycle, - record->pc_sample.pc); + SPRINT(" se(%u) cycle(%lu) pc(%lx)", record->pc_sample.se, record->pc_sample.cycle, + record->pc_sample.pc); } else if (record->domain == ACTIVITY_DOMAIN_EXT_API) { SPRINT(" external_id(%lu)", record->external_id); } else { @@ -377,8 +364,10 @@ void init_tracing() { void start_tracing() { printf("# START (%d) #############################\n", iterations); // Start - if ((iterations & 1) == 1) roctracer_start(); - else roctracer_stop(); + if ((iterations & 1) == 1) + roctracer_start(); + else + roctracer_stop(); } // Stop tracing routine diff --git a/test/app/codeobj_test.cpp b/test/app/codeobj_test.cpp index f6a8416ccd..4593a8f4e9 100644 --- a/test/app/codeobj_test.cpp +++ b/test/app/codeobj_test.cpp @@ -43,19 +43,18 @@ void check_status(roctracer_status_t status) { void codeobj_callback(uint32_t domain, uint32_t cid, const void* data, void* arg) { const hsa_evt_data_t* evt_data = reinterpret_cast(data); const char* uri = evt_data->codeobj.uri; - printf("codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) load_delta(0x%lx) uri(\"%s\")\n", - domain, - cid, - evt_data->codeobj.load_base, - evt_data->codeobj.load_size, - evt_data->codeobj.load_delta, - uri); + printf( + "codeobj_callback domain(%u) cid(%u): load_base(0x%lx) load_size(0x%lx) load_delta(0x%lx) " + "uri(\"%s\")\n", + domain, cid, evt_data->codeobj.load_base, evt_data->codeobj.load_size, + evt_data->codeobj.load_delta, uri); free((void*)uri); fflush(stdout); } void initialize() { - roctracer_status_t status = roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, codeobj_callback, NULL); + roctracer_status_t status = roctracer_enable_op_callback( + ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_CODEOBJ, codeobj_callback, NULL); check_status(status); } @@ -79,9 +78,8 @@ extern "C" PUBLIC_API void OnUnloadTool() { } extern "C" CONSTRUCTOR_API void constructor() { - printf("constructor\n"); fflush(stdout); + printf("constructor\n"); + fflush(stdout); } -extern "C" DESTRUCTOR_API void destructor() { - OnUnloadTool(); -} +extern "C" DESTRUCTOR_API void destructor() { OnUnloadTool(); } diff --git a/test/app/hsaco_test.cpp b/test/app/hsaco_test.cpp index 733e3c799a..2cc7c1e2cc 100644 --- a/test/app/hsaco_test.cpp +++ b/test/app/hsaco_test.cpp @@ -28,25 +28,24 @@ #define CONSTRUCTOR_API __attribute__((constructor)) #define DESTRUCTOR_API __attribute__((destructor)) -#define HSA_RT(call) \ - do { \ - const hsa_status_t status = call; \ - if (status != HSA_STATUS_SUCCESS) { \ - printf("error \"%s\"\n", #call); fflush(stdout); \ - abort(); \ - } \ - } while(0) +#define HSA_RT(call) \ + do { \ + const hsa_status_t status = call; \ + if (status != HSA_STATUS_SUCCESS) { \ + printf("error \"%s\"\n", #call); \ + fflush(stdout); \ + abort(); \ + } \ + } while (0) // HSA API intercepting primitives decltype(hsa_executable_freeze)* hsa_executable_freeze_fn; hsa_ven_amd_loader_1_01_pfn_t loader_api_table{}; -hsa_status_t code_object_callback( - hsa_executable_t executable, - hsa_loaded_code_object_t loaded_code_object, - void* arg) -{ - printf("code_object_callback\n"); fflush(stdout); +hsa_status_t code_object_callback(hsa_executable_t executable, + hsa_loaded_code_object_t loaded_code_object, void* arg) { + printf("code_object_callback\n"); + fflush(stdout); uint64_t load_base = 0; uint64_t load_size = 0; @@ -55,21 +54,13 @@ hsa_status_t code_object_callback( char* uri_str = NULL; HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( - loaded_code_object, - HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, - &load_base)); + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, &load_base)); HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( - loaded_code_object, - HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, - &load_size)); + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, &load_size)); HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( - loaded_code_object, - HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, - &load_delta)); + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, &load_delta)); HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( - loaded_code_object, - HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, - &uri_len)); + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, &uri_len)); uri_str = (char*)calloc(uri_len + 1, sizeof(char)); if (!uri_str) { @@ -78,63 +69,59 @@ hsa_status_t code_object_callback( } HSA_RT(loader_api_table.hsa_ven_amd_loader_loaded_code_object_get_info( - loaded_code_object, - HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, - uri_str)); + loaded_code_object, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri_str)); - printf("load_base(0x%lx)\n", load_base); fflush(stdout); - printf("load_size(0x%lx)\n", load_size); fflush(stdout); - printf("load_delta(0x%lx)\n", load_delta); fflush(stdout); - printf("uri_len(%u)\n", uri_len); fflush(stdout); - printf("uri_str(\"%s\")\n", uri_str); fflush(stdout); + printf("load_base(0x%lx)\n", load_base); + fflush(stdout); + printf("load_size(0x%lx)\n", load_size); + fflush(stdout); + printf("load_delta(0x%lx)\n", load_delta); + fflush(stdout); + printf("uri_len(%u)\n", uri_len); + fflush(stdout); + printf("uri_str(\"%s\")\n", uri_str); + fflush(stdout); free(uri_str); return HSA_STATUS_SUCCESS; } -hsa_status_t hsa_executable_freeze_interceptor( - hsa_executable_t executable, - const char *options) -{ +hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char* options) { HSA_RT(loader_api_table.hsa_ven_amd_loader_executable_iterate_loaded_code_objects( - executable, - code_object_callback, - NULL)); - HSA_RT(hsa_executable_freeze_fn( - executable, - options)); + executable, code_object_callback, NULL)); + HSA_RT(hsa_executable_freeze_fn(executable, options)); return HSA_STATUS_SUCCESS; } // HSA-runtime tool on-load method -extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, - uint64_t runtime_version, +extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, - const char* const* failed_tool_names) -{ - printf("OnLoad: begin\n"); fflush(stdout); + const char* const* failed_tool_names) { + printf("OnLoad: begin\n"); + fflush(stdout); // intercepting hsa_executable_freeze API hsa_executable_freeze_fn = table->core_->hsa_executable_freeze_fn; table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; // Fetching AMD Loader HSA extension API HSA_RT(hsa_system_get_major_extension_table( - HSA_EXTENSION_AMD_LOADER, - 1, - sizeof(hsa_ven_amd_loader_1_01_pfn_t), - &loader_api_table)); - printf("OnLoad: end\n"); fflush(stdout); + HSA_EXTENSION_AMD_LOADER, 1, sizeof(hsa_ven_amd_loader_1_01_pfn_t), &loader_api_table)); + printf("OnLoad: end\n"); + fflush(stdout); return true; } extern "C" PUBLIC_API void OnUnload() { - printf("OnUnload\n"); fflush(stdout); + printf("OnUnload\n"); + fflush(stdout); } extern "C" CONSTRUCTOR_API void constructor() { - printf("constructor\n"); fflush(stdout); + printf("constructor\n"); + fflush(stdout); } extern "C" DESTRUCTOR_API void destructor() { - printf("destructor\n"); fflush(stdout); + printf("destructor\n"); + fflush(stdout); } diff --git a/test/hsa/src/hsa_rsrc_factory.cpp b/test/hsa/src/hsa_rsrc_factory.cpp index 0c1b094bb6..66510fbaf0 100644 --- a/test/hsa/src/hsa_rsrc_factory.cpp +++ b/test/hsa/src/hsa_rsrc_factory.cpp @@ -67,13 +67,15 @@ static hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t pool, void* data, bool return HSA_STATUS_ERROR_INVALID_ARGUMENT; } - err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); + err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info( + pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); if (HSA_AMD_SEGMENT_GLOBAL != segment) { return HSA_STATUS_SUCCESS; } - err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); + err = HsaRsrcFactory::HsaApi()->hsa_amd_memory_pool_get_info( + pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &flag); CHECK_STATUS("hsa_amd_memory_pool_get_info", err); uint32_t karg_st = flag & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT; @@ -126,19 +128,22 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize #ifdef ROCP_LD_AQLPROFILE status = LoadAqlProfileLib(&aqlprofile_api_); #else - status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, hsa_ven_amd_aqlprofile_VERSION_MAJOR, sizeof(aqlprofile_api_), &aqlprofile_api_); + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_AQLPROFILE, + hsa_ven_amd_aqlprofile_VERSION_MAJOR, + sizeof(aqlprofile_api_), &aqlprofile_api_); #endif CHECK_STATUS("aqlprofile API table load failed", status); // Get Loader API table loader_api_ = {0}; - status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(loader_api_), &loader_api_); + status = hsa_api_.hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, + sizeof(loader_api_), &loader_api_); CHECK_STATUS("loader API table query failed", status); // Instantiate HSA timer timer_ = new HsaTimer(&hsa_api_); CHECK_STATUS("HSA timer allocation failed", - (timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); + (timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS); // Time correlation const uint32_t corr_iters = 1000; @@ -146,7 +151,8 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize CorrelateTime(HsaTimer::TIME_ID_CLOCK_MONOTONIC, corr_iters); // System timeout - timeout_ = (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_); + timeout_ = + (timeout_ns_ == HsaTimer::TIMESTAMP_MAX) ? timeout_ns_ : timer_->ns_to_sysclock(timeout_ns_); } // Destructor of the class @@ -172,9 +178,12 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn; hsa_api_.hsa_queue_destroy = table->core_->hsa_queue_destroy_fn; - hsa_api_.hsa_queue_load_write_index_relaxed = table->core_->hsa_queue_load_write_index_relaxed_fn; - hsa_api_.hsa_queue_store_write_index_relaxed = table->core_->hsa_queue_store_write_index_relaxed_fn; - hsa_api_.hsa_queue_load_read_index_relaxed = table->core_->hsa_queue_load_read_index_relaxed_fn; + hsa_api_.hsa_queue_load_write_index_relaxed = + table->core_->hsa_queue_load_write_index_relaxed_fn; + hsa_api_.hsa_queue_store_write_index_relaxed = + table->core_->hsa_queue_store_write_index_relaxed_fn; + hsa_api_.hsa_queue_load_read_index_relaxed = + table->core_->hsa_queue_load_read_index_relaxed_fn; hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn; hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn; @@ -183,27 +192,34 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) { hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn; hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn; - hsa_api_.hsa_code_object_reader_create_from_file = table->core_->hsa_code_object_reader_create_from_file_fn; + hsa_api_.hsa_code_object_reader_create_from_file = + table->core_->hsa_code_object_reader_create_from_file_fn; hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn; - hsa_api_.hsa_executable_load_agent_code_object = table->core_->hsa_executable_load_agent_code_object_fn; + hsa_api_.hsa_executable_load_agent_code_object = + table->core_->hsa_executable_load_agent_code_object_fn; hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn; hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn; hsa_api_.hsa_executable_symbol_get_info = table->core_->hsa_executable_symbol_get_info_fn; hsa_api_.hsa_executable_iterate_symbols = table->core_->hsa_executable_iterate_symbols_fn; hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn; - hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn; + hsa_api_.hsa_system_get_major_extension_table = + table->core_->hsa_system_get_major_extension_table_fn; - hsa_api_.hsa_amd_agent_iterate_memory_pools = table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn; + hsa_api_.hsa_amd_agent_iterate_memory_pools = + table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn; hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn; hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn; hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn; hsa_api_.hsa_amd_memory_async_copy = table->amd_ext_->hsa_amd_memory_async_copy_fn; hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn; - hsa_api_.hsa_amd_profiling_set_profiler_enabled = table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn; - hsa_api_.hsa_amd_profiling_get_async_copy_time = table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn; - hsa_api_.hsa_amd_profiling_get_dispatch_time = table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn; + hsa_api_.hsa_amd_profiling_set_profiler_enabled = + table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn; + hsa_api_.hsa_amd_profiling_get_async_copy_time = + table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn; + hsa_api_.hsa_amd_profiling_get_dispatch_time = + table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn; } else { hsa_api_.hsa_init = hsa_init; hsa_api_.hsa_shut_down = hsa_shut_down; @@ -298,10 +314,13 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { agent_info->dev_type = HSA_DEVICE_TYPE_CPU; agent_info->dev_index = cpu_list_.size(); - status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool); + status = + hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->cpu_pool); if ((status == HSA_STATUS_INFO_BREAK) && (cpu_pool_ == NULL)) cpu_pool_ = &agent_info->cpu_pool; - status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, &agent_info->kern_arg_pool); - if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) kern_arg_pool_ = &agent_info->kern_arg_pool; + status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindKernArgPool, + &agent_info->kern_arg_pool); + if ((status == HSA_STATUS_INFO_BREAK) && (kern_arg_pool_ == NULL)) + kern_arg_pool_ = &agent_info->kern_arg_pool; agent_info->gpu_pool = {}; cpu_list_.push_back(agent_info); @@ -319,21 +338,26 @@ const AgentInfo* HsaRsrcFactory::AddAgentInfo(const hsa_agent_t agent) { hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &agent_info->max_queue_size); hsa_api_.hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_info->profile); agent_info->is_apu = (agent_info->profile == HSA_PROFILE_FULL) ? true : false; - hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), - &agent_info->cu_num); - hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), - &agent_info->waves_per_cu); - hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), - &agent_info->simds_per_cu); - hsa_api_.hsa_agent_get_info(agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), - &agent_info->se_num); + hsa_api_.hsa_agent_get_info( + agent, static_cast(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT), + &agent_info->cu_num); hsa_api_.hsa_agent_get_info(agent, - static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), - &agent_info->shader_arrays_per_se); + static_cast(HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU), + &agent_info->waves_per_cu); + hsa_api_.hsa_agent_get_info(agent, + static_cast(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU), + &agent_info->simds_per_cu); + hsa_api_.hsa_agent_get_info( + agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES), + &agent_info->se_num); + hsa_api_.hsa_agent_get_info( + agent, static_cast(HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE), + &agent_info->shader_arrays_per_se); agent_info->cpu_pool = {}; agent_info->kern_arg_pool = {}; - status = hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); + status = + hsa_api_.hsa_amd_agent_iterate_memory_pools(agent, FindStandardPool, &agent_info->gpu_pool); CHECK_ITER_STATUS("hsa_amd_agent_iterate_memory_pools(gpu pool)", status); // GFX8 and GFX9 SGPR/VGPR block sizes @@ -430,7 +454,7 @@ bool HsaRsrcFactory::CreateQueue(const AgentInfo* agent_info, uint32_t num_pkts, hsa_queue_t** queue) { hsa_status_t status; status = hsa_api_.hsa_queue_create(agent_info->dev_id, num_pkts, HSA_QUEUE_TYPE_MULTI, NULL, NULL, - UINT32_MAX, UINT32_MAX, queue); + UINT32_MAX, UINT32_MAX, queue); return (status == HSA_STATUS_SUCCESS); } @@ -453,7 +477,8 @@ uint8_t* HsaRsrcFactory::AllocateLocalMemory(const AgentInfo* agent_info, size_t hsa_status_t status = HSA_STATUS_ERROR; uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, reinterpret_cast(&buffer)); + status = hsa_api_.hsa_amd_memory_pool_allocate(agent_info->gpu_pool, size, 0, + reinterpret_cast(&buffer)); uint8_t* ptr = (status == HSA_STATUS_SUCCESS) ? buffer : NULL; return ptr; } @@ -468,7 +493,8 @@ uint8_t* HsaRsrcFactory::AllocateKernArgMemory(const AgentInfo* agent_info, size uint8_t* buffer = NULL; if (!cpu_agents_.empty()) { size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; - status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, reinterpret_cast(&buffer)); + status = hsa_api_.hsa_amd_memory_pool_allocate(*kern_arg_pool_, size, 0, + reinterpret_cast(&buffer)); // Both the CPU and GPU can access the kernel arguments if (status == HSA_STATUS_SUCCESS) { hsa_agent_t ag_list[1] = {agent_info->dev_id}; @@ -488,7 +514,8 @@ uint8_t* HsaRsrcFactory::AllocateSysMemory(const AgentInfo* agent_info, size_t s uint8_t* buffer = NULL; size = (size + MEM_PAGE_MASK) & ~MEM_PAGE_MASK; if (!cpu_agents_.empty()) { - status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, reinterpret_cast(&buffer)); + status = hsa_api_.hsa_amd_memory_pool_allocate(*cpu_pool_, size, 0, + reinterpret_cast(&buffer)); // Both the CPU and GPU can access the memory if (status == HSA_STATUS_SUCCESS) { hsa_agent_t ag_list[1] = {agent_info->dev_id}; @@ -513,16 +540,18 @@ uint8_t* HsaRsrcFactory::AllocateCmdMemory(const AgentInfo* agent_info, size_t s } // Wait signal -hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { +hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, + const hsa_signal_value_t& signal_value) const { const hsa_signal_value_t exp_value = signal_value - 1; hsa_signal_value_t ret_value = signal_value; while (1) { - ret_value = - hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, timeout_, HSA_WAIT_STATE_BLOCKED); + ret_value = hsa_api_.hsa_signal_wait_scacquire(signal, HSA_SIGNAL_CONDITION_LT, signal_value, + timeout_, HSA_WAIT_STATE_BLOCKED); if (ret_value == exp_value) break; if (ret_value != signal_value) { std::cerr << "Error: HsaRsrcFactory::SignalWait: signal_value(" << signal_value - << "), ret_value(" << ret_value << ")" << std::endl << std::flush; + << "), ret_value(" << ret_value << ")" << std::endl + << std::flush; abort(); } } @@ -530,7 +559,8 @@ hsa_signal_value_t HsaRsrcFactory::SignalWait(const hsa_signal_t& signal, const } // Wait signal with signal value restore -void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const { +void HsaRsrcFactory::SignalWaitRestore(const hsa_signal_t& signal, + const hsa_signal_value_t& signal_value) const { SignalWait(signal, signal_value); hsa_api_.hsa_signal_store_relaxed(const_cast(signal), signal_value); } @@ -594,13 +624,13 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br } // Create executable. - status = hsa_api_.hsa_executable_create_alt(HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, - NULL, executable); + status = hsa_api_.hsa_executable_create_alt( + HSA_PROFILE_FULL, HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, NULL, executable); CHECK_STATUS("Error in creating executable object", status); // Load code object. - status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, code_obj_rdr, - NULL, NULL); + status = hsa_api_.hsa_executable_load_agent_code_object(*executable, agent_info->dev_id, + code_obj_rdr, NULL, NULL); CHECK_STATUS("Error in loading executable object", status); // Freeze executable. @@ -610,7 +640,7 @@ bool HsaRsrcFactory::LoadAndFinalize(const AgentInfo* agent_info, const char* br // Get symbol handle. hsa_executable_symbol_t kernelSymbol; status = hsa_api_.hsa_executable_get_symbol(*executable, NULL, kernel_name, agent_info->dev_id, 0, - &kernelSymbol); + &kernelSymbol); CHECK_STATUS("Error in looking up kernel symbol", status); // Update output parameter @@ -654,7 +684,8 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet) { } uint32_t slot_idx = (uint32_t)(write_idx % queue->size); - uint32_t* queue_slot = reinterpret_cast((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); + uint32_t* queue_slot = + reinterpret_cast((uintptr_t)(queue->base_address) + (slot_idx * slot_size_b)); const uint32_t* slot_data = reinterpret_cast(packet); // Copy buffered commands into the queue slot. @@ -704,18 +735,22 @@ void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) { table->core_->hsa_executable_freeze_fn = hsa_executable_freeze_interceptor; } -hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data) { +hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, + hsa_executable_symbol_t symbol, void* data) { hsa_symbol_kind_t value = (hsa_symbol_kind_t)0; - hsa_status_t status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value); + hsa_status_t status = + hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &value); CHECK_STATUS("Error in getting symbol info", status); if (value == HSA_SYMBOL_KIND_KERNEL) { uint64_t addr = 0; uint32_t len = 0; - status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr); + status = hsa_api_.hsa_executable_symbol_get_info( + symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &addr); CHECK_STATUS("Error in getting kernel object", status); - status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &len); + status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, + &len); CHECK_STATUS("Error in getting name len", status); - char *name = new char[len + 1]; + char* name = new char[len + 1]; status = hsa_api_.hsa_executable_symbol_get_info(symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME, name); CHECK_STATUS("Error in getting kernel name", status); name[len] = 0; @@ -728,12 +763,15 @@ hsa_status_t HsaRsrcFactory::executable_symbols_cb(hsa_executable_t exec, hsa_ex return HSA_STATUS_SUCCESS; } -hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options) { +hsa_status_t HsaRsrcFactory::hsa_executable_freeze_interceptor(hsa_executable_t executable, + const char* options) { std::lock_guard lck(mutex_); if (symbols_map_ == NULL) symbols_map_ = new symbols_map_t; - hsa_status_t status = hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL); + hsa_status_t status = + hsa_api_.hsa_executable_iterate_symbols(executable, executable_symbols_cb, NULL); CHECK_STATUS("Error in iterating executable symbols", status); - return hsa_api_.hsa_executable_freeze(executable, options);; + return hsa_api_.hsa_executable_freeze(executable, options); + ; } void HsaRsrcFactory::DumpHandles(FILE* file) { @@ -741,10 +779,14 @@ void HsaRsrcFactory::DumpHandles(FILE* file) { auto end = agent_map_.end(); for (auto it = beg; it != end; ++it) { const AgentInfo* agent_info = it->second; - fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle, (agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu"); - if (agent_info->cpu_pool.handle != 0) fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle); - if (agent_info->kern_arg_pool.handle != 0) fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle); - if (agent_info->gpu_pool.handle != 0) fprintf(file, "0x%lx pool gpu\n", agent_info->gpu_pool.handle); + fprintf(file, "0x%lx agent %s\n", agent_info->dev_id.handle, + (agent_info->dev_type == HSA_DEVICE_TYPE_CPU) ? "cpu" : "gpu"); + if (agent_info->cpu_pool.handle != 0) + fprintf(file, "0x%lx pool cpu\n", agent_info->cpu_pool.handle); + if (agent_info->kern_arg_pool.handle != 0) + fprintf(file, "0x%lx pool cpu kernarg\n", agent_info->kern_arg_pool.handle); + if (agent_info->gpu_pool.handle != 0) + fprintf(file, "0x%lx pool gpu\n", agent_info->gpu_pool.handle); } fflush(file); } diff --git a/test/hsa/src/hsa_rsrc_factory.h b/test/hsa/src/hsa_rsrc_factory.h index 09be5ca9dd..1142abd943 100644 --- a/test/hsa/src/hsa_rsrc_factory.h +++ b/test/hsa/src/hsa_rsrc_factory.h @@ -44,23 +44,25 @@ #define HSA_QUEUE_ALIGN_BYTES 64 #define HSA_PACKET_ALIGN_BYTES 64 -#define CHECK_STATUS(msg, status) do { \ - if ((status) != HSA_STATUS_SUCCESS) { \ - const char* emsg = 0; \ - hsa_status_string(status, &emsg); \ - printf("%s: %s\n", msg, emsg ? emsg : ""); \ - abort(); \ - } \ -} while (0) +#define CHECK_STATUS(msg, status) \ + do { \ + if ((status) != HSA_STATUS_SUCCESS) { \ + const char* emsg = 0; \ + hsa_status_string(status, &emsg); \ + printf("%s: %s\n", msg, emsg ? emsg : ""); \ + abort(); \ + } \ + } while (0) -#define CHECK_ITER_STATUS(msg, status) do { \ - if ((status) != HSA_STATUS_INFO_BREAK) { \ - const char* emsg = 0; \ - hsa_status_string(status, &emsg); \ - printf("%s: %s\n", msg, emsg ? emsg : ""); \ - abort(); \ - } \ -} while (0) +#define CHECK_ITER_STATUS(msg, status) \ + do { \ + if ((status) != HSA_STATUS_INFO_BREAK) { \ + const char* emsg = 0; \ + hsa_status_string(status, &emsg); \ + printf("%s: %s\n", msg, emsg ? emsg : ""); \ + abort(); \ + } \ + } while (0) static const size_t MEM_PAGE_BYTES = 0x1000; static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1; @@ -172,15 +174,12 @@ class HsaTimer { static const timestamp_t TIMESTAMP_MAX = UINT64_MAX; typedef long double freq_t; - enum time_id_t { - TIME_ID_CLOCK_REALTIME = 0, - TIME_ID_CLOCK_MONOTONIC = 1, - TIME_ID_NUMBER - }; + enum time_id_t { TIME_ID_CLOCK_REALTIME = 0, TIME_ID_CLOCK_MONOTONIC = 1, TIME_ID_NUMBER }; HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) { timestamp_t sysclock_hz = 0; - hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); + hsa_status_t status = + hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz); CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status); sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz; } @@ -215,8 +214,8 @@ class HsaTimer { // Return pair of correlated values of profiling timestamp and time with // correlation error for a given time ID and number of iterations - void correlated_pair_ns(time_id_t time_id, uint32_t iters, - timestamp_t* timestamp_v, timestamp_t* time_v, timestamp_t* error_v) const { + void correlated_pair_ns(time_id_t time_id, uint32_t iters, timestamp_t* timestamp_v, + timestamp_t* time_v, timestamp_t* error_v) const { clockid_t clock_id = 0; switch (clock_id) { case TIME_ID_CLOCK_REALTIME: @@ -355,7 +354,8 @@ class HsaRsrcFactory { uint8_t* AllocateCmdMemory(const AgentInfo* agent_info, size_t size); // Wait signal - hsa_signal_value_t SignalWait(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; + hsa_signal_value_t SignalWait(const hsa_signal_t& signal, + const hsa_signal_value_t& signal_value) const; // Wait signal with signal value restore void SignalWaitRestore(const hsa_signal_t& signal, const hsa_signal_value_t& signal_value) const; @@ -401,7 +401,9 @@ class HsaRsrcFactory { const hsa_ven_amd_loader_1_00_pfn_t* LoaderApi() const { return &loader_api_; } // Methods for system-clock/ns conversion and timestamp in 'ns' - timestamp_t SysclockToNs(const timestamp_t& sysclock) const { return timer_->sysclock_to_ns(sysclock); } + timestamp_t SysclockToNs(const timestamp_t& sysclock) const { + return timer_->sysclock_to_ns(sysclock); + } timestamp_t NsToSysclock(const timestamp_t& time) const { return timer_->ns_to_sysclock(time); } timestamp_t TimestampNs() const { return timer_->timestamp_ns(); } @@ -480,8 +482,10 @@ class HsaRsrcFactory { typedef std::map symbols_map_t; static symbols_map_t* symbols_map_; static bool executable_tracking_on_; - static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, const char *options); - static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, void *data); + static hsa_status_t hsa_executable_freeze_interceptor(hsa_executable_t executable, + const char* options); + static hsa_status_t executable_symbols_cb(hsa_executable_t exec, hsa_executable_symbol_t symbol, + void* data); // HSA runtime API table static hsa_pfn_t hsa_api_; @@ -505,8 +509,8 @@ class HsaRsrcFactory { timestamp_t time_error_[HsaTimer::TIME_ID_NUMBER]; // CPU/kern-arg memory pools - hsa_amd_memory_pool_t *cpu_pool_; - hsa_amd_memory_pool_t *kern_arg_pool_; + hsa_amd_memory_pool_t* cpu_pool_; + hsa_amd_memory_pool_t* kern_arg_pool_; }; #endif // _HSA_RSRC_FACTORY_H_ diff --git a/test/hsa/test/ctrl/run_kernel.h b/test/hsa/test/ctrl/run_kernel.h index c1d3af183e..fd01e46d1f 100644 --- a/test/hsa/test/ctrl/run_kernel.h +++ b/test/hsa/test/ctrl/run_kernel.h @@ -24,7 +24,9 @@ #include "ctrl/test_hsa.h" #include "util/test_assert.h" -template bool RunKernel(int argc = 0, char* argv[] = NULL, const AgentInfo* agent_info = NULL, hsa_queue_t* queue = NULL, int count = 1) { +template +bool RunKernel(int argc = 0, char* argv[] = NULL, const AgentInfo* agent_info = NULL, + hsa_queue_t* queue = NULL, int count = 1) { bool ret_val = false; if (getenv("ROC_TEST_TRACE") == NULL) std::clog.rdbuf(NULL); diff --git a/test/hsa/test/ctrl/test_hsa.cpp b/test/hsa/test/ctrl/test_hsa.cpp index 0613ea83ac..24083c8ecf 100644 --- a/test/hsa/test/ctrl/test_hsa.cpp +++ b/test/hsa/test/ctrl/test_hsa.cpp @@ -63,7 +63,8 @@ bool TestHsa::Initialize(int /*arg_cnt*/, char** /*arg_list*/) { return false; } } - std::clog << "> Using agent[" << agent_info_->dev_index << "] : " << agent_info_->name << std::endl; + std::clog << "> Using agent[" << agent_info_->dev_index << "] : " << agent_info_->name + << std::endl; // Create an instance of Aql Queue if (hsa_queue_ == NULL) { @@ -116,8 +117,8 @@ bool TestHsa::Setup() { size_t size_info = 0; const hsa_status_t status = hsa_executable_symbol_get_info( kernel_code_desc_, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &size_info); - TEST_ASSERT(status == HSA_STATUS_SUCCESS); - size_info = kernarg_size; + TEST_ASSERT(status == HSA_STATUS_SUCCESS); + size_info = kernarg_size; const bool kernarg_missmatch = (kernarg_size > size_info); if (kernarg_missmatch) { std::cout << "kernarg_size = " << kernarg_size << ", size_info = " << size_info @@ -209,7 +210,8 @@ bool TestHsa::Run() { // Submit AQL packet to the queue const uint64_t que_idx = hsa_rsrc_->Submit(hsa_queue_, &aql); - std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl << std::flush; + std::clog << "> Waiting on kernel dispatch signal, que_idx=" << que_idx << std::endl + << std::flush; // Wait on the dispatch signal until the kernel is finished. // Update wait condition to HSA_WAIT_STATE_ACTIVE for Polling diff --git a/test/hsa/test/dummy_kernel/dummy_kernel.h b/test/hsa/test/dummy_kernel/dummy_kernel.h index dd75aa70cf..15ed51d205 100644 --- a/test/hsa/test/dummy_kernel/dummy_kernel.h +++ b/test/hsa/test/dummy_kernel/dummy_kernel.h @@ -33,10 +33,7 @@ class DummyKernel : public TestKernel { enum { KERNARG_BUF_ID, LOCAL_BUF_ID }; // Constructor - DummyKernel() : - width_(64), - height_(64) - { + DummyKernel() : width_(64), height_(64) { SetInDescr(KERNARG_BUF_ID, KERNARG_DES_ID, 0); SetOutDescr(LOCAL_BUF_ID, LOCAL_DES_ID, 0); } @@ -57,7 +54,9 @@ class DummyKernel : public TestKernel { // Reference CPU implementation bool ReferenceImplementation(uint32_t* output, const uint32_t* input, const float* mask, const uint32_t width, const uint32_t height, - const uint32_t maskWidth, const uint32_t maskHeight) { return true; } + const uint32_t maskWidth, const uint32_t maskHeight) { + return true; + } // Width of the Input array const uint32_t width_; diff --git a/test/hsa/test/util/evt_stats.h b/test/hsa/test/util/evt_stats.h index c494f2b82f..4731cd1bc4 100644 --- a/test/hsa/test/util/evt_stats.h +++ b/test/hsa/test/util/evt_stats.h @@ -28,9 +28,8 @@ #include #include -template -class EvtStatsT { - public: +template class EvtStatsT { + public: typedef std::mutex mutex_t; typedef uint64_t evt_count_t; typedef double evt_avr_t; @@ -51,7 +50,7 @@ class EvtStatsT { inline void add_event(evt_id_t id, evt_weight_t weight) { std::lock_guard lck(mutex_); - //printf("EvtStats %p ::add_event %u %lu\n", this, id, weight); fflush(stdout); + // printf("EvtStats %p ::add_event %u %lu\n", this, id, weight); fflush(stdout); evt_record_t& rec = map_[id]; const evt_count_t prev_count = rec.count; @@ -65,7 +64,8 @@ class EvtStatsT { void dump() { std::lock_guard lck(mutex_); - fprintf(stdout, "Dumping %s\n", path_); fflush(stdout); + fprintf(stdout, "Dumping %s\n", path_); + fflush(stdout); typedef typename std::set, cmpfun> set_t; set_t s_(map_.begin(), map_.end()); @@ -75,7 +75,8 @@ class EvtStatsT { const evt_id_t id = e.first; const char* label = get_label(id); std::ostringstream oss; - oss << index << ",\"" << label << "\"," << e.second.count << "," << (uint64_t)(e.second.avr) << "," << (uint64_t)(e.second.count * e.second.avr); + oss << index << ",\"" << label << "\"," << e.second.count << "," << (uint64_t)(e.second.avr) + << "," << (uint64_t)(e.second.count * e.second.avr); fprintf(fdes_, "%s\n", oss.str().c_str()); index += 1; } @@ -88,24 +89,20 @@ class EvtStatsT { const char* label = ret.first->second; return label; } - const char* get_label(const char* id) { - return id; - } - const char* get_label(const std::string& id) { - return id.c_str(); - } + const char* get_label(const char* id) { return id; } + const char* get_label(const std::string& id) { return id.c_str(); } void set_label(evt_id_t id, const char* label) { - //printf("EvtStats %p ::set_label %u %s\n", this, id, label); fflush(stdout); + // printf("EvtStats %p ::set_label %u %s\n", this, id, label); fflush(stdout); labels_[id] = label; } EvtStatsT(FILE* f, const char* path) : fdes_(f), path_(path) { - //printf("EvtStats %p ::EvtStatsT()\n", this); fflush(stdout); + // printf("EvtStats %p ::EvtStatsT()\n", this); fflush(stdout); fprintf(fdes_, "Index,Name,Count,Avr,Total\n"); } - private: + private: mutex_t mutex_; map_t map_; labels_t labels_; @@ -115,4 +112,4 @@ class EvtStatsT { typedef EvtStatsT EvtStats; -#endif // EVT_STATS_H_ +#endif // EVT_STATS_H_ diff --git a/test/hsa/test/util/xml.h b/test/hsa/test/util/xml.h index 8b559b68cb..12aa5817b6 100644 --- a/test/hsa/test/util/xml.h +++ b/test/hsa/test/util/xml.h @@ -212,7 +212,8 @@ class Xml { buf[size - 1] = '\0'; if (strncmp(buf, "#include \"", 10) == 0) { - for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) {} + for (ind = 0; (ind < size) && (buf[ind] != '\n'); ++ind) { + } if (ind == size) { fprintf(stderr, "XML PreProcess failed, line size limit %zu\n", kBufSize); error = true; @@ -222,7 +223,8 @@ class Xml { size = ind; lseek(fd_, pos + ind + 1, SEEK_SET); - for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) {} + for (ind = 10; (ind < size) && (buf[ind] != '"'); ++ind) { + } if (ind == size) { error = true; break; diff --git a/test/tool/tracer_tool.cpp b/test/tool/tracer_tool.cpp index 70c1865daf..65c93e706b 100644 --- a/test/tool/tracer_tool.cpp +++ b/test/tool/tracer_tool.cpp @@ -21,15 +21,15 @@ #include #include -#include /* names denangle */ +#include /* names denangle */ #include #include #include #include #include -#include /* SYS_xxx definitions */ +#include /* SYS_xxx definitions */ #include -#include /* usleep */ +#include /* usleep */ #include #include "src/util/exception.h" @@ -70,10 +70,12 @@ } \ } while (0) -#define ONLOAD_TRACE(str) \ - if (getenv("ROCP_ONLOAD_TRACE")) do { \ - std::cout << "PID(" << GetPid() << "): TRACER_TOOL::" << __FUNCTION__ << " " << str << std::endl << std::flush; \ - } while(0); +#define ONLOAD_TRACE(str) \ + if (getenv("ROCP_ONLOAD_TRACE")) do { \ + std::cout << "PID(" << GetPid() << "): TRACER_TOOL::" << __FUNCTION__ << " " << str \ + << std::endl \ + << std::flush; \ + } while (0); #define ONLOAD_TRACE_BEG() ONLOAD_TRACE("begin") #define ONLOAD_TRACE_END() ONLOAD_TRACE("end") @@ -88,7 +90,8 @@ inline static void DEBUG_TRACE(const char* fmt, ...) { va_list valist; va_start(valist, fmt); vsnprintf(buf, size, fmt, valist); - printf("%u:%u %s", GetPid(), GetTid(), buf); fflush(stdout); + printf("%u:%u %s", GetPid(), GetTid(), buf); + fflush(stdout); va_end(valist); } #else @@ -107,18 +110,18 @@ bool trace_hip_activity = false; bool trace_pcs = false; // The below getter functions have been written intentionally to fix an issue -// with constructor ordering. Previously when hip_api_vec and hsa_api_vec +// with constructor ordering. Previously when hip_api_vec and hsa_api_vec // were left as simple global variables, whenever the tool_load() function // was called from "extern "C" CONSTRUCTOR_API void constructor()" of libtracer_tool.so // the ordering of std::vector constructor becomes undefined. This meant that you could assign -// hip_api_vec and hsa_api_vec with a value in tool_load() and once the function returns, the std::vector -// default constructor would execute later, causing the values to be lost. +// hip_api_vec and hsa_api_vec with a value in tool_load() and once the function returns, the +// std::vector default constructor would execute later, causing the values to be lost. -static std::vector &hsa_api_vec() { +static std::vector& hsa_api_vec() { static std::vector hsa_api_vec; return hsa_api_vec; } -static std::vector &hip_api_vec() { +static std::vector& hip_api_vec() { static std::vector hip_api_vec; return hip_api_vec; } @@ -167,7 +170,8 @@ void fatal(const std::string msg) { static inline const char* cxx_demangle(const char* symbol) { size_t funcnamesize; int status; - const char* ret = (symbol != NULL) ? abi::__cxa_demangle(symbol, NULL, &funcnamesize, &status) : symbol; + const char* ret = + (symbol != NULL) ? abi::__cxa_demangle(symbol, NULL, &funcnamesize, &status) : symbol; return (ret != NULL) ? ret : strdup(symbol); } @@ -208,7 +212,8 @@ void* control_thr_fun(void*) { uint32_t control_flush_us = 0; pthread_t flush_thread; bool flush_thread_started = false; -std::mutex flush_thread_mutex;; +std::mutex flush_thread_mutex; +; void* flush_thr_fun(void*) { const uint32_t dist_sec = control_flush_us / 1000000; @@ -218,7 +223,8 @@ void* flush_thr_fun(void*) { sleep(dist_sec); usleep(dist_us); std::lock_guard lock(flush_thread_mutex); - if (!flush_thread_started) while(1) sleep(1); + if (!flush_thread_started) + while (1) sleep(1); ROCTRACER_CALL(roctracer_flush_activity()); roctracer::TraceBufferBase::FlushAll(); } @@ -241,17 +247,13 @@ struct roctx_trace_entry_t { }; void roctx_flush_cb(roctx_trace_entry_t* entry); -constexpr roctracer::TraceBuffer::flush_prm_t roctx_flush_prm = {roctracer::DFLT_ENTRY_TYPE, roctx_flush_cb}; +constexpr roctracer::TraceBuffer::flush_prm_t roctx_flush_prm = { + roctracer::DFLT_ENTRY_TYPE, roctx_flush_cb}; roctracer::TraceBuffer* roctx_trace_buffer = NULL; // rocTX callback function -static inline void roctx_callback_fun( - uint32_t domain, - uint32_t cid, - uint32_t tid, - roctx_range_id_t rid, - const char* message) -{ +static inline void roctx_callback_fun(uint32_t domain, uint32_t cid, uint32_t tid, + roctx_range_id_t rid, const char* message) { #if ROCTX_CLOCK_TIME const timestamp_t time = HsaTimer::clocktime_ns(HsaTimer::TIME_ID_CLOCK_MONOTONIC); #else @@ -267,12 +269,7 @@ static inline void roctx_callback_fun( entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); } -void roctx_api_callback( - uint32_t domain, - uint32_t cid, - const void* callback_data, - void* arg) -{ +void roctx_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) { (void)arg; const roctx_api_data_t* data = reinterpret_cast(callback_data); roctx_callback_fun(domain, cid, GetTid(), data->args.id, data->args.message); @@ -280,27 +277,37 @@ void roctx_api_callback( // rocTX Start/Stop callbacks void roctx_range_start_callback(const roctx_range_data_t* data, void* arg) { - roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, data->tid, 0, data->message); + roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePushA, data->tid, 0, + data->message); } void roctx_range_stop_callback(const roctx_range_data_t* data, void* arg) { roctx_callback_fun(ACTIVITY_DOMAIN_ROCTX, ROCTX_API_ID_roctxRangePop, data->tid, 0, NULL); } -void start_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_start_callback, NULL); } -void stop_callback() { roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stop_callback, NULL); } +void start_callback() { + roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_start_callback, NULL); +} +void stop_callback() { + roctracer::RocTxLoader::Instance().RangeStackIterate(roctx_range_stop_callback, NULL); +} // rocTX buffer flush function void roctx_flush_cb(roctx_trace_entry_t* entry) { #if ROCTX_CLOCK_TIME timestamp_t timestamp = 0; - HsaRsrcFactory::Instance().GetTimestamp(HsaTimer::TIME_ID_CLOCK_MONOTONIC, entry->time, ×tamp); + HsaRsrcFactory::Instance().GetTimestamp(HsaTimer::TIME_ID_CLOCK_MONOTONIC, entry->time, + ×tamp); #else const timestamp_t timestamp = entry->time; #endif std::ostringstream os; - os << timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid << ":" << entry->rid; - if (entry->message != NULL) os << ":\"" << entry->message << "\""; - else os << ":\"\""; - fprintf(roctx_file_handle, "%s\n", os.str().c_str()); fflush(roctx_file_handle); + os << timestamp << " " << entry->pid << ":" << entry->tid << " " << entry->cid << ":" + << entry->rid; + if (entry->message != NULL) + os << ":\"" << entry->message << "\""; + else + os << ":\"\""; + fprintf(roctx_file_handle, "%s\n", os.str().c_str()); + fflush(roctx_file_handle); } /////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -318,24 +325,20 @@ struct hsa_api_trace_entry_t { }; void hsa_api_flush_cb(hsa_api_trace_entry_t* entry); -constexpr roctracer::TraceBuffer::flush_prm_t hsa_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hsa_api_flush_cb}; +constexpr roctracer::TraceBuffer::flush_prm_t hsa_flush_prm = { + roctracer::DFLT_ENTRY_TYPE, hsa_api_flush_cb}; roctracer::TraceBuffer* hsa_api_trace_buffer = NULL; // HSA API callback function -void hsa_api_callback( - uint32_t domain, - uint32_t cid, - const void* callback_data, - void* arg) -{ +void hsa_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) { (void)arg; const hsa_api_data_t* data = reinterpret_cast(callback_data); if (data->phase == ACTIVITY_API_PHASE_ENTER) { hsa_begin_timestamp = timer->timestamp_fn_ns(); } else { - - const timestamp_t end_timestamp = (cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : timer->timestamp_fn_ns(); + const timestamp_t end_timestamp = + (cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : timer->timestamp_fn_ns(); hsa_api_trace_entry_t* entry = hsa_api_trace_buffer->GetEntry(); entry->cid = cid; entry->begin = hsa_begin_timestamp; @@ -349,17 +352,17 @@ void hsa_api_callback( void hsa_api_flush_cb(hsa_api_trace_entry_t* entry) { std::ostringstream os; - os << entry->begin << ":" << entry->end << " " << entry->pid << ":" << entry->tid << " " << hsa_api_data_pair_t(entry->cid, entry->data); - fprintf(hsa_api_file_handle, "%s\n", os.str().c_str()); fflush(hsa_api_file_handle); + os << entry->begin << ":" << entry->end << " " << entry->pid << ":" << entry->tid << " " + << hsa_api_data_pair_t(entry->cid, entry->data); + fprintf(hsa_api_file_handle, "%s\n", os.str().c_str()); + fflush(hsa_api_file_handle); } -void hsa_activity_callback( - uint32_t op, - activity_record_t* record, - void* arg) -{ +void hsa_activity_callback(uint32_t op, activity_record_t* record, void* arg) { static uint64_t index = 0; - fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns, record->end_ns, index, my_pid); fflush(hsa_async_copy_file_handle); + fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy:%lu:%u\n", record->begin_ns, + record->end_ns, index, my_pid); + fflush(hsa_async_copy_file_handle); index++; } @@ -381,28 +384,21 @@ struct hip_api_trace_entry_t { }; void hip_api_flush_cb(hip_api_trace_entry_t* entry); -constexpr roctracer::TraceBuffer::flush_prm_t hip_api_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hip_api_flush_cb}; +constexpr roctracer::TraceBuffer::flush_prm_t hip_api_flush_prm = { + roctracer::DFLT_ENTRY_TYPE, hip_api_flush_cb}; roctracer::TraceBuffer* hip_api_trace_buffer = NULL; static inline bool is_hip_kernel_launch_api(const uint32_t& cid) { - bool ret = - (cid == HIP_API_ID_hipLaunchKernel) || - (cid == HIP_API_ID_hipExtLaunchKernel) || - (cid == HIP_API_ID_hipLaunchCooperativeKernel) || - (cid == HIP_API_ID_hipLaunchCooperativeKernelMultiDevice) || - (cid == HIP_API_ID_hipExtLaunchMultiKernelMultiDevice) || - (cid == HIP_API_ID_hipModuleLaunchKernel) || - (cid == HIP_API_ID_hipExtModuleLaunchKernel) || - (cid == HIP_API_ID_hipHccModuleLaunchKernel); + bool ret = (cid == HIP_API_ID_hipLaunchKernel) || (cid == HIP_API_ID_hipExtLaunchKernel) || + (cid == HIP_API_ID_hipLaunchCooperativeKernel) || + (cid == HIP_API_ID_hipLaunchCooperativeKernelMultiDevice) || + (cid == HIP_API_ID_hipExtLaunchMultiKernelMultiDevice) || + (cid == HIP_API_ID_hipModuleLaunchKernel) || (cid == HIP_API_ID_hipExtModuleLaunchKernel) || + (cid == HIP_API_ID_hipHccModuleLaunchKernel); return ret; } -void hip_api_callback( - uint32_t domain, - uint32_t cid, - const void* callback_data, - void* arg) -{ +void hip_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) { (void)arg; const hip_api_data_t* data = reinterpret_cast(callback_data); const timestamp_t timestamp = timer->timestamp_fn_ns(); @@ -428,16 +424,17 @@ void hip_api_callback( if (cid == HIP_API_ID_hipMalloc) { entry->ptr = *(data->args.hipMalloc.ptr); } else if (is_hip_kernel_launch_api(cid)) { - switch(cid) { + switch (cid) { case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: - case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: - { - const hipLaunchParams* listKernels = data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList; + case HIP_API_ID_hipLaunchCooperativeKernelMultiDevice: { + const hipLaunchParams* listKernels = + data->args.hipLaunchCooperativeKernelMultiDevice.launchParamsList; std::string name_str = ""; for (int i = 0; i < data->args.hipLaunchCooperativeKernelMultiDevice.numDevices; ++i) { const hipLaunchParams& lp = listKernels[i]; if (lp.func != NULL) { - const char* kernel_name = roctracer::HipLoader::Instance().KernelNameRefByPtr(lp.func, lp.stream); + const char* kernel_name = + roctracer::HipLoader::Instance().KernelNameRefByPtr(lp.func, lp.stream); const int device_id = roctracer::HipLoader::Instance().GetStreamDeviceId(lp.stream); name_str += std::string(kernel_name) + ":" + std::to_string(device_id) + ";"; } @@ -446,22 +443,21 @@ void hip_api_callback( break; } case HIP_API_ID_hipLaunchKernel: - case HIP_API_ID_hipLaunchCooperativeKernel: - { + case HIP_API_ID_hipLaunchCooperativeKernel: { const void* f = data->args.hipLaunchKernel.function_address; hipStream_t stream = data->args.hipLaunchKernel.stream; - if (f != NULL) entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream)); + if (f != NULL) + entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream)); break; } - case HIP_API_ID_hipExtLaunchKernel: - { + case HIP_API_ID_hipExtLaunchKernel: { const void* f = data->args.hipExtLaunchKernel.function_address; hipStream_t stream = data->args.hipExtLaunchKernel.stream; - if (f != NULL) entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream)); + if (f != NULL) + entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRefByPtr(f, stream)); break; } - default: - { + default: { const hipFunction_t f = data->args.hipModuleLaunchKernel.f; if (f != NULL) entry->name = strdup(roctracer::HipLoader::Instance().KernelNameRef(f)); } @@ -471,17 +467,15 @@ void hip_api_callback( entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); } - const char * name = roctracer_op_string(domain, cid, 0); - DEBUG_TRACE("hip_api_callback(\"%s\") phase(%d): cid(%u) data(%p) entry(%p) name(\"%s\") correlation_id(%lu) timestamp(%lu)\n", - name, data->phase, cid, data, entry, (entry) ? entry->name : NULL, data->correlation_id, timestamp); + const char* name = roctracer_op_string(domain, cid, 0); + DEBUG_TRACE( + "hip_api_callback(\"%s\") phase(%d): cid(%u) data(%p) entry(%p) name(\"%s\") " + "correlation_id(%lu) timestamp(%lu)\n", + name, data->phase, cid, data, entry, (entry) ? entry->name : NULL, data->correlation_id, + timestamp); } -void mark_api_callback( - uint32_t domain, - uint32_t cid, - const void* callback_data, - void* arg) -{ +void mark_api_callback(uint32_t domain, uint32_t cid, const void* callback_data, void* arg) { (void)arg; const char* name = reinterpret_cast(callback_data); @@ -513,22 +507,27 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) { std::ostringstream rec_ss; std::ostringstream oss; - const char* str = (domain != ACTIVITY_DOMAIN_EXT_API) ? roctracer_op_string(domain, cid, 0) : strdup("MARK"); - rec_ss << std::dec << begin_timestamp << ":" << end_timestamp << " " << entry->pid << ":" << entry->tid; + const char* str = + (domain != ACTIVITY_DOMAIN_EXT_API) ? roctracer_op_string(domain, cid, 0) : strdup("MARK"); + rec_ss << std::dec << begin_timestamp << ":" << end_timestamp << " " << entry->pid << ":" + << entry->tid; oss << std::dec << rec_ss.str() << " " << str; - const char * name = roctracer_op_string(entry->domain, entry->cid, 0); - DEBUG_TRACE("hip_api_flush_cb(\"%s\"): domain(%u) cid(%u) entry(%p) name(\"%s\" correlation_id(%lu) beg(%lu) end(%lu))\n", - name, entry->domain, entry->cid, entry, entry->name, correlation_id, begin_timestamp, end_timestamp); + const char* name = roctracer_op_string(entry->domain, entry->cid, 0); + DEBUG_TRACE( + "hip_api_flush_cb(\"%s\"): domain(%u) cid(%u) entry(%p) name(\"%s\" correlation_id(%lu) " + "beg(%lu) end(%lu))\n", + name, entry->domain, entry->cid, entry, entry->name, correlation_id, begin_timestamp, + end_timestamp); if (domain == ACTIVITY_DOMAIN_HIP_API) { #if HIP_PROF_HIP_API_STRING if (hip_api_stats != NULL) { hip_api_stats->add_event(cid, end_timestamp - begin_timestamp); if (is_hip_kernel_launch_api(cid)) { - hip_kernel_mutex.lock(); + hip_kernel_mutex.lock(); (*hip_kernel_map)[correlation_id] = entry->name; - hip_kernel_mutex.unlock(); + hip_kernel_mutex.unlock(); } } else { const char* str = hipApiString((hip_api_id_t)cid, data); @@ -537,50 +536,36 @@ void hip_api_flush_cb(hip_api_trace_entry_t* entry) { const char* kernel_name = cxx_demangle(entry->name); rec_ss << " kernel=" << kernel_name; } - rec_ss<< " :" << correlation_id; + rec_ss << " :" << correlation_id; fprintf(hip_api_file_handle, "%s\n", rec_ss.str().c_str()); } -#else // !HIP_PROF_HIP_API_STRING +#else // !HIP_PROF_HIP_API_STRING switch (cid) { case HIP_API_ID_hipMemcpy: - fprintf(hip_api_file_handle, "%s(dst(%p) src(%p) size(0x%x) kind(%u))\n", - oss.str().c_str(), - data->args.hipMemcpy.dst, - data->args.hipMemcpy.src, - (uint32_t)(data->args.hipMemcpy.sizeBytes), - (uint32_t)(data->args.hipMemcpy.kind)); + fprintf(hip_api_file_handle, "%s(dst(%p) src(%p) size(0x%x) kind(%u))\n", oss.str().c_str(), + data->args.hipMemcpy.dst, data->args.hipMemcpy.src, + (uint32_t)(data->args.hipMemcpy.sizeBytes), (uint32_t)(data->args.hipMemcpy.kind)); break; case HIP_API_ID_hipMemcpyAsync: fprintf(hip_api_file_handle, "%s(dst(%p) src(%p) size(0x%x) kind(%u) stream(%p))\n", - oss.str().c_str(), - data->args.hipMemcpyAsync.dst, - data->args.hipMemcpyAsync.src, - (uint32_t)(data->args.hipMemcpyAsync.sizeBytes), - (uint32_t)(data->args.hipMemcpyAsync.kind), - data->args.hipMemcpyAsync.stream); + oss.str().c_str(), data->args.hipMemcpyAsync.dst, data->args.hipMemcpyAsync.src, + (uint32_t)(data->args.hipMemcpyAsync.sizeBytes), + (uint32_t)(data->args.hipMemcpyAsync.kind), data->args.hipMemcpyAsync.stream); break; case HIP_API_ID_hipMalloc: - fprintf(hip_api_file_handle, "%s(ptr(%p) size(0x%x))\n", - oss.str().c_str(), - entry->ptr, - (uint32_t)(data->args.hipMalloc.size)); + fprintf(hip_api_file_handle, "%s(ptr(%p) size(0x%x))\n", oss.str().c_str(), entry->ptr, + (uint32_t)(data->args.hipMalloc.size)); break; case HIP_API_ID_hipFree: - fprintf(hip_api_file_handle, "%s(ptr(%p))\n", - oss.str().c_str(), - data->args.hipFree.ptr); + fprintf(hip_api_file_handle, "%s(ptr(%p))\n", oss.str().c_str(), data->args.hipFree.ptr); break; case HIP_API_ID_hipModuleLaunchKernel: - fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n", - oss.str().c_str(), - cxx_demangle(entry->name), - data->args.hipModuleLaunchKernel.stream); + fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n", oss.str().c_str(), + cxx_demangle(entry->name), data->args.hipModuleLaunchKernel.stream); break; case HIP_API_ID_hipExtModuleLaunchKernel: - fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n", - oss.str().c_str(), - cxx_demangle(entry->name), - data->args.hipExtModuleLaunchKernel.hStream); + fprintf(hip_api_file_handle, "%s(kernel(%s) stream(%p))\n", oss.str().c_str(), + cxx_demangle(entry->name), data->args.hipExtModuleLaunchKernel.hStream); break; default: fprintf(hip_api_file_handle, "%s()\n", oss.str().c_str()); @@ -605,23 +590,26 @@ struct hip_act_trace_entry_t { }; void hip_act_flush_cb(hip_act_trace_entry_t* entry); -constexpr roctracer::TraceBuffer::flush_prm_t hip_act_flush_prm = {roctracer::DFLT_ENTRY_TYPE, hip_act_flush_cb}; +constexpr roctracer::TraceBuffer::flush_prm_t hip_act_flush_prm = { + roctracer::DFLT_ENTRY_TYPE, hip_act_flush_cb}; roctracer::TraceBuffer* hip_act_trace_buffer = NULL; // HIP ACT trace buffer flush callback void hip_act_flush_cb(hip_act_trace_entry_t* entry) { const uint32_t domain = ACTIVITY_DOMAIN_HCC_OPS; const uint32_t op = 0; - const char * name = roctracer_op_string(domain, op, entry->kind); + const char* name = roctracer_op_string(domain, op, entry->kind); if (name == NULL) { - printf("hip_act_flush_cb name is NULL\n"); fflush(stdout); + printf("hip_act_flush_cb name is NULL\n"); + fflush(stdout); abort(); } if (strncmp("Kernel", name, 6) == 0) { hip_kernel_mutex.lock(); if (hip_kernel_stats == NULL) { - printf("hip_act_flush_cb hip_kernel_stats is NULL\n"); fflush(stdout); + printf("hip_act_flush_cb hip_kernel_stats is NULL\n"); + fflush(stdout); abort(); } name = (*hip_kernel_map)[entry->correlation_id]; @@ -640,11 +628,14 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) { const roctracer_record_t* end_record = reinterpret_cast(end); while (record < end_record) { - const char * name = roctracer_op_string(record->domain, record->op, record->kind); - DEBUG_TRACE("pool_activity_callback(\"%s\"): domain(%u) op(%u) kind(%u) record(%p) correlation_id(%lu) beg(%lu) end(%lu)\n", - name, record->domain, record->op, record->kind, record, record->correlation_id, record->begin_ns, record->end_ns); + const char* name = roctracer_op_string(record->domain, record->op, record->kind); + DEBUG_TRACE( + "pool_activity_callback(\"%s\"): domain(%u) op(%u) kind(%u) record(%p) correlation_id(%lu) " + "beg(%lu) end(%lu)\n", + name, record->domain, record->op, record->kind, record, record->correlation_id, + record->begin_ns, record->end_ns); - switch(record->domain) { + switch (record->domain) { case ACTIVITY_DOMAIN_HCC_OPS: if (hip_memcpy_stats != NULL) { hip_act_trace_entry_t* entry = hip_act_trace_buffer->GetEntry(); @@ -653,17 +644,16 @@ void pool_activity_callback(const char* begin, const char* end, void* arg) { entry->correlation_id = record->correlation_id; entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release); } else { - fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n", - record->begin_ns, record->end_ns, - record->device_id, record->queue_id, - name, record->correlation_id, my_pid); + fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu:%u\n", record->begin_ns, + record->end_ns, record->device_id, record->queue_id, name, record->correlation_id, + my_pid); fflush(hcc_activity_file_handle); } break; case ACTIVITY_DOMAIN_HSA_OPS: if (record->op == HSA_OP_ID_RESERVED1) { - fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n", - record->pc_sample.se, record->pc_sample.cycle, record->pc_sample.pc, name); + fprintf(pc_sample_file_handle, "%u %lu 0x%lx %s\n", record->pc_sample.se, + record->pc_sample.cycle, record->pc_sample.pc, name); fflush(pc_sample_file_handle); } break; @@ -682,11 +672,14 @@ std::string normalize_token(const std::string& token, bool not_empty, const std: std::string error_str = "none"; if (first_pos != std::string::npos) { const size_t last_pos = token.find_last_not_of(space_chars_set); - if (last_pos == std::string::npos) error_str = "token string error: \"" + token + "\""; + if (last_pos == std::string::npos) + error_str = "token string error: \"" + token + "\""; else { const size_t end_pos = last_pos + 1; - if (end_pos <= first_pos) error_str = "token string error: \"" + token + "\""; - else norm_len = end_pos - first_pos; + if (end_pos <= first_pos) + error_str = "token string error: \"" + token + "\""; + else + norm_len = end_pos - first_pos; } } if (((first_pos != std::string::npos) && (norm_len == 0)) || @@ -696,7 +689,8 @@ std::string normalize_token(const std::string& token, bool not_empty, const std: return (norm_len != 0) ? token.substr(first_pos, norm_len) : std::string(""); } -int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const std::string& delim, std::vector* vec, const char* label = NULL) { +int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const std::string& delim, + std::vector* vec, const char* label = NULL) { int parse_iter = 0; const auto& opts = node->opts; auto it = opts.find(field); @@ -706,7 +700,7 @@ int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const size_t pos1 = 0; const size_t string_len = array_string.length(); while (pos1 < string_len) { - // set pos2 such that it also handles case of multiple delimiter options. + // set pos2 such that it also handles case of multiple delimiter options. // For example- "hipLaunchKernel, hipExtModuleLaunchKernel, hipMemsetAsync" // in this example delimiters are ' ' and also ',' const size_t pos2 = array_string.find_first_of(delim, pos1); @@ -716,9 +710,9 @@ int get_xml_array(const xml::Xml::level_t* node, const std::string& field, const const std::string norm_str = normalize_token(token, found, "get_xml_array"); if (norm_str.length() != 0) vec->push_back(norm_str); if (!found) break; - // update pos2 such that it represents the first non-delimiter character - // in case multiple delimiters are specified in variable 'delim' - pos1 = array_string.find_first_not_of(delim, pos2); + // update pos2 such that it represents the first non-delimiter character + // in case multiple delimiters are specified in variable 'delim' + pos1 = array_string.find_first_not_of(delim, pos2); ++parse_iter; } } @@ -742,7 +736,8 @@ FILE* open_output_file(const char* prefix, const char* name, const char** path = } if (path != NULL) *path = strdup(oss.str().c_str()); - } else file_handle = stdout; + } else + file_handle = stdout; return file_handle; } @@ -785,7 +780,7 @@ void tool_unload() { flush_thread_started = false; flush_thread_mutex.unlock(); PTHREAD_CALL(pthread_cancel(flush_thread)); - void *res; + void* res; PTHREAD_CALL(pthread_join(flush_thread, &res)); if (res != PTHREAD_CANCELED) FATAL("flush thread wasn't stopped correctly"); } @@ -860,7 +855,8 @@ void tool_load() { } } - printf("ROCTracer (pid=%d): ", (int)GetPid()); fflush(stdout); + printf("ROCTracer (pid=%d): ", (int)GetPid()); + fflush(stdout); // XML input const char* xml_name = getenv("ROCP_INPUT"); @@ -879,8 +875,10 @@ void tool_load() { std::vector api_vec; for (const auto* node : entry->nodes) { - if (node->tag != "parameters") fatal("ROCTracer: trace node is not supported '" + name + ":" + node->tag + "'"); - get_xml_array(node, "api", ", ", &api_vec); // delimiter options given as both spaces and commas (' ' and ',') + if (node->tag != "parameters") + fatal("ROCTracer: trace node is not supported '" + name + ":" + node->tag + "'"); + get_xml_array(node, "api", ", ", + &api_vec); // delimiter options given as both spaces and commas (' ' and ',') break; } @@ -917,14 +915,13 @@ void tool_load() { roctx_file_handle = open_output_file(output_prefix, "roctx_trace.txt"); // initialize HSA tracing - roctracer_ext_properties_t properties { - start_callback, - stop_callback - }; + roctracer_ext_properties_t properties{start_callback, stop_callback}; roctracer_set_properties(ACTIVITY_DOMAIN_EXT_API, &properties); - fprintf(stdout, " rocTX-trace()\n"); fflush(stdout); - ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL)); + fprintf(stdout, " rocTX-trace()\n"); + fflush(stdout); + ROCTRACER_CALL( + roctracer_enable_domain_callback(ACTIVITY_DOMAIN_ROCTX, roctx_api_callback, NULL)); } const char* ctrl_str = getenv("ROCP_CTRL_RATE"); @@ -934,10 +931,13 @@ void tool_load() { uint32_t ctrl_rate = 0; if (sscanf(ctrl_str, "%d:%d:%d", &ctrl_delay, &ctrl_len, &ctrl_rate) != 3) { - EXC_RAISING(ROCTRACER_STATUS_ERROR, "Invalid ROCP_CTRL_RATE var(" << ctrl_str << "), expected ctrl_delay:ctrl_len:ctrl_rate"); + EXC_RAISING( + ROCTRACER_STATUS_ERROR, + "Invalid ROCP_CTRL_RATE var(" << ctrl_str << "), expected ctrl_delay:ctrl_len:ctrl_rate"); } if (ctrl_len > ctrl_rate) { - EXC_RAISING(ROCTRACER_STATUS_ERROR, "Control length value " << ctrl_len << " > rate value " << ctrl_rate); + EXC_RAISING(ROCTRACER_STATUS_ERROR, + "Control length value " << ctrl_len << " > rate value " << ctrl_rate); } control_dist_us = ctrl_rate - ctrl_len; control_len_us = ctrl_len; @@ -946,14 +946,21 @@ void tool_load() { roctracer_stop(); if (ctrl_delay != UINT32_MAX) { - fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", ctrl_delay, ctrl_len, ctrl_rate); fflush(stdout); + fprintf(stdout, "ROCTracer: trace control: delay(%uus), length(%uus), rate(%uus)\n", + ctrl_delay, ctrl_len, ctrl_rate); + fflush(stdout); pthread_t thread; pthread_attr_t attr; int err = pthread_attr_init(&attr); - if (err) { errno = err; perror("pthread_attr_init"); abort(); } + if (err) { + errno = err; + perror("pthread_attr_init"); + abort(); + } err = pthread_create(&thread, &attr, control_thr_fun, NULL); } else { - fprintf(stdout, "ROCTracer: trace start disabled\n"); fflush(stdout); + fprintf(stdout, "ROCTracer: trace start disabled\n"); + fflush(stdout); } } @@ -965,10 +972,15 @@ void tool_load() { abort(); } - fprintf(stdout, "ROCTracer: trace control flush rate(%uus)\n", control_flush_us); fflush(stdout); + fprintf(stdout, "ROCTracer: trace control flush rate(%uus)\n", control_flush_us); + fflush(stdout); pthread_attr_t attr; int err = pthread_attr_init(&attr); - if (err) { errno = err; perror("pthread_attr_init"); abort(); } + if (err) { + errno = err; + perror("pthread_attr_init"); + abort(); + } std::lock_guard lock(flush_thread_mutex); PTHREAD_CALL(pthread_create(&flush_thread, &attr, flush_thr_fun, NULL)); flush_thread_started = true; @@ -978,7 +990,8 @@ void tool_load() { } // HSA-runtime tool on-load method -extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count, +extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, + uint64_t failed_tool_count, const char* const* failed_tool_names) { ONLOAD_TRACE_BEG(); @@ -998,17 +1011,20 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, // initialize HSA tracing roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*)table); - fprintf(stdout, " HSA-trace("); fflush(stdout); + fprintf(stdout, " HSA-trace("); + fflush(stdout); if (hsa_api_vec().size() != 0) { for (unsigned i = 0; i < hsa_api_vec().size(); ++i) { uint32_t cid = HSA_API_ID_NUMBER; const char* api = hsa_api_vec()[i].c_str(); ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HSA_API, api, &cid, NULL)); - ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, NULL)); + ROCTRACER_CALL( + roctracer_enable_op_callback(ACTIVITY_DOMAIN_HSA_API, cid, hsa_api_callback, NULL)); printf(" %s", api); } } else { - ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, NULL)); + ROCTRACER_CALL( + roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HSA_API, hsa_api_callback, NULL)); } printf(")\n"); } @@ -1018,21 +1034,20 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, hsa_async_copy_file_handle = open_output_file(output_prefix, "async_copy_trace.txt"); // initialize HSA tracing - roctracer::hsa_ops_properties_t ops_properties { - table, - reinterpret_cast(hsa_activity_callback), - NULL, - output_prefix - }; + roctracer::hsa_ops_properties_t ops_properties{ + table, reinterpret_cast(hsa_activity_callback), NULL, + output_prefix}; roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties); - fprintf(stdout, " HSA-activity-trace()\n"); fflush(stdout); + fprintf(stdout, " HSA-activity-trace()\n"); + fflush(stdout); ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY)); } // Enable HIP API callbacks/activity if (trace_hip_api || trace_hip_activity) { - fprintf(stdout, " HIP-trace()\n"); fflush(stdout); + fprintf(stdout, " HIP-trace()\n"); + fflush(stdout); // roctracer properties roctracer_set_properties(ACTIVITY_DOMAIN_HIP_API, (void*)mark_api_callback); // Allocating tracing pool @@ -1052,11 +1067,13 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint32_t cid = HIP_API_ID_NONE; const char* api = hip_api_vec()[i].c_str(); ROCTRACER_CALL(roctracer_op_code(ACTIVITY_DOMAIN_HIP_API, api, &cid, NULL)); - ROCTRACER_CALL(roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, cid, hip_api_callback, NULL)); + ROCTRACER_CALL( + roctracer_enable_op_callback(ACTIVITY_DOMAIN_HIP_API, cid, hip_api_callback, NULL)); printf(" %s", api); } } else { - ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); + ROCTRACER_CALL( + roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL)); } if (is_stats_opt) { @@ -1087,7 +1104,8 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, // Enable PC sampling if (trace_pcs) { - fprintf(stdout, " PCS-trace()\n"); fflush(stdout); + fprintf(stdout, " PCS-trace()\n"); + fflush(stdout); open_tracing_pool(); pc_sample_file_handle = open_output_file(output_prefix, "pcs_trace.txt"); ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_RESERVED1)); @@ -1103,17 +1121,19 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, } // HSA-runtime on-unload method -extern "C" PUBLIC_API void OnUnload() { - ONLOAD_TRACE(""); -} +extern "C" PUBLIC_API void OnUnload() { ONLOAD_TRACE(""); } extern "C" CONSTRUCTOR_API void constructor() { ONLOAD_TRACE_BEG(); roctracer::hip_support::HIP_depth_max = 0; - roctx_trace_buffer = new roctracer::TraceBuffer("rocTX API", 0x200000, &roctx_flush_prm, 1); - hip_api_trace_buffer = new roctracer::TraceBuffer("HIP API", 0x200000, &hip_api_flush_prm, 1); - hip_act_trace_buffer = new roctracer::TraceBuffer("HIP ACT", 0x200000, &hip_act_flush_prm, 1, 1); - hsa_api_trace_buffer = new roctracer::TraceBuffer("HSA API", 0x200000, &hsa_flush_prm, 1); + roctx_trace_buffer = + new roctracer::TraceBuffer("rocTX API", 0x200000, &roctx_flush_prm, 1); + hip_api_trace_buffer = + new roctracer::TraceBuffer("HIP API", 0x200000, &hip_api_flush_prm, 1); + hip_act_trace_buffer = new roctracer::TraceBuffer( + "HIP ACT", 0x200000, &hip_act_flush_prm, 1, 1); + hsa_api_trace_buffer = + new roctracer::TraceBuffer("HSA API", 0x200000, &hsa_flush_prm, 1); roctracer_load(); tool_load(); ONLOAD_TRACE_END();