Add HSA ops activity support backed by librocprofiler64.so

What this patch does, file by file:
  * inc/ext/prof_protocol.h: adds a named 'ps_sample' {se, cycle, pc} member to activity_record_t.
  * inc/roctracer.h: adds the ROCTRACER_STATUS_HSA_ERR status code.
  * inc/roctracer_hsa.h: adds the hsa_op_id_t enumeration (DISPATCH, COPY, BARRIER, PCSAMPLE).
  * script/hsaap.py: the generated API ID enum now ends with HSA_API_ID_DISPATCH and
    HSA_API_ID_NUMBER (HSA_API_ID_ANY is no longer emitted).
  * src/core/loader.h: adds the RocpApi / RocpLoader loader for "librocprofiler64.so".
  * src/core/roctracer.cpp: routes HSA_API_ID_DISPATCH callbacks and HSA_OP_ID_DISPATCH activity
    through RocpLoader, keeps HSA_OP_ID_COPY on the async-copy flag, adds HSA_AsyncActivityCallback,
    and rewords several error messages.
  * test/MatrixTranspose_test: preloads librocprofiler64.so, enables HSA_OP_ID_PCSAMPLE activity
    and prints the ps_sample fields of ACTIVITY_DOMAIN_HSA_OPS records.

Review notes:
  * NOTE(review): this text was recovered from a copy in which line breaks, indentation and some
    '<...>' spans were lost. Indentation and blank context lines were re-derived from the hunk
    counts; context/removed lines that still show stripped text (bare '#include',
    'typedef BaseLoader HipLoader;', 'reinterpret_cast(arg)') are left as found. Regenerate the
    patch against the real tree before applying.
  * NOTE(review): the 'index' hashes of inc/roctracer.h and src/core/roctracer.cpp no longer
    describe the post-image, because added lines in those files were corrected.
  * NOTE(review): roctracer_enable_activity_fun only acts on HSA_OP_ID_DISPATCH and
    HSA_OP_ID_COPY, while the test enables HSA_OP_ID_PCSAMPLE. Confirm whether PCSAMPLE should
    also be forwarded to RocpLoader.

diff --git a/inc/ext/prof_protocol.h b/inc/ext/prof_protocol.h
index c578df0fd4..b6384b42ee 100644
--- a/inc/ext/prof_protocol.h
+++ b/inc/ext/prof_protocol.h
@@ -80,6 +80,11 @@ typedef struct activity_record_s {
     struct {
       activity_correlation_id_t external_id; // external correlatino id
     };
+    struct {
+      uint32_t se; // sampled SE
+      uint64_t cycle; // sample cycle
+      uint64_t pc; // sample PC
+    } ps_sample;
   };
   size_t bytes; // data size bytes
 } activity_record_t;
diff --git a/inc/roctracer.h b/inc/roctracer.h
index deffb0f6d3..8b0f2114f9 100644
--- a/inc/roctracer.h
+++ b/inc/roctracer.h
@@ -67,6 +67,7 @@ typedef enum {
   ROCTRACER_STATUS_BAD_PARAMETER = 5,
   ROCTRACER_STATUS_HIP_API_ERR = 6,
   ROCTRACER_STATUS_HCC_OPS_ERR = 7,
+  ROCTRACER_STATUS_HSA_ERR = 9,  // own value: 7 is HCC_OPS_ERR and 8 is ROCTX_ERR
   ROCTRACER_STATUS_ROCTX_ERR = 8,
 } roctracer_status_t;
 
diff --git a/inc/roctracer_hsa.h b/inc/roctracer_hsa.h
index 8531ab51dc..625fa0b760 100644
--- a/inc/roctracer_hsa.h
+++ b/inc/roctracer_hsa.h
@@ -28,6 +28,15 @@ THE SOFTWARE.
 
 #include "roctracer.h"
 
+// HSA OP ID enumeration
+enum hsa_op_id_t {
+  HSA_OP_ID_DISPATCH = 0,
+  HSA_OP_ID_COPY = 1,
+  HSA_OP_ID_BARRIER = 2,
+  HSA_OP_ID_PCSAMPLE = 3,
+  HSA_OP_ID_NUMBER = 4  // count of the op IDs above; returned by get_op_num() for HSA_OPS
+};
+
 #ifdef __cplusplus
 #include
 #include
diff --git a/script/hsaap.py b/script/hsaap.py
index 1e413a5295..f07c43d902 100755
--- a/script/hsaap.py
+++ b/script/hsaap.py
@@ -342,8 +342,8 @@ class API_DescrParser:
         self.content += ' ' + self.api_id[call] + ' = ' + str(n) + ',\n'
       else:
         self.content += '\n'
-    self.content += ' HSA_API_ID_NUMBER = ' + str(n) + ',\n'
-    self.content += ' HSA_API_ID_ANY = ' + str(n + 1) + ',\n'
+    self.content += ' HSA_API_ID_DISPATCH = ' + str(n) + ',\n'
+    self.content += ' HSA_API_ID_NUMBER = ' + str(n + 1) + ',\n'
     self.content += '};\n'
 
   # generate API args structure
diff --git a/src/core/loader.h b/src/core/loader.h
index 43b07be905..27a6fda55b 100644
--- a/src/core/loader.h
+++ b/src/core/loader.h
@@ -70,6 +70,33 @@ class BaseLoader : public T {
   void* handle_;
 };
 
+// 'rocprofiler' library loader class; init() looks up each entry point below by name
+class RocpApi {
+ public:
+  typedef BaseLoader<RocpApi> Loader;
+
+  typedef bool (RegisterCallback_t)(uint32_t op, void* callback, void* arg);
+  typedef bool (OperateCallback_t)(uint32_t op);
+  typedef bool (InitCallback_t)(void* callback, void* arg);
+  typedef bool (EnableCallback_t)(uint32_t op, bool enable);
+  typedef const char* (NameCallback_t)(uint32_t op);
+
+  RegisterCallback_t* RegisterApiCallback;
+  OperateCallback_t* RemoveApiCallback;
+  InitCallback_t* InitActivityCallback;
+  EnableCallback_t* EnableActivityCallback;
+  NameCallback_t* GetOpName;
+
+ protected:
+  void init(Loader* loader) {
+    RegisterApiCallback = loader->GetFun<RegisterCallback_t>("RegisterApiCallback");
+    RemoveApiCallback = loader->GetFun<OperateCallback_t>("RemoveApiCallback");
+    InitActivityCallback = loader->GetFun<InitCallback_t>("InitActivityCallback");
+    EnableActivityCallback = loader->GetFun<EnableCallback_t>("EnableActivityCallback");
+    GetOpName = loader->GetFun<NameCallback_t>("GetOpName");
+  }
+};
+
 // HIP runtime library loader class
 class HipApi {
  public:
@@ -164,6 +191,7 @@ class RocTxApi {
   }
 };
 
+typedef BaseLoader<RocpApi> RocpLoader;
 typedef BaseLoader HipLoader;
 typedef BaseLoader HccLoader;
 typedef BaseLoader KfdLoader;
@@ -176,6 +204,8 @@ typedef BaseLoader RocTxLoader;
   template std::atomic*> roctracer::BaseLoader::instance_{}; \
   template bool roctracer::BaseLoader::to_load_ = false; \
   template bool roctracer::BaseLoader::to_check_ = true; \
+  template<> const char* roctracer::RocpLoader::lib_name_ = "librocprofiler64.so"; \
+  template<> bool roctracer::RocpLoader::to_load_ = true; \
   template<> const char* roctracer::HipLoader::lib_name_ = "libhip_hcc.so"; \
   template<> bool roctracer::HipLoader::to_check_ = false; \
   template<> const char* roctracer::HccLoader::lib_name_ = "libmcwamp.so"; \
diff --git a/src/core/roctracer.cpp b/src/core/roctracer.cpp
index b975117ae6..9b31c64055 100644
--- a/src/core/roctracer.cpp
+++ b/src/core/roctracer.cpp
@@ -394,8 +394,6 @@ void HCC_ActivityIdCallback(activity_correlation_id_t correlation_id) {
 }
 
 void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
-  static hsa_rt_utils::Timer timer;
-
   MemoryPool* pool = reinterpret_cast(arg);
   roctracer_record_t* record_ptr = reinterpret_cast(record);
   record_ptr->domain = ACTIVITY_DOMAIN_HCC_OPS;
@@ -526,6 +524,13 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
   return status;
 }
 
+void HSA_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {  // 'arg' is the MemoryPool the record is written to
+  MemoryPool* pool = reinterpret_cast<MemoryPool*>(arg);
+  roctracer_record_t* record_ptr = reinterpret_cast<roctracer_record_t*>(record);
+  record_ptr->domain = ACTIVITY_DOMAIN_HSA_OPS;  // tag the record before it is written to the pool
+  pool->Write(*record_ptr);
+}
+
 // Logger routines and primitives
 util::Logger::mutex_t util::Logger::mutex_;
 std::atomic util::Logger::instance_{};
@@ -573,22 +578,16 @@ PUBLIC_API const char* roctracer_op_string(
 {
   API_METHOD_PREFIX
   switch (domain) {
-    case ACTIVITY_DOMAIN_HSA_API: {
+    case ACTIVITY_DOMAIN_HSA_API:
       return roctracer::hsa_support::GetApiName(op);
-      break;
-    }
-    case ACTIVITY_DOMAIN_HCC_OPS: {
+    case ACTIVITY_DOMAIN_HSA_OPS:
+      return roctracer::RocpLoader::Instance().GetOpName(op);
+    case ACTIVITY_DOMAIN_HCC_OPS:
       return roctracer::HccLoader::Instance().GetOpName(kind);
-      break;
-    }
-    case ACTIVITY_DOMAIN_HIP_API: {
+    case ACTIVITY_DOMAIN_HIP_API:
       return roctracer::HipLoader::Instance().ApiName(op);
-      break;
-    }
-    case ACTIVITY_DOMAIN_KFD_API: {
+    case ACTIVITY_DOMAIN_KFD_API:
       return roctracer::kfd_support::GetApiName(op);
-      break;
-    }
     default:
       EXC_RAISING(ROCTRACER_STATUS_BAD_DOMAIN, "invalid domain ID(" << domain << ")");
   }
@@ -622,7 +621,7 @@ PUBLIC_API roctracer_status_t roctracer_op_code(
 
 static inline uint32_t get_op_num(const uint32_t& domain) {
   switch (domain) {
-    case ACTIVITY_DOMAIN_HSA_OPS: return 1;
+    case ACTIVITY_DOMAIN_HSA_OPS: return HSA_OP_ID_NUMBER;
     case ACTIVITY_DOMAIN_HSA_API: return HSA_API_ID_NUMBER;
     case ACTIVITY_DOMAIN_HCC_OPS: return HIP_OP_ID_NUMBER;
     case ACTIVITY_DOMAIN_HIP_API: return HIP_API_ID_NUMBER;
@@ -645,11 +644,16 @@ static roctracer_status_t roctracer_enable_callback_fun(
   switch (domain) {
     case ACTIVITY_DOMAIN_KFD_API: {
       const bool succ = roctracer::KfdLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
-      if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error");
+      if (succ == false) EXC_RAISING(ROCTRACER_STATUS_ERROR, "KFD RegisterApiCallback error(" << op << ") failed");
       break;
     }
     case ACTIVITY_DOMAIN_HSA_OPS: break;
     case ACTIVITY_DOMAIN_HSA_API: {
+      if (op == HSA_API_ID_DISPATCH) {  // dispatch callbacks are registered with RocpLoader, not cb_table
+        const bool succ = roctracer::RocpLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
+        if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RegisterApiCallback(" << op << ") failed");
+        break;
+      }
       roctracer::hsa_support::cb_table.set(op, callback, user_data);
       break;
     }
@@ -658,13 +662,13 @@ static roctracer_status_t roctracer_enable_callback_fun(
       if (roctracer::HipLoader::Instance().Enabled() == false) break;
 
       hipError_t hip_err = roctracer::HipLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
-      if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRegisterApiCallback(" << op << ") error(" << hip_err << ")");
+      if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RegisterApiCallback(" << op << ") error(" << hip_err << ")");
       break;
     }
     case ACTIVITY_DOMAIN_ROCTX: {
       if (roctracer::RocTxLoader::Instance().Enabled()) {
         const bool suc = roctracer::RocTxLoader::Instance().RegisterApiCallback(op, (void*)callback, user_data);
-        if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "roctxRegisterApiCallback(" << op << ") failed");
+        if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RegisterApiCallback(" << op << ") failed");
       }
       break;
     }
@@ -730,19 +734,26 @@ static roctracer_status_t roctracer_disable_callback_fun(
       break;
     }
     case ACTIVITY_DOMAIN_HSA_OPS: break;
-    case ACTIVITY_DOMAIN_HSA_API: break;
+    case ACTIVITY_DOMAIN_HSA_API: {
+      if (op == HSA_API_ID_DISPATCH) {  // mirrors the enable path: only the dispatch op goes through RocpLoader
+        const bool succ = roctracer::RocpLoader::Instance().RemoveApiCallback(op);
+        if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::RemoveApiCallback(" << op << ") failed");
+        break;
+      }
+      break;
+    }
     case ACTIVITY_DOMAIN_HCC_OPS: break;
     case ACTIVITY_DOMAIN_HIP_API: {
       if (roctracer::HipLoader::Instance().Enabled() == false) break;
 
       hipError_t hip_err = roctracer::HipLoader::Instance().RemoveApiCallback(op);
-      if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveApiCallback error(" << hip_err << ")");
+      if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveApiCallback(" << op << "), error(" << hip_err << ")");
       break;
     }
     case ACTIVITY_DOMAIN_ROCTX: {
       if (roctracer::RocTxLoader::Instance().Enabled()) {
         const bool suc = roctracer::RocTxLoader::Instance().RemoveApiCallback(op);
-        if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "roctxRemoveApiCallback(" << op << ") failed");
+        if (suc == false) EXC_RAISING(ROCTRACER_STATUS_ROCTX_ERR, "ROCTX::RemoveApiCallback(" << op << ") failed");
       }
       break;
     }
@@ -833,8 +844,17 @@ static roctracer_status_t roctracer_enable_activity_fun(
   if (pool == NULL) pool = roctracer_default_pool();
   switch (domain) {
     case ACTIVITY_DOMAIN_HSA_OPS: {
-      roctracer::hsa_support::async_copy_callback_enabled = true;
-      rocprofiler::InterceptQueue::Enable(true);
+      if (op == HSA_OP_ID_DISPATCH) {
+        const bool init_phase = (roctracer::RocpLoader::GetRef() == NULL);  // sampled before Instance() is called below
+        if (init_phase == true) {
+          roctracer::RocpLoader::Instance().InitActivityCallback((void*)roctracer::HSA_AsyncActivityCallback,
+                                                                 (void*)pool);
+        }
+        const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, true);
+        if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback error");
+      } else if (op == HSA_OP_ID_COPY) {
+        roctracer::hsa_support::async_copy_callback_enabled = true;
+      }
       break;
     }
     case ACTIVITY_DOMAIN_HSA_API: break;
@@ -921,8 +941,12 @@ static roctracer_status_t roctracer_disable_activity_fun(
 {
   switch (domain) {
     case ACTIVITY_DOMAIN_HSA_OPS: {
-      roctracer::hsa_support::async_copy_callback_enabled = false;
-      rocprofiler::InterceptQueue::Enable(false);
+      if (op == HSA_OP_ID_DISPATCH) {
+        const bool succ = roctracer::RocpLoader::Instance().EnableActivityCallback(op, false);
+        if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HSA_ERR, "HSA::EnableActivityCallback(false) error, op(" << op << ")");
+      } else if (op == HSA_OP_ID_COPY) {
+        roctracer::hsa_support::async_copy_callback_enabled = false;  // disable path must clear the flag
+      }
       break;
     }
     case ACTIVITY_DOMAIN_HSA_API: break;
@@ -931,14 +955,14 @@ static roctracer_status_t roctracer_disable_activity_fun(
       if (roctracer::HccLoader::Instance().Enabled() == false) break;
 
       const bool succ = roctracer::HccLoader::Instance().EnableActivityCallback(op, false);
-      if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error domain(" << domain << ") op(" << op << ")");
+      if (succ == false) HCC_EXC_RAISING(ROCTRACER_STATUS_HCC_OPS_ERR, "HCC::EnableActivityCallback(NULL) error, op(" << op << ")");
       break;
     }
     case ACTIVITY_DOMAIN_HIP_API: {
       if (roctracer::HipLoader::Instance().Enabled() == false) break;
 
       const hipError_t hip_err = roctracer::HipLoader::Instance().RemoveActivityCallback(op);
-      if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "hipRemoveActivityCallback error(" << hip_err << ")");
+      if (hip_err != hipSuccess) HIP_EXC_RAISING(ROCTRACER_STATUS_HIP_API_ERR, "HIP::RemoveActivityCallback op(" << op << "), error(" << hip_err << ")");
       break;
     }
     case ACTIVITY_DOMAIN_ROCTX: break;
diff --git a/test/MatrixTranspose_test/Makefile b/test/MatrixTranspose_test/Makefile
index d25f64340b..571725fd1d 100644
--- a/test/MatrixTranspose_test/Makefile
+++ b/test/MatrixTranspose_test/Makefile
@@ -41,7 +41,7 @@ $(EXECUTABLE): $(OBJECTS)
 	$(HIPCC) $(OBJECTS) -o $@ $(ROC_LIBS)
 
 test: $(EXECUTABLE)
-	LD_PRELOAD=$(LIB_PATH)/libkfdwrapper64.so $(EXECUTABLE)
+	LD_PRELOAD="$(LIB_PATH)/libkfdwrapper64.so librocprofiler64.so" $(EXECUTABLE)
 
 clean:
 	rm -f $(EXECUTABLE)
diff --git a/test/MatrixTranspose_test/MatrixTranspose.cpp b/test/MatrixTranspose_test/MatrixTranspose.cpp
index cc261f4312..3877a4dead 100644
--- a/test/MatrixTranspose_test/MatrixTranspose.cpp
+++ b/test/MatrixTranspose_test/MatrixTranspose.cpp
@@ -310,6 +310,12 @@ void activity_callback(const char* begin, const char* end, void* arg) {
         record->queue_id
       );
       if (record->op == HIP_OP_ID_COPY) fprintf(stdout, " bytes(0x%zx)", record->bytes);
+    } else if (record->domain == ACTIVITY_DOMAIN_HSA_OPS) {
+      fprintf(stdout, " se(%u) cycle(%lu) pc(%lx)",
+        record->ps_sample.se,
+        record->ps_sample.cycle,
+        record->ps_sample.pc
+      );
     } else if (record->domain == ACTIVITY_DOMAIN_EXT_API) {
       fprintf(stdout, " external_id(%lu)",
         record->external_id
@@ -340,6 +346,8 @@ void init_tracing() {
   // Enable HIP activity tracing
   ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
   ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
+  // Enable PC sampling
+  ROCTRACER_CALL(roctracer_enable_op_activity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_PCSAMPLE));
   // Enable KFD API tracing
   ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_KFD_API, api_callback, NULL));
   ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_KFD_API));