From 7ba92e192bdf51db940fbc9aa873dfdd319caaec Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 9 Jul 2018 20:33:58 -0500 Subject: [PATCH] queue profiled API Change-Id: I08f496fa76903738e937d2f29dc558ed9def3e0a --- inc/rocprofiler.h | 15 ++++-- src/core/intercept_queue.cpp | 2 + src/core/intercept_queue.h | 97 ++++++++++++++++++++++++++---------- src/core/rocprofiler.cpp | 38 +++++++++++++- src/core/tracker.h | 6 ++- test/run.sh | 2 + test/tool/tool.cpp | 2 +- 7 files changed, 127 insertions(+), 35 deletions(-) diff --git a/inc/rocprofiler.h b/inc/rocprofiler.h index fe64fc4e4c..fb28364924 100644 --- a/inc/rocprofiler.h +++ b/inc/rocprofiler.h @@ -207,10 +207,10 @@ hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling contex // Dispatch record typedef struct { - uint64_t dispatch; // dispatch timestamp - uint64_t begin; // begin timestamp - uint64_t end; // end timestamp - uint64_t complete; // completion signal timestamp + uint64_t dispatch; // dispatch timestamp, ns + uint64_t begin; // kernel begin timestamp, ns + uint64_t end; // kernel end timestamp, ns + uint64_t complete; // completion signal timestamp, ns } rocprofiler_dispatch_record_t; // Profiling callback data @@ -371,6 +371,13 @@ hsa_status_t rocprofiler_query_info( hsa_status_t (*callback)(const rocprofiler_info_data_t info, void *data), // callback void *data); // [in/out] data passed to callback +// Creates a profiled queue. All dispatches on this queue will be profiled +hsa_status_t rocprofiler_queue_create_profiled( + hsa_agent_t agent_handle,uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), + void* data, uint32_t private_segment_size, uint32_t group_segment_size, + hsa_queue_t** queue); + #ifdef __cplusplus } // extern "C" block #endif // __cplusplus diff --git a/src/core/intercept_queue.cpp b/src/core/intercept_queue.cpp index 4ac6b0e271..e67c34dcb7 100644 --- a/src/core/intercept_queue.cpp +++ b/src/core/intercept_queue.cpp @@ -39,4 +39,6 @@ const char* InterceptQueue::kernel_none_ = ""; uint64_t InterceptQueue::timeout_ = UINT64_MAX; Tracker* InterceptQueue::tracker_ = NULL; bool InterceptQueue::tracker_on_ = false; +bool InterceptQueue::in_constr_call_ = false; + } // namespace rocprofiler diff --git a/src/core/intercept_queue.h b/src/core/intercept_queue.h index 0d9f29e17c..733f0fc33a 100644 --- a/src/core/intercept_queue.h +++ b/src/core/intercept_queue.h @@ -53,22 +53,26 @@ class InterceptQueue { static void HsaIntercept(HsaApiTable* table); - static hsa_status_t QueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + static hsa_status_t InterceptQueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), void* data, uint32_t private_segment_size, - uint32_t group_segment_size, hsa_queue_t** queue) { - hsa_status_t status = HSA_STATUS_ERROR; + uint32_t group_segment_size, hsa_queue_t** queue, + const bool& tracker_on) { std::lock_guard lck(mutex_); + hsa_status_t status = HSA_STATUS_ERROR; + + if (in_constr_call_) EXC_ABORT(status, "recursive InterceptQueueCreate()"); + in_constr_call_ = true; ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, &status); - if (status != HSA_STATUS_SUCCESS) abort(); + if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "ProxyQueue::Create()"); - if (tracker_on_) { + if (tracker_on || tracker_on_) { if (tracker_ == NULL) tracker_ = new Tracker(timeout_); status = hsa_amd_profiling_set_profiler_enabled(*queue, true); - if (status != HSA_STATUS_SUCCESS) abort(); + if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_set_profiler_enabled()"); } if (!obj_map_) obj_map_ = new obj_map_t; @@ -76,25 +80,36 @@ class InterceptQueue { (*obj_map_)[(uint64_t)(*queue)] = obj; status = proxy->SetInterceptCB(OnSubmitCB, obj); + in_constr_call_ = false; return status; } + static hsa_status_t QueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, + void* data), + void* data, uint32_t private_segment_size, + uint32_t group_segment_size, hsa_queue_t** queue) { + return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, false); + } + + static hsa_status_t QueueCreateTracked(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, + void* data), + void* data, uint32_t private_segment_size, + uint32_t group_segment_size, hsa_queue_t** queue) { + return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, true); + } + static hsa_status_t QueueDestroy(hsa_queue_t* queue) { std::lock_guard lck(mutex_); hsa_status_t status = HSA_STATUS_ERROR; - if (destroy_callback_ != NULL) { - status = destroy_callback_(queue, callback_data_); - if (status != HSA_STATUS_SUCCESS) return status; - } + if (destroy_callback_ != NULL) { + status = destroy_callback_(queue, callback_data_); + } - obj_map_t::iterator it = obj_map_->find((uint64_t)queue); - if (it != obj_map_->end()) { - const InterceptQueue* obj = it->second; - assert(queue == obj->queue_); - delete obj; - obj_map_->erase(it); - status = HSA_STATUS_SUCCESS; + if (status == HSA_STATUS_SUCCESS) { + status = DelObj(queue); } return status; @@ -179,17 +194,9 @@ class InterceptQueue { static bool IsTrackerOn() { return tracker_on_; } private: - InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) : - queue_(queue), - proxy_(proxy) - { - agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent); - } - ~InterceptQueue() { ProxyQueue::Destroy(proxy_); } - - static packet_word_t GetHeaderType(const packet_t* packet) { + static hsa_packet_type_t GetHeaderType(const packet_t* packet) { const packet_word_t* header = reinterpret_cast(packet); - return (*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask; + return static_cast((*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask); } static const char* GetKernelName(const hsa_kernel_dispatch_packet_t* dispatch_packet) { @@ -219,6 +226,41 @@ class InterceptQueue { return funcname; } + // method to get an intercept queue object + static InterceptQueue* GetObj(const hsa_queue_t* queue) { + std::lock_guard lck(mutex_); + InterceptQueue* obj = NULL; + obj_map_t::const_iterator it = obj_map_->find((uint64_t)queue); + if (it != obj_map_->end()) { + obj = it->second; + assert(queue == obj->queue_); + } + return obj; + } + + // method to delete an intercept queue object + static hsa_status_t DelObj(const hsa_queue_t* queue) { + std::lock_guard lck(mutex_); + hsa_status_t status = HSA_STATUS_ERROR; + obj_map_t::const_iterator it = obj_map_->find((uint64_t)queue); + if (it != obj_map_->end()) { + const InterceptQueue* obj = it->second; + assert(queue == obj->queue_); + delete obj; + obj_map_->erase(it); + status = HSA_STATUS_SUCCESS;; + } + return status; + } + + InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) : + queue_(queue), + proxy_(proxy) + { + agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent); + } + ~InterceptQueue() { ProxyQueue::Destroy(proxy_); } + static mutex_t mutex_; static const packet_word_t header_type_mask = (1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1; static rocprofiler_callback_t dispatch_callback_; @@ -229,6 +271,7 @@ class InterceptQueue { static uint64_t timeout_; static Tracker* tracker_; static bool tracker_on_; + static bool in_constr_call_; hsa_queue_t* const queue_; ProxyQueue* const proxy_; diff --git a/src/core/rocprofiler.cpp b/src/core/rocprofiler.cpp index 30247ff8aa..5c26d75063 100644 --- a/src/core/rocprofiler.cpp +++ b/src/core/rocprofiler.cpp @@ -232,11 +232,35 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa rocprofiler::SaveHsaApi(table); rocprofiler::ProxyQueue::InitFactory(); bool intercept_mode = false; + + // Checking environment to enable intercept mode const char* intercept_env = getenv("ROCP_HSA_INTERCEPT"); if (intercept_env != NULL) { - if (strncmp(intercept_env, "1", 1) == 0) intercept_mode = true; + switch (atoi(intercept_env)) { + // Intercepting disabled + case 0: + intercept_mode = false; + rocprofiler::InterceptQueue::TrackerOn(false); + break; + // Intercepting enabled without timestamping + case 1: + intercept_mode = true; + rocprofiler::InterceptQueue::TrackerOn(false); + break; + // Intercepting enabled with timestamping + case 2: + intercept_mode = true; + rocprofiler::InterceptQueue::TrackerOn(true); + break; + default: + ERR_LOGGING("Bad ROCP_HSA_INTERCEPT env var value (" << intercept_env << ")"); + return false; + } } - if (rocprofiler::LoadTool()) intercept_mode = true; + + // Loading a tool lib and setting of intercept mode + intercept_mode = rocprofiler::LoadTool(); + // HSA intercepting if (intercept_mode) { rocprofiler::ProxyQueue::HsaIntercept(table); @@ -558,4 +582,14 @@ PUBLIC_API hsa_status_t rocprofiler_query_info( API_METHOD_SUFFIX } +// Creates a profiled queue. All dispatches on this queue will be profiled +PUBLIC_API hsa_status_t rocprofiler_queue_create_profiled( + hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type, + void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data), + void* data, uint32_t private_segment_size, uint32_t group_segment_size, + hsa_queue_t** queue) +{ + return rocprofiler::InterceptQueue::QueueCreateTracked(agent, size, type, callback, data, private_segment_size, group_segment_size, queue); +} + } // extern "C" diff --git a/src/core/tracker.h b/src/core/tracker.h index c35f0d1c03..ee22e3d592 100644 --- a/src/core/tracker.h +++ b/src/core/tracker.h @@ -99,9 +99,13 @@ class Tracker { record_t* record = new record_t{}; assert(record); entry->record = record; - status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &record->dispatch); + + timestamp_t dispatch_timestamp = 0; + status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &dispatch_timestamp); if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)"); + record->dispatch = timestamp2ns(dispatch_timestamp); + status = hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry); if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler"); diff --git a/test/run.sh b/test/run.sh index 7e63024ecd..86fa2772dd 100755 --- a/test/run.sh +++ b/test/run.sh @@ -38,6 +38,8 @@ export ROCP_TOOL_LIB=libtool.so unset ROCP_PROXY_QUEUE # ROC profiler metrics config file export ROCP_METRICS=metrics.xml +# ROC profiler kernels timing +export ROCP_TIMESTAMP_ON=1 # output directory for the tool library, for metrics results file 'results.txt' # and SQTT trace files 'thread_trace.se.out' export ROCP_OUTPUT_DIR=./RESULTS diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 5cf8443a26..0c658d5f68 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -600,7 +600,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, feature_count = next_offset - set_offset; } - if (tool_data->feature_count > 0) { + if (feature_count > 0) { // Open profiling context status = rocprofiler_open(callback_data->agent, features, feature_count, &context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);