diff --git a/inc/rocprofiler.h b/inc/rocprofiler.h index 8b34940601..bf176928f8 100644 --- a/inc/rocprofiler.h +++ b/inc/rocprofiler.h @@ -207,13 +207,15 @@ hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling contex uint32_t group_index); // group index //////////////////////////////////////////////////////////////////////////////// -// Runtime API observer +// Queue callbacks // -// Runtime API observer is called on enter and exit for the API +// Queue callbacks for initiating profiling per kernel dispatch and to wait +// the profiling data on the queue destroy. // Profiling callback data typedef struct { hsa_agent_t agent; + const hsa_queue_t* queue; uint64_t queue_index; uint64_t kernel_object; const char* kernel_name; @@ -226,12 +228,19 @@ typedef hsa_status_t (*rocprofiler_callback_t)( void* user_data, // [in/out] user data passed to the callback rocprofiler_group_t* group); // [out] profiling group -// Set/remove kernel dispatch observer -hsa_status_t rocprofiler_set_dispatch_callback( - rocprofiler_callback_t callback, // observer callback - void* data); // [in/out] passed callback data +// Queue callbacks +typedef struct { + rocprofiler_callback_t dispatch; // dispatch callback + hsa_status_t (*destroy)(hsa_queue_t* queue, void* data); // destroy callback +} rocprofiler_queue_callbacks_t; -hsa_status_t rocprofiler_remove_dispatch_callback(); +// Set queue callbacks +hsa_status_t rocprofiler_set_queue_callbacks( + rocprofiler_queue_callbacks_t callbacks, // callbacks + void* data); // [in/out] passed callbacks data + +// Remove queue callbacks +hsa_status_t rocprofiler_remove_queue_callbacks(); //////////////////////////////////////////////////////////////////////////////// // Start/stop profiling diff --git a/src/core/context.h b/src/core/context.h index acda717c00..4f9b573d37 100644 --- a/src/core/context.h +++ b/src/core/context.h @@ -329,7 +329,7 @@ class Context { const profile_vector_t profile_vector = GetProfiles(group_index); for (auto& tuple : profile_vector) { // Wait for stop packet to complete - const uint64_t timeout = UINT64_MAX; + const uint64_t timeout = timeout_; bool complete = false; while (!complete) { const hsa_signal_value_t signal_value = hsa_signal_wait_scacquire(tuple.completion_signal, HSA_SIGNAL_CONDITION_LT, 1, timeout, @@ -372,6 +372,10 @@ class Context { } } + static void SetTimeout(uint64_t timeout) { + timeout_ = timeout; + } + private: // Getting profling packets profile_vector_t GetProfiles(const uint32_t& index) { @@ -469,6 +473,9 @@ class Context { return info; } + // Profiling data waiting timeout + static uint64_t timeout_; + // GPU handel const hsa_agent_t agent_; const util::AgentInfo* agent_info_; diff --git a/src/core/intercept_queue.cpp b/src/core/intercept_queue.cpp index 6712e60dab..aee19cf78d 100644 --- a/src/core/intercept_queue.cpp +++ b/src/core/intercept_queue.cpp @@ -7,8 +7,9 @@ void InterceptQueue::HsaIntercept(HsaApiTable* table) { } InterceptQueue::mutex_t InterceptQueue::mutex_; -rocprofiler_callback_t InterceptQueue::on_dispatch_cb_ = NULL; -void* InterceptQueue::on_dispatch_cb_data_ = NULL; +rocprofiler_callback_t InterceptQueue::dispatch_callback_ = NULL; +InterceptQueue::queue_callback_t InterceptQueue::destroy_callback_ = NULL; +void* InterceptQueue::callback_data_ = NULL; InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL; const char* InterceptQueue::kernel_none_ = ""; } // namespace rocprofiler diff --git a/src/core/intercept_queue.h b/src/core/intercept_queue.h index 2cc976b55e..3d666d54aa 100644 --- a/src/core/intercept_queue.h +++ b/src/core/intercept_queue.h @@ -23,6 +23,7 @@ class InterceptQueue { public: typedef std::recursive_mutex mutex_t; typedef std::map obj_map_t; + typedef hsa_status_t (*queue_callback_t)(hsa_queue_t*, void* data); static void HsaIntercept(HsaApiTable* table); @@ -39,7 +40,7 @@ class InterceptQueue { ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, &status); if (status == HSA_STATUS_SUCCESS) { - InterceptQueue* obj = new InterceptQueue(agent, proxy); + InterceptQueue* obj = new InterceptQueue(agent, *queue, proxy); (*obj_map_)[(uint64_t)(*queue)] = obj; status = proxy->SetInterceptCB(OnSubmitCB, obj); } @@ -53,9 +54,15 @@ class InterceptQueue { std::lock_guard lck(mutex_); hsa_status_t status = HSA_STATUS_ERROR; + if (destroy_callback_ != NULL) { + status = destroy_callback_(queue, callback_data_); + if (status != HSA_STATUS_SUCCESS) return status; + } + obj_map_t::iterator it = obj_map_->find((uint64_t)queue); if (it != obj_map_->end()) { const InterceptQueue* obj = it->second; + assert(queue == obj->queue_); delete obj; obj_map_->erase(it); status = HSA_STATUS_SUCCESS; @@ -74,14 +81,17 @@ class InterceptQueue { bool to_submit = true; const packet_t* packet = &packets_arr[j]; - if ((GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) && (on_dispatch_cb_ != NULL)) { + if ((GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) && (dispatch_callback_ != NULL)) { rocprofiler_group_t group = {}; const hsa_kernel_dispatch_packet_t* dispatch_packet = reinterpret_cast(packet); const char* kernel_name = GetKernelName(dispatch_packet); - rocprofiler_callback_data_t data = {obj->agent_info_->dev_id, user_que_idx, - dispatch_packet->kernel_object, kernel_name}; - hsa_status_t status = on_dispatch_cb_(&data, on_dispatch_cb_data_, &group); + rocprofiler_callback_data_t data = {obj->agent_info_->dev_id, + obj->queue_, + user_que_idx, + dispatch_packet->kernel_object, + kernel_name}; + hsa_status_t status = dispatch_callback_(&data, callback_data_, &group); free(const_cast(kernel_name)); if ((status == HSA_STATUS_SUCCESS) && (group.context != NULL)) { Context* context = reinterpret_cast(group.context); @@ -112,20 +122,18 @@ class InterceptQueue { } } - static void SetDispatchCB(rocprofiler_callback_t on_dispatch_cb, void* data) { + static void SetCallbacks(rocprofiler_callback_t dispatch_callback, queue_callback_t destroy_callback, void* data) { std::lock_guard lck(mutex_); - on_dispatch_cb_ = on_dispatch_cb; - on_dispatch_cb_data_ = data; - } - - static void UnsetDispatchCB() { - std::lock_guard lck(mutex_); - on_dispatch_cb_ = NULL; - on_dispatch_cb_data_ = NULL; + callback_data_ = data; + dispatch_callback_ = dispatch_callback; + destroy_callback_ = destroy_callback; } private: - InterceptQueue(const hsa_agent_t& agent, ProxyQueue* proxy) : proxy_(proxy) { + InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) : + queue_(queue), + proxy_(proxy) + { agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent); } ~InterceptQueue() { ProxyQueue::Destroy(proxy_); } @@ -164,11 +172,13 @@ class InterceptQueue { static mutex_t mutex_; static const packet_word_t header_type_mask = (1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1; - static rocprofiler_callback_t on_dispatch_cb_; - static void* on_dispatch_cb_data_; + static rocprofiler_callback_t dispatch_callback_; + static queue_callback_t destroy_callback_; + static void* callback_data_; static obj_map_t* obj_map_; static const char* kernel_none_; + hsa_queue_t* const queue_; ProxyQueue* const proxy_; const util::AgentInfo* agent_info_; }; diff --git a/src/core/rocprofiler.cpp b/src/core/rocprofiler.cpp index d6fa2e4555..aab13df140 100644 --- a/src/core/rocprofiler.cpp +++ b/src/core/rocprofiler.cpp @@ -141,6 +141,12 @@ void UnloadTool() { CONSTRUCTOR_API void constructor() { util::Logger::Create(); + + const char* timeout_str = getenv("ROCP_DATA_TIMEOUT"); + if (timeout_str != NULL) { + const uint64_t timeout_val = strtoull(timeout_str, NULL, 0); + Context::SetTimeout(timeout_val); + } } DESTRUCTOR_API void destructor() { @@ -168,6 +174,7 @@ const MetricsDict* GetMetrics(const hsa_agent_t& agent) { util::Logger::mutex_t util::Logger::mutex_; util::Logger* util::Logger::instance_ = NULL; +uint64_t Context::timeout_ = 1000; } /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -341,18 +348,17 @@ PUBLIC_API hsa_status_t rocprofiler_get_metrics(const rocprofiler_t* handle) { API_METHOD_SUFFIX } -// Set kernel dispatch observer -PUBLIC_API hsa_status_t rocprofiler_set_dispatch_callback(rocprofiler_callback_t callback, - void* data) { +// Set/remove queue callbacks +PUBLIC_API hsa_status_t rocprofiler_set_queue_callbacks(rocprofiler_queue_callbacks_t callbacks, void* data) { API_METHOD_PREFIX - rocprofiler::InterceptQueue::SetDispatchCB(callback, data); + rocprofiler::InterceptQueue::SetCallbacks(callbacks.dispatch, callbacks.destroy, data); API_METHOD_SUFFIX } -// Set kernel dispatch observer -PUBLIC_API hsa_status_t rocprofiler_remove_dispatch_callback() { +// Remove queue callbacks +PUBLIC_API hsa_status_t rocprofiler_remove_queue_callbacks() { API_METHOD_PREFIX - rocprofiler::InterceptQueue::UnsetDispatchCB(); + rocprofiler::InterceptQueue::SetCallbacks(NULL, NULL, NULL); API_METHOD_SUFFIX } diff --git a/test/ctrl/tool.cpp b/test/ctrl/tool.cpp index 2ea0f119ff..b019e4883e 100644 --- a/test/ctrl/tool.cpp +++ b/test/ctrl/tool.cpp @@ -27,7 +27,7 @@ #define KERNEL_NAME_LEN_MAX 128 // Disoatch callback data type -struct dispatch_data_t { +struct callbacks_data_t { rocprofiler_feature_t* features; unsigned feature_count; unsigned group_index; @@ -48,7 +48,7 @@ struct context_entry_t { // Dispatch callbacks and context handlers synchronization pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP; // Dispatch callback data -dispatch_data_t* dispatch_data = NULL; +callbacks_data_t* callbacks_data = NULL; // Stored contexts array typedef std::map context_array_t; context_array_t* context_array = NULL; @@ -227,8 +227,7 @@ void dump_context(context_entry_t* entry) { const unsigned feature_count = entry->feature_count; fprintf(file_handle, - "dispatch[%u], queue_index(%lu), kernel_object(0x%lx), kernel_name(\"%s\"):\n", index, - entry->data.queue_index, entry->data.kernel_object, entry->data.kernel_name); + "dispatch[%u], queue_index(%lu), kernel_name(\"%s\"):\n", index, entry->data.queue_index, entry->data.kernel_name); rocprofiler_group_t group = entry->group; status = rocprofiler_group_get_data(&group); @@ -289,7 +288,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, // HSA status hsa_status_t status = HSA_STATUS_ERROR; // Passed tool data - dispatch_data_t* tool_data = reinterpret_cast(user_data); + callbacks_data_t* tool_data = reinterpret_cast(user_data); // Profiling context rocprofiler_t* context = NULL; // Context entry @@ -326,6 +325,11 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, return status; } +hsa_status_t destroy_callback(hsa_queue_t* queue, void*) { + dump_context_array(); + return HSA_STATUS_SUCCESS; +} + static hsa_status_t info_callback(const rocprofiler_info_data_t info, void * arg) { const char symb = *reinterpret_cast(arg); if (((symb == 'b') && (info.metric.expr == NULL)) || @@ -473,14 +477,20 @@ extern "C" PUBLIC_API void OnLoadTool() } fflush(stdout); + // Adding dispatch observer if (feature_count) { - dispatch_data = new dispatch_data_t{}; - dispatch_data->features = features; - dispatch_data->feature_count = feature_count; - dispatch_data->group_index = 0; - dispatch_data->file_handle = result_file_handle; - rocprofiler_set_dispatch_callback(dispatch_callback, dispatch_data); + rocprofiler_queue_callbacks_t callbacks_ptrs{0}; + callbacks_ptrs.dispatch = dispatch_callback; + callbacks_ptrs.destroy = destroy_callback; + + callbacks_data = new callbacks_data_t{}; + callbacks_data->features = features; + callbacks_data->feature_count = feature_count; + callbacks_data->group_index = 0; + callbacks_data->file_handle = result_file_handle; + + rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_data); } xml::Xml::Destroy(xml); @@ -489,7 +499,7 @@ extern "C" PUBLIC_API void OnLoadTool() // Tool destructor extern "C" PUBLIC_API void OnUnloadTool() { // Unregister dispatch callback - rocprofiler_remove_dispatch_callback(); + rocprofiler_remove_queue_callbacks(); // Dump stored profiling output data const bool result_file_opened = (result_prefix != NULL) && (result_file_handle != NULL); @@ -500,8 +510,8 @@ extern "C" PUBLIC_API void OnUnloadTool() { if (result_file_opened) fclose(result_file_handle); // Cleanup - if (dispatch_data != NULL) { - delete[] dispatch_data->features; - delete dispatch_data; + if (callbacks_data != NULL) { + delete[] callbacks_data->features; + delete callbacks_data; } }