From 777eb116d468cd247d3ed73f75d71bd5a54077b2 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Mon, 5 Mar 2018 20:59:01 -0600 Subject: [PATCH] adding timestamps dispatch/begin/end/complete Change-Id: I8c23e5f7ff75174243224ff008ba266553ee30c8 [ROCm/rocprofiler commit: 363700581a17c910d5058d606a435fcc79f4eb9b] --- projects/rocprofiler/inc/rocprofiler.h | 21 +++- projects/rocprofiler/src/core/context.h | 8 +- .../rocprofiler/src/core/intercept_queue.cpp | 2 + .../rocprofiler/src/core/intercept_queue.h | 29 +++-- projects/rocprofiler/src/core/rocprofiler.cpp | 1 + projects/rocprofiler/src/core/tracker.h | 113 ++++++++++++++++++ projects/rocprofiler/src/util/logger.h | 10 +- projects/rocprofiler/test/ctrl/tool.cpp | 102 +++++++++------- 8 files changed, 222 insertions(+), 64 deletions(-) create mode 100644 projects/rocprofiler/src/core/tracker.h diff --git a/projects/rocprofiler/inc/rocprofiler.h b/projects/rocprofiler/inc/rocprofiler.h index 7b4ffb3f87..0a2be56714 100644 --- a/projects/rocprofiler/inc/rocprofiler.h +++ b/projects/rocprofiler/inc/rocprofiler.h @@ -212,14 +212,23 @@ hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling contex // Queue callbacks for initiating profiling per kernel dispatch and to wait // the profiling data on the queue destroy. +// Dispatch record +typedef struct { + uint64_t dispatch; // dispatch timestamp + uint64_t begin; // begin timestamp + uint64_t end; // end timestamp + uint64_t complete; // completion signal timestamp +} rocprofiler_dispatch_record_t; + // Profiling callback data typedef struct { - hsa_agent_t agent; - uint32_t agent_index; - const hsa_queue_t* queue; - uint64_t queue_index; - uint64_t kernel_object; - const char* kernel_name; + hsa_agent_t agent; // GPU agent handle + uint32_t agent_index; // GPU index + const hsa_queue_t* queue; // HSA queue + uint64_t queue_index; // Index in the queue + uint64_t kernel_object; // Kernel object handle + const char* kernel_name; // Kernel name + const rocprofiler_dispatch_record_t* record; // Dispatch record } rocprofiler_callback_data_t; // Profiling callback type diff --git a/projects/rocprofiler/src/core/context.h b/projects/rocprofiler/src/core/context.h index 5fb9545151..901cb811c1 100644 --- a/projects/rocprofiler/src/core/context.h +++ b/projects/rocprofiler/src/core/context.h @@ -14,6 +14,7 @@ #include "core/types.h" #include "util/exception.h" #include "util/hsa_rsrc_factory.h" +#include "util/logger.h" namespace rocprofiler { struct rocprofiler_contex_t; @@ -155,7 +156,8 @@ class Context { hsa_rsrc_(&util::HsaRsrcFactory::Instance()), api_(hsa_rsrc_->AqlProfileApi()), handler_(handler), - handler_arg_(handler_arg) { + handler_arg_(handler_arg) + { metrics_ = MetricsDict::Create(agent_info); if (metrics_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "MetricsDict create failed"); Initialize(info, info_count); @@ -334,8 +336,8 @@ class Context { while (!complete) { const hsa_signal_value_t signal_value = hsa_signal_wait_scacquire(tuple.completion_signal, HSA_SIGNAL_CONDITION_LT, 1, timeout, HSA_WAIT_STATE_BLOCKED); - complete = (signal_value == 0); - if (!complete) printf("ROCProfiler: Signal timeout, signal(%d) timeout(0x%lx)\n", (int)signal_value, timeout); + complete = (signal_value < 1); + if (!complete) WARN_LOGGING("timeout"); } for (rocprofiler_feature_t* rinfo : *(tuple.info_vector)) rinfo->data.kind = ROCPROFILER_DATA_KIND_UNINIT; callback_data_t callback_data{tuple.info_vector, tuple.info_vector->size(), NULL}; diff --git a/projects/rocprofiler/src/core/intercept_queue.cpp b/projects/rocprofiler/src/core/intercept_queue.cpp index aee19cf78d..6af4b84b0f 100644 --- a/projects/rocprofiler/src/core/intercept_queue.cpp +++ b/projects/rocprofiler/src/core/intercept_queue.cpp @@ -12,4 +12,6 @@ InterceptQueue::queue_callback_t InterceptQueue::destroy_callback_ = NULL; void* InterceptQueue::callback_data_ = NULL; InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL; const char* InterceptQueue::kernel_none_ = ""; +uint64_t InterceptQueue::timeout_ = UINT64_MAX; +Tracker* InterceptQueue::tracker_ = NULL; } // namespace rocprofiler diff --git a/projects/rocprofiler/src/core/intercept_queue.h b/projects/rocprofiler/src/core/intercept_queue.h index b49ed49294..8e6543c5ea 100644 --- a/projects/rocprofiler/src/core/intercept_queue.h +++ b/projects/rocprofiler/src/core/intercept_queue.h @@ -12,7 +12,9 @@ #include "core/context.h" #include "core/proxy_queue.h" +#include "core/tracker.h" #include "core/types.h" +#include "inc/rocprofiler.h" #include "util/hsa_rsrc_factory.h" namespace rocprofiler { @@ -35,18 +37,19 @@ class InterceptQueue { hsa_status_t status = HSA_STATUS_ERROR; std::lock_guard lck(mutex_); - if (!obj_map_) obj_map_ = new obj_map_t; - ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, &status); - if (status == HSA_STATUS_SUCCESS) { - InterceptQueue* obj = new InterceptQueue(agent, *queue, proxy); - (*obj_map_)[(uint64_t)(*queue)] = obj; - status = proxy->SetInterceptCB(OnSubmitCB, obj); - } - if (status != HSA_STATUS_SUCCESS) abort(); + if (!tracker_) tracker_ = new Tracker(timeout_); + status = hsa_amd_profiling_set_profiler_enabled(*queue, true); + if (status != HSA_STATUS_SUCCESS) abort(); + + if (!obj_map_) obj_map_ = new obj_map_t; + InterceptQueue* obj = new InterceptQueue(agent, *queue, proxy); + (*obj_map_)[(uint64_t)(*queue)] = obj; + status = proxy->SetInterceptCB(OnSubmitCB, obj); + return status; } @@ -86,12 +89,15 @@ class InterceptQueue { const hsa_kernel_dispatch_packet_t* dispatch_packet = reinterpret_cast(packet); const char* kernel_name = GetKernelName(dispatch_packet); + const auto* entry = tracker_->Add(obj->agent_info_->dev_id, dispatch_packet->completion_signal); + const_cast(dispatch_packet)->completion_signal = entry->signal; rocprofiler_callback_data_t data = {obj->agent_info_->dev_id, obj->agent_info_->dev_index, obj->queue_, user_que_idx, dispatch_packet->kernel_object, - kernel_name}; + kernel_name, + entry->record}; hsa_status_t status = dispatch_callback_(&data, callback_data_, &group); free(const_cast(kernel_name)); if ((status == HSA_STATUS_SUCCESS) && (group.context != NULL)) { @@ -130,6 +136,8 @@ class InterceptQueue { destroy_callback_ = destroy_callback; } + static void SetTimeout(uint64_t timeout) { timeout_ = timeout; } + private: InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) : queue_(queue), @@ -178,6 +186,9 @@ class InterceptQueue { static void* callback_data_; static obj_map_t* obj_map_; static const char* kernel_none_; + static uint64_t timeout_; + static Tracker* tracker_; + static const bool tracker_on_ = true; hsa_queue_t* const queue_; ProxyQueue* const proxy_; diff --git a/projects/rocprofiler/src/core/rocprofiler.cpp b/projects/rocprofiler/src/core/rocprofiler.cpp index 2656d59945..ca3cb5d980 100644 --- a/projects/rocprofiler/src/core/rocprofiler.cpp +++ b/projects/rocprofiler/src/core/rocprofiler.cpp @@ -146,6 +146,7 @@ CONSTRUCTOR_API void constructor() { if (timeout_str != NULL) { const uint64_t timeout_val = strtoull(timeout_str, NULL, 0); Context::SetTimeout(timeout_val); + InterceptQueue::SetTimeout(timeout_val); } } diff --git a/projects/rocprofiler/src/core/tracker.h b/projects/rocprofiler/src/core/tracker.h new file mode 100644 index 0000000000..2dcf36c50c --- /dev/null +++ b/projects/rocprofiler/src/core/tracker.h @@ -0,0 +1,113 @@ +#ifndef SRC_CORE_TRACKER_H_ +#define SRC_CORE_TRACKER_H_ + +#include +#include +#include + +#include + +#include "inc/rocprofiler.h" +#include "util/exception.h" +#include "util/logger.h" + +namespace rocprofiler { + +class Tracker { + public: + typedef rocprofiler_dispatch_record_t record_t; + struct entry_t; + typedef std::list sig_list_t; + struct entry_t { + Tracker* tracker; + sig_list_t::iterator it; + hsa_agent_t agent; + hsa_signal_t orig; + hsa_signal_t signal; + record_t* record; + }; + + Tracker(uint64_t timeout = UINT64_MAX) : timeout_(timeout) {} + ~Tracker() { + for (entry_t* entry : sig_list_) { + assert(entry != NULL); + while (1) { + const hsa_signal_value_t signal_value = hsa_signal_wait_scacquire( + entry->signal, + HSA_SIGNAL_CONDITION_LT, + 1, + timeout_, + HSA_WAIT_STATE_BLOCKED); + if (signal_value < 1) break; + else WARN_LOGGING("tracker timeout"); + } + Del(entry); + } + } + + // Add tracker entry + entry_t* Add(const hsa_agent_t& agent, const hsa_signal_t& orig) { + entry_t* entry = new entry_t{}; + assert(entry); + entry->tracker = this; + entry->it = sig_list_.insert(sig_list_.begin(), entry); + + entry->agent = agent; + entry->orig = orig; + hsa_status_t status = hsa_signal_create(1, 0, NULL, &(entry->signal)); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_signal_create"); + + record_t* record = new record_t{}; + assert(record); + entry->record = record; + status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &record->dispatch); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)"); + + hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler"); + + return entry; + } + + private: + // Delete tracker entry + void Del(entry_t* entry) { + hsa_signal_destroy(entry->signal); + sig_list_.erase(entry->it); + delete entry; + } + + // Handler for packet completion + static bool Handler(hsa_signal_value_t value, void* arg) { + entry_t* entry = reinterpret_cast(arg); + record_t* record = entry->record; + + hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &record->complete); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)"); + + hsa_amd_profiling_dispatch_time_t dispatch_time{}; + status = hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time); + if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_dispatch_time"); + + record->begin = dispatch_time.start; + record->end = dispatch_time.end; + + hsa_signal_t orig = entry->orig; + if (orig.handle) { + const hsa_signal_value_t value = hsa_signal_load_relaxed(orig); + hsa_signal_store_relaxed(orig, value - 1); + } + entry->tracker->Del(entry); + + return false; + } + + // Timeout for wait on destruction + uint64_t timeout_; + // Tracked signals list + sig_list_t sig_list_; +}; + +} // namespace rocprofiler + +#endif // SRC_CORE_TRACKER_H_ diff --git a/projects/rocprofiler/src/util/logger.h b/projects/rocprofiler/src/util/logger.h index 8bc70f6af2..2c06d20626 100644 --- a/projects/rocprofiler/src/util/logger.h +++ b/projects/rocprofiler/src/util/logger.h @@ -92,7 +92,8 @@ class Logger { if (messaging) { message_[GetTid()] = ""; } else if (streaming_) { - //Put("\n"); + Put("\n"); + dirty_ = false; } messaging_ = messaging; streaming_ = messaging; @@ -148,6 +149,13 @@ class Logger { << rocprofiler::util::Logger::endl; \ } +#define WARN_LOGGING(stream) \ + { \ + std::cerr << "ROCProfiler: " << stream << std::endl; \ + rocprofiler::util::Logger::Instance() << "warning: " << rocprofiler::util::Logger::begm << stream \ + << rocprofiler::util::Logger::endl; \ + } + #ifdef DEBUG #define DBG_LOGGING(stream) \ { \ diff --git a/projects/rocprofiler/test/ctrl/tool.cpp b/projects/rocprofiler/test/ctrl/tool.cpp index c68cd1a4af..79fd6fffca 100644 --- a/projects/rocprofiler/test/ctrl/tool.cpp +++ b/projects/rocprofiler/test/ctrl/tool.cpp @@ -254,24 +254,36 @@ void dump_context(context_entry_t* entry) { const rocprofiler_feature_t* features = entry->features; const unsigned feature_count = entry->feature_count; - fprintf(file_handle, - "dispatch[%u], queue_index(%lu), kernel_name(\"%s\"):\n", index, entry->data.queue_index, entry->data.kernel_name); + fprintf(file_handle, "dispatch[%u], queue_index(%lu), kernel_name(\"%s\"), time(%lu,%lu,%lu,%lu)\n", + index, + entry->data.queue_index, + entry->data.kernel_name, + entry->data.record->dispatch, + entry->data.record->begin, + entry->data.record->end, + entry->data.record->complete); + fflush(file_handle); - rocprofiler_group_t group = entry->group; - status = rocprofiler_group_get_data(&group); - check_status(status); - // output_group(file, group, "Group[0] data"); - - status = rocprofiler_get_metrics(group.context); - check_status(status); - std::ostringstream oss; - oss << index << "__" << entry->data.kernel_name; - output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str()); - free(const_cast(entry->data.kernel_name)); - - // Finishing cleanup - // Deleting profiling context will delete all allocated resources - rocprofiler_close(group.context); + delete entry->data.record; + entry->data.record = NULL; + + rocprofiler_group_t& group = entry->group; + if (group.context != NULL) { + status = rocprofiler_group_get_data(&group); + check_status(status); + // output_group(file, group, "Group[0] data"); + + status = rocprofiler_get_metrics(group.context); + check_status(status); + std::ostringstream oss; + oss << index << "__" << entry->data.kernel_name; + output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str()); + free(const_cast(entry->data.kernel_name)); + + // Finishing cleanup + // Deleting profiling context will delete all allocated resources + rocprofiler_close(group.context); + } } } @@ -362,20 +374,22 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, properties.handler = (result_prefix != NULL) ? handler : NULL; properties.handler_arg = (void*)entry; - // Open profiling context - status = rocprofiler_open(callback_data->agent, tool_data->features, tool_data->feature_count, - &context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties); - check_status(status); - - // Check that we have only one profiling group - uint32_t group_count = 0; - status = rocprofiler_group_count(context, &group_count); - check_status(status); - assert(group_count == 1); - // Get group[0] - const uint32_t group_index = 0; - status = rocprofiler_get_group(context, group_index, group); - check_status(status); + if (tool_data->feature_count > 0) { + // Open profiling context + status = rocprofiler_open(callback_data->agent, tool_data->features, tool_data->feature_count, + &context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties); + check_status(status); + + // Check that we have only one profiling group + uint32_t group_count = 0; + status = rocprofiler_group_count(context, &group_count); + check_status(status); + assert(group_count == 1); + // Get group[0] + const uint32_t group_index = 0; + status = rocprofiler_get_group(context, group_index, group); + check_status(status); + } // Fill profiling context entry entry->group = *group; @@ -575,22 +589,20 @@ extern "C" PUBLIC_API void OnLoadTool() // Adding dispatch observer - if (feature_count) { - rocprofiler_queue_callbacks_t callbacks_ptrs{0}; - callbacks_ptrs.dispatch = dispatch_callback; - callbacks_ptrs.destroy = destroy_callback; + rocprofiler_queue_callbacks_t callbacks_ptrs{0}; + callbacks_ptrs.dispatch = dispatch_callback; + callbacks_ptrs.destroy = destroy_callback; - callbacks_data = new callbacks_data_t{}; - callbacks_data->features = features; - callbacks_data->feature_count = feature_count; - callbacks_data->group_index = 0; - callbacks_data->file_handle = result_file_handle; - callbacks_data->gpu_index = (gpu_index_vec->empty()) ? NULL : gpu_index_vec; - callbacks_data->kernel_string = (kernel_string_vec->empty()) ? NULL : kernel_string_vec; - callbacks_data->range = (range_vec->empty()) ? NULL : range_vec;; + callbacks_data = new callbacks_data_t{}; + callbacks_data->features = features; + callbacks_data->feature_count = feature_count; + callbacks_data->group_index = 0; + callbacks_data->file_handle = result_file_handle; + callbacks_data->gpu_index = (gpu_index_vec->empty()) ? NULL : gpu_index_vec; + callbacks_data->kernel_string = (kernel_string_vec->empty()) ? NULL : kernel_string_vec; + callbacks_data->range = (range_vec->empty()) ? NULL : range_vec;; - rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_data); - } + rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_data); xml::Xml::Destroy(xml); }