From 68a89bf87dbb45f71586975db47cd23416bef0df Mon Sep 17 00:00:00 2001 From: Evgeny Date: Wed, 15 Nov 2017 20:59:24 -0600 Subject: [PATCH] adding completion handler --- inc/rocprofiler.h | 15 +++++- src/core/context.h | 59 ++++++++++++++++++++++- src/core/profile.h | 2 + src/core/rocprofiler.cpp | 13 ++++- test/ctrl/thr_tool.cpp | 2 +- test/ctrl/tool.cpp | 100 ++++++++++++++++++++++++++------------- test/run.sh | 3 +- 7 files changed, 152 insertions(+), 42 deletions(-) diff --git a/inc/rocprofiler.h b/inc/rocprofiler.h index 207dc48950..6a43e8304c 100644 --- a/inc/rocprofiler.h +++ b/inc/rocprofiler.h @@ -138,17 +138,23 @@ typedef struct { rocprofiler_t* context; // context object } rocprofiler_group_t; -// Profiling mode +// Profiling mode mask typedef enum { ROCPROFILER_MODE_STANDALONE = 1, ROCPROFILER_MODE_CREATEQUEUE = 2, + ROCPROFILER_MODE_SINGLEGROUP = 4 } rocprofiler_mode_t; +// Profiling handler, calling on profiling completion +typedef void (*rocprofiler_handler_t)(rocprofiler_group_t group, void* arg); + // Profiling preperties typedef struct { hsa_queue_t* queue; // queue for STANDALONE mode // the queue is created and returned in CREATEQUEUE mode uint32_t queue_depth; // created queue depth + rocprofiler_handler_t handler; // handler on completion + void* handler_arg; // the handler arg } rocprofiler_properties_t; // Create new profiling context @@ -164,6 +170,11 @@ hsa_status_t rocprofiler_open( hsa_status_t rocprofiler_close( rocprofiler_t* context); // [in] profiling context +// Context reset before reusing +hsa_status_t rocprofiler_reset( + rocprofiler_t* context, // [in] profiling context + uint32_t group_index); // group index + //////////////////////////////////////////////////////////////////////////////// // Runtime API observer // @@ -239,7 +250,7 @@ hsa_status_t rocprofiler_get_group_data( rocprofiler_group_t* group); // [in/out] profiling group // Get metrics data -hsa_status_t rocprofiler_get_metrics_data( +hsa_status_t rocprofiler_get_metrics( const rocprofiler_t* context); // [in/out] profiling context // Definition of output data iterator callback diff --git a/src/core/context.h b/src/core/context.h index c08c76e03f..96993d1568 100644 --- a/src/core/context.h +++ b/src/core/context.h @@ -4,6 +4,7 @@ #include "inc/rocprofiler.h" #include +#include #include #include @@ -67,6 +68,8 @@ class Group { Group(const util::AgentInfo* agent_info, Context *context, const uint32_t& index) : pmc_profile_(agent_info), sqtt_profile_(agent_info), + n_profiles_(0), + refs_(1), context_(context), index_(index) {} @@ -91,6 +94,10 @@ class Group { if (status == HSA_STATUS_SUCCESS) { status = sqtt_profile_.Finalize(start_vector_, stop_vector_); } + if (status == HSA_STATUS_SUCCESS) { + if (!pmc_profile_.Empty()) ++n_profiles_; + if (!sqtt_profile_.Empty()) ++n_profiles_; + } return status; } @@ -109,12 +116,23 @@ class Group { Context* GetContext() { return context_; } uint32_t GetIndex() const { return index_; } + rocprofiler_group_t GetGroup() { + return rocprofiler_group_t{index_, &info_vector_[0], (uint32_t)info_vector_.size(), context_}; + } + void ResetRefs() { refs_ = n_profiles_; } + uint32_t DecrRefs() { + --refs_; + return refs_; + } + private: PmcProfile pmc_profile_; SqttProfile sqtt_profile_; info_vector_t info_vector_; pkt_vector_t start_vector_; pkt_vector_t stop_vector_; + uint32_t n_profiles_; + uint32_t refs_; Context* const context_; const uint32_t index_; }; @@ -122,19 +140,38 @@ class Group { // Profiling context class Context { public: + typedef std::mutex mutex_t; typedef std::map info_map_t; - Context(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_info_t* info, const uint32_t info_count) : + Context(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_info_t* info, const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg) : agent_(agent_info->dev_id), agent_info_(agent_info), queue_(queue), hsa_rsrc_(&util::HsaRsrcFactory::Instance()), - api_(hsa_rsrc_->AqlProfileApi()) + api_(hsa_rsrc_->AqlProfileApi()), + handler_(handler), + handler_arg_(handler_arg) { metrics_ = MetricsDict::Create(agent_info); if (metrics_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "MetricsDict create failed"); Initialize(info, info_count); Finalize(); + + if (handler != NULL) { + for (unsigned group_index = 0; group_index < set_.size(); ++group_index) { + set_[group_index].ResetRefs(); + const profile_vector_t profile_vector = GetProfiles(group_index); + for (auto& tuple : profile_vector) { + // Handler for stop packet completion + hsa_amd_signal_async_handler( + tuple.completion_signal, + HSA_SIGNAL_CONDITION_LT, + 1, + Handler, + &set_[group_index]); + } + } + } } ~Context() { @@ -232,6 +269,10 @@ class Context { } } + void Reset(const uint32_t& group_index) { + set_[group_index].ResetRefs(); + } + uint32_t GetGroupCount() const { return set_.size(); } rocprofiler_group_t GetGroupInfo(const uint32_t& index) { @@ -322,6 +363,16 @@ class Context { return vec; } + static bool Handler(hsa_signal_value_t value, void* arg) { + Group* group = reinterpret_cast(arg); + std::lock_guard lck(group->GetContext()->mutex_); + uint32_t r = group->DecrRefs(); + if (r == 0) { + group->GetContext()->handler_(group->GetGroup(), group->GetContext()->handler_arg_); + } + return false; + } + static hsa_status_t DataCallback(hsa_ven_amd_aqlprofile_info_type_t ainfo_type, hsa_ven_amd_aqlprofile_info_data_t* ainfo_data, void* data) { @@ -402,6 +453,10 @@ class Context { info_map_t info_map_; // Metrics map std::map metrics_map_; + // Context completion handler + rocprofiler_handler_t handler_; + void* handler_arg_; + mutex_t mutex_; }; } // namespace rocprofiler diff --git a/src/core/profile.h b/src/core/profile.h index 95a1a09f0a..7110841ae3 100644 --- a/src/core/profile.h +++ b/src/core/profile.h @@ -143,6 +143,8 @@ class Profile { } } + bool Empty() const { return info_vector_.empty(); } + protected: virtual hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) = 0; diff --git a/src/core/rocprofiler.cpp b/src/core/rocprofiler.cpp index 1206e59064..4b2eedaa5a 100644 --- a/src/core/rocprofiler.cpp +++ b/src/core/rocprofiler.cpp @@ -127,7 +127,7 @@ PUBLIC_API hsa_status_t rocprofiler_open( } } - *handle = (void*) new rocprofiler::Context(agent_info, queue, info, info_count); + *handle = (void*) new rocprofiler::Context(agent_info, queue, info, info_count, properties->handler, properties->handler_arg); API_METHOD_SUFFIX } @@ -140,6 +140,15 @@ PUBLIC_API hsa_status_t rocprofiler_close(rocprofiler_t* handle) API_METHOD_SUFFIX } +// Reset context +PUBLIC_API hsa_status_t rocprofiler_reset(rocprofiler_t* handle, uint32_t group_index) +{ + API_METHOD_PREFIX + rocprofiler::Context* context = reinterpret_cast(handle); + context->Reset(group_index); + API_METHOD_SUFFIX +} + // Get profiling groups PUBLIC_API hsa_status_t rocprofiler_get_groups(rocprofiler_t* handle, rocprofiler_group_t** group_array, uint32_t* group_count) { API_METHOD_PREFIX @@ -199,7 +208,7 @@ PUBLIC_API hsa_status_t rocprofiler_get_group_data(rocprofiler_group_t* group) { } // Get metrics data -PUBLIC_API hsa_status_t rocprofiler_get_metrics_data(const rocprofiler_t* handle) { +PUBLIC_API hsa_status_t rocprofiler_get_metrics(const rocprofiler_t* handle) { API_METHOD_PREFIX const rocprofiler::Context* context = reinterpret_cast(handle); context->GetMetricsData(); diff --git a/test/ctrl/thr_tool.cpp b/test/ctrl/thr_tool.cpp index fa2f2abfa5..eae432affc 100644 --- a/test/ctrl/thr_tool.cpp +++ b/test/ctrl/thr_tool.cpp @@ -124,7 +124,7 @@ void dump_context(FILE *file, unsigned index) { check_status(status); //print_group(file, group, "Group[0] data"); - status = rocprofiler_get_metrics_data(group->context); + status = rocprofiler_get_metrics(group->context); check_status(status); print_info(file, info, info_count, NULL); diff --git a/test/ctrl/tool.cpp b/test/ctrl/tool.cpp index a849b7c892..a98efa26e0 100644 --- a/test/ctrl/tool.cpp +++ b/test/ctrl/tool.cpp @@ -21,23 +21,24 @@ struct dispatch_data_t { rocprofiler_info_t* info; unsigned info_count; unsigned group_index; + FILE* file_handle; }; struct context_entry_t { + uint32_t index; rocprofiler_group_t* group; rocprofiler_info_t* info; unsigned info_count; rocprofiler_callback_data_t data; + FILE* file_handle; }; pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; unsigned context_array_size = 1; context_entry_t* context_array = NULL; -unsigned context_array_index = 0; -unsigned dump_index = 0; +unsigned context_array_count = 0; const char* file_name = NULL; -FILE* file_handle = NULL; void check_status(hsa_status_t status) { if (status != HSA_STATUS_SUCCESS) { @@ -121,46 +122,56 @@ void print_group(FILE* file, const rocprofiler_group_t* group, const char* str) } } -void store_entry(const context_entry_t& context_entry) { +context_entry_t* alloc_entry() { + context_entry_t* ptr = 0; + if(pthread_mutex_lock(&mutex) != 0) { perror("pthread_mutex_lock"); exit(1); } - if ((context_array == NULL) || (context_array_index >= context_array_size)) { + if ((context_array == NULL) || (context_array_count >= context_array_size)) { context_array_size *= 2; context_array = reinterpret_cast(realloc(context_array, context_array_size * sizeof(context_entry_t))); } - context_array[context_array_index] = context_entry; - context_array_index += 1; + ptr = &context_array[context_array_count]; + *ptr = {}; + ptr->index = context_array_count; + context_array_count += 1; if (pthread_mutex_unlock(&mutex) != 0) { perror("pthread_mutex_unlock"); exit(1); } + + return ptr; } -void dump_context(FILE *file, context_entry_t* entry, unsigned index) { +void dump_context(context_entry_t* entry) { hsa_status_t status = HSA_STATUS_ERROR; - rocprofiler_group_t* group = entry->group; - const rocprofiler_info_t* info = entry->info; - const unsigned info_count = entry->info_count; - fprintf(file, "Dispatch[%u], kernel_object(0x%lx):\n", index, entry->data.kernel_object); - status = rocprofiler_get_group_data(group); - check_status(status); - //print_group(file, group, "Group[0] data"); + if (group) { + uint32_t index = entry->index; + const rocprofiler_info_t* info = entry->info; + const unsigned info_count = entry->info_count; + FILE* file_handle = entry->file_handle; - status = rocprofiler_get_metrics_data(group->context); - check_status(status); - print_info(file, info, info_count, group->context, NULL); - - // Finishing cleanup - // Deleting profiling context will delete all allocated resources - rocprofiler_close(group->context); - - dump_index = index; + fprintf(file_handle, "Dispatch[%u], kernel_object(0x%lx):\n", index, entry->data.kernel_object); + + status = rocprofiler_get_group_data(group); + check_status(status); + //print_group(file, group, "Group[0] data"); + + status = rocprofiler_get_metrics(group->context); + check_status(status); + print_info(file_handle, info, info_count, group->context, NULL); + + // Finishing cleanup + // Deleting profiling context will delete all allocated resources + rocprofiler_close(group->context); + entry->group = NULL; + } } void dumping_data() { @@ -169,8 +180,8 @@ void dumping_data() { exit(1); } - for (unsigned index = 0; index < context_array_index; ++index) { - dump_context(file_handle, &context_array[index], index); + for (unsigned index = 0; index < context_array_count; ++index) { + dump_context(&context_array[index]); } if (pthread_mutex_unlock(&mutex) != 0) { @@ -179,21 +190,41 @@ void dumping_data() { } } +void handler(rocprofiler_group_t group, void* arg) { + if (pthread_mutex_lock(&mutex) != 0) { + perror("pthread_mutex_lock"); + exit(1); + } + + context_entry_t* entry = reinterpret_cast(arg); + dump_context(entry); + + if (pthread_mutex_unlock(&mutex) != 0) { + perror("pthread_mutex_unlock"); + exit(1); + } +} + // profiling callback hsa_status_t dispatch_callback( const rocprofiler_callback_data_t* callback_data, void* user_data, rocprofiler_group_t** group) { + // HSA status hsa_status_t status = HSA_STATUS_ERROR; // Passed tool data dispatch_data_t* tool_data = reinterpret_cast(user_data); // Profiling context rocprofiler_t* context = NULL; + // Context entry + context_entry_t* entry = alloc_entry(); // context properties rocprofiler_properties_t properties{}; + properties.handler = (file_name != NULL) ? handler : NULL; + properties.handler_arg = (void*)entry; // Open profiling context - status = rocprofiler_open(0, tool_data->info, tool_data->info_count, &context, 0, &properties); + status = rocprofiler_open(0, tool_data->info, tool_data->info_count, &context, 0/*ROCPROFILER_MODE_SINGLEGROUP*/, &properties); check_status(status); rocprofiler_group_t* groups = NULL; @@ -203,12 +234,11 @@ hsa_status_t dispatch_callback( assert(group_count == 1); *group = &groups[0]; - context_entry_t entry; - entry.group = *group; - entry.info = tool_data->info; - entry.info_count = tool_data->info_count; - entry.data = *callback_data; - store_entry(entry); + entry->group = *group; + entry->info = tool_data->info; + entry->info_count = tool_data->info_count; + entry->data = *callback_data; + entry->file_handle = tool_data->file_handle; return status; } @@ -223,6 +253,7 @@ CONSTRUCTOR_API void constructor() { // Set output file file_name = getenv("ROCP_OUTPUT"); + FILE* file_handle = NULL; if (file_name != NULL) { file_handle = fopen(file_name, "w"); if (file_handle == NULL) { @@ -319,12 +350,13 @@ CONSTRUCTOR_API void constructor() { dispatch_data->info = info; dispatch_data->info_count = info_count; dispatch_data->group_index = 0; + dispatch_data->file_handle = file_handle; rocprofiler_set_dispatch_observer(dispatch_callback, dispatch_data); } } DESTRUCTOR_API void destructor() { - printf("\nROCPRofiler: %u contexts collected", context_array_index); + printf("\nROCPRofiler: %u contexts collected", context_array_count); if (file_name == NULL) { printf("\n"); } else { diff --git a/test/run.sh b/test/run.sh index f1db179397..2fcbefb107 100755 --- a/test/run.sh +++ b/test/run.sh @@ -13,9 +13,10 @@ export ROCPROFILER_LOG=1 export HSA_TOOLS_LIB=librocprofiler64.so export ROCP_TOOL_LIB=test/libtool.so export ROCP_HSA_INTERCEPT=1 +unset ROCP_PROXY_QUEUE export ROCP_METRICS=metrics.xml export ROCP_INPUT=input.xml -unset ROCP_PROXY_QUEUE +export ROCP_OUTPUT=output.txt echo "Run simple profiling test" if [ -n "$1" ] ; then