From c5e240bdb59594b26c67cfc69fc4f3d9d482087f Mon Sep 17 00:00:00 2001 From: Evgeny Date: Thu, 16 Nov 2017 12:50:14 -0600 Subject: [PATCH] tool sources comments --- test/ctrl/tool.cpp | 98 +++++++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 36 deletions(-) diff --git a/test/ctrl/tool.cpp b/test/ctrl/tool.cpp index a98efa26e0..4d00595f2f 100644 --- a/test/ctrl/tool.cpp +++ b/test/ctrl/tool.cpp @@ -1,3 +1,9 @@ +/////////////////////////////////////////////////////////////////////////////// +// // +// Test tool used as ROC profiler library demo // +// // +/////////////////////////////////////////////////////////////////////////////// + #include #include #include @@ -17,6 +23,7 @@ #define CONSTRUCTOR_API __attribute__((constructor)) #define DESTRUCTOR_API __attribute__((destructor)) +// Dispatch callback data type struct dispatch_data_t { rocprofiler_info_t* info; unsigned info_count; @@ -24,6 +31,7 @@ struct dispatch_data_t { FILE* file_handle; }; +// Context stored entry type struct context_entry_t { uint32_t index; rocprofiler_group_t* group; @@ -33,13 +41,18 @@ struct context_entry_t { FILE* file_handle; }; +// Dispatch callbacks and context handlers synchronization pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +// Stored contexts array size unsigned context_array_size = 1; +// Stored contexts array context_entry_t* context_array = NULL; +// Number of stored contexts unsigned context_array_count = 0; - +// File for dumping profiling output data const char* file_name = NULL; +// Check returned HSA API status void check_status(hsa_status_t status) { if (status != HSA_STATUS_SUCCESS) { const char* error_string = NULL; @@ -49,6 +62,33 @@ void check_status(hsa_status_t status) { } } +// Allocate entry to store profiling context +context_entry_t* alloc_context_entry() { + context_entry_t* ptr = 0; + + if(pthread_mutex_lock(&mutex) != 0) { + perror("pthread_mutex_lock"); + exit(1); + } + + if ((context_array == NULL) || 
(context_array_count >= context_array_size)) { + context_array_size *= 2; + context_array = reinterpret_cast(realloc(context_array, context_array_size * sizeof(context_entry_t))); + } + ptr = &context_array[context_array_count]; + *ptr = {}; + ptr->index = context_array_count; + context_array_count += 1; + + if (pthread_mutex_unlock(&mutex) != 0) { + perror("pthread_mutex_unlock"); + exit(1); + } + + return ptr; +} + +// Trace data callback for getting trace data from GPU local memory hsa_status_t trace_data_cb( hsa_ven_amd_aqlprofile_info_type_t info_type, hsa_ven_amd_aqlprofile_info_data_t* info_data, @@ -63,17 +103,21 @@ hsa_status_t trace_data_cb( return status; } +// Align to specified alignment unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } -void print_info(FILE* file, const rocprofiler_info_t* info, const unsigned info_count, rocprofiler_t* context, const char* str) { +// Output profiling results for input features +void output_results(FILE* file, const rocprofiler_info_t* info, const unsigned info_count, rocprofiler_t* context, const char* str) { if (str) fprintf(file, "%s:\n", str); for (unsigned i= 0; i < info_count; ++i) { const rocprofiler_info_t* p = &info[i]; fprintf(file, " %s ", p->name); switch (p->data.kind) { + // Output metrics results case ROCPROFILER_INT64: fprintf(file, "(%lu)\n", p->data.result_int64); break; + // Output trace results case ROCPROFILER_BYTES: { if (p->data.result_bytes.copy) { uint64_t size = 0; @@ -115,38 +159,15 @@ void print_info(FILE* file, const rocprofiler_info_t* info, const unsigned info_ } } -void print_group(FILE* file, const rocprofiler_group_t* group, const char* str) { +// Output group intermediate profiling results, created internally for complex metrics +void output_group(FILE* file, const rocprofiler_group_t* group, const char* str) { if (str) fprintf(file, "%s:\n", str); for (unsigned i= 0; i < group->info_count; ++i) { - print_info(file, 
group->info[i], 1, group->context, NULL); + output_results(file, group->info[i], 1, group->context, NULL); } } -context_entry_t* alloc_entry() { - context_entry_t* ptr = 0; - - if(pthread_mutex_lock(&mutex) != 0) { - perror("pthread_mutex_lock"); - exit(1); - } - - if ((context_array == NULL) || (context_array_count >= context_array_size)) { - context_array_size *= 2; - context_array = reinterpret_cast(realloc(context_array, context_array_size * sizeof(context_entry_t))); - } - ptr = &context_array[context_array_count]; - *ptr = {}; - ptr->index = context_array_count; - context_array_count += 1; - - if (pthread_mutex_unlock(&mutex) != 0) { - perror("pthread_mutex_unlock"); - exit(1); - } - - return ptr; -} - +// Dump stored context profiling output data void dump_context(context_entry_t* entry) { hsa_status_t status = HSA_STATUS_ERROR; rocprofiler_group_t* group = entry->group; @@ -161,11 +182,11 @@ void dump_context(context_entry_t* entry) { status = rocprofiler_get_group_data(group); check_status(status); - //print_group(file, group, "Group[0] data"); + //output_group(file, group, "Group[0] data"); status = rocprofiler_get_metrics(group->context); check_status(status); - print_info(file_handle, info, info_count, group->context, NULL); + output_results(file_handle, info, info_count, group->context, NULL); // Finishing cleanup // Deleting profiling context will delete all allocated resources @@ -174,7 +195,8 @@ void dump_context(context_entry_t* entry) { } } -void dumping_data() { +// Dump all stored contexts profiling output data +void dump_context_array() { if (pthread_mutex_lock(&mutex) != 0) { perror("pthread_mutex_lock"); exit(1); @@ -190,6 +212,7 @@ void dumping_data() { } } +// Profiling completion handler void handler(rocprofiler_group_t group, void* arg) { if (pthread_mutex_lock(&mutex) != 0) { perror("pthread_mutex_lock"); @@ -205,7 +228,7 @@ void handler(rocprofiler_group_t group, void* arg) { } } -// profiling callback +// Kernel dispatch callback 
hsa_status_t dispatch_callback( const rocprofiler_callback_data_t* callback_data, void* user_data, @@ -217,7 +240,7 @@ hsa_status_t dispatch_callback( // Profiling context rocprofiler_t* context = NULL; // Context entry - context_entry_t* entry = alloc_entry(); + context_entry_t* entry = alloc_context_entry(); // context properties rocprofiler_properties_t properties{}; properties.handler = (file_name != NULL) ? handler : NULL; @@ -243,6 +266,7 @@ hsa_status_t dispatch_callback( return status; } +// Tool constructor CONSTRUCTOR_API void constructor() { std::map parameters_dict; parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] = HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET; @@ -344,8 +368,8 @@ CONSTRUCTOR_API void constructor() { ++index; } + // Adding dispatch observer if (info_count) { - // Adding dispatch observer dispatch_data_t* dispatch_data = new dispatch_data_t{}; dispatch_data->info = info; dispatch_data->info_count = info_count; @@ -355,6 +379,7 @@ CONSTRUCTOR_API void constructor() { } } +// Tool destructor DESTRUCTOR_API void destructor() { printf("\nROCPRofiler: %u contexts collected", context_array_count); if (file_name == NULL) { @@ -362,5 +387,6 @@ DESTRUCTOR_API void destructor() { } else { printf(", dumping to %s\n", file_name); } - dumping_data(); + // Dump profiling output data which hasn't yet been dumped by the completion handler + dump_context_array(); }