From 77cec10648e78c4bbdfcde469932aaa950f5803b Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 22 Dec 2017 10:22:47 -0600 Subject: [PATCH] initializing results on get-data; kernel name in sqtt output file name; --- src/core/context.h | 13 +++++------ test/ctrl/tool.cpp | 54 +++++++++++++++++++++++++++++----------------- 2 files changed, 41 insertions(+), 26 deletions(-) diff --git a/src/core/context.h b/src/core/context.h index d4d2a2e82c..1ed711fc70 100644 --- a/src/core/context.h +++ b/src/core/context.h @@ -191,7 +191,6 @@ class Context { for (unsigned i = 0; i < info_count; ++i) { rocprofiler_feature_t* info = &info_array[i]; input_map[info->name] = info; - info->data.kind = ROCPROFILER_DATA_KIND_UNINIT; } // Adding zero group, always present @@ -200,7 +199,6 @@ class Context { // Processing input features for (unsigned i = 0; i < info_count; ++i) { rocprofiler_feature_t* info = &info_array[i]; - info->data.kind = ROCPROFILER_DATA_KIND_UNINIT; info_map_[info->name] = info; const rocprofiler_feature_kind_t kind = info->kind; const char* name = info->name; @@ -323,6 +321,7 @@ class Context { // Wait for stop packet to complete hsa_signal_wait_scacquire(tuple.completion_signal, HSA_SIGNAL_CONDITION_LT, 1, (uint64_t)-1, HSA_WAIT_STATE_BLOCKED); + for (rocprofiler_feature_t* rinfo : *(tuple.info_vector)) rinfo->data.kind = ROCPROFILER_DATA_KIND_UNINIT; callback_data_t callback_data{tuple.info_vector, tuple.info_vector->size(), NULL}; const hsa_status_t status = api_->hsa_ven_amd_aqlprofile_iterate_data(tuple.profile, DataCallback, &callback_data); @@ -394,7 +393,9 @@ class Context { callback_data->index = index; if (index < info_vector.size()) { - rocprofiler_feature_t* rinfo = info_vector[index]; + rocprofiler_feature_t* const rinfo = info_vector[index]; + rinfo->data.kind = ROCPROFILER_DATA_KIND_UNINIT; + if (ainfo_type == HSA_VEN_AMD_AQLPROFILE_INFO_PMC_DATA) { if (ainfo_data->sample_id == 0) rinfo->data.result_int64 = 0; rinfo->data.result_int64 += ainfo_data->pmc_data.result; @@ -413,19 +414,19 @@ class Context { hsa_status_t status = hsa_memory_copy(dest, src, size); if (status == HSA_STATUS_SUCCESS) { *header = size; - rinfo->data.kind = ROCPROFILER_DATA_KIND_BYTES; - rinfo->data.result_bytes.instance_count = sample_id + 1; callback_data->ptr = dest + align_size(size, sizeof(uint64_t)); + rinfo->data.result_bytes.instance_count = sample_id + 1; + rinfo->data.kind = ROCPROFILER_DATA_KIND_BYTES; } } else status = HSA_STATUS_ERROR; } else { if (sample_id == 0) { - rinfo->data.kind = ROCPROFILER_DATA_KIND_BYTES; rinfo->data.result_bytes.ptr = ainfo_data->sqtt_data.ptr; rinfo->data.result_bytes.instance_count = UINT32_MAX; } rinfo->data.result_bytes.instance_count += 1; + rinfo->data.kind = ROCPROFILER_DATA_KIND_BYTES; } } else status = HSA_STATUS_ERROR; diff --git a/test/ctrl/tool.cpp b/test/ctrl/tool.cpp index 41b46b2e66..8790c11af6 100644 --- a/test/ctrl/tool.cpp +++ b/test/ctrl/tool.cpp @@ -22,6 +22,7 @@ #define PUBLIC_API __attribute__((visibility("default"))) #define CONSTRUCTOR_API __attribute__((constructor)) #define DESTRUCTOR_API __attribute__((destructor)) +#define KERNEL_NAME_LEN_MAX 128 // Disoatch callback data type struct dispatch_data_t { @@ -50,6 +51,8 @@ context_array_t* context_array = NULL; uint32_t context_count = 0; // Profiling results output file name const char* result_prefix = NULL; +// Global results file handle +FILE* result_file_handle = NULL; // Check returned HSA API status void check_status(hsa_status_t status) { @@ -104,11 +107,11 @@ void dealloc_context_entry(context_entry_t* entry) { } // Dump trace data to file -void dump_sqtt_trace(const uint32_t chunk, const void* data, const uint32_t& size) { +void dump_sqtt_trace(const char* label, const uint32_t chunk, const void* data, const uint32_t& size) { if (result_prefix != NULL) { - // Opening SQTT file + // Open SQTT file std::ostringstream oss; - oss << result_prefix << "/thread_trace.se" << chunk << ".out"; + oss << result_prefix << "/thread_trace_" << label << "_se" << chunk << ".out"; FILE* file = fopen(oss.str().c_str(), "w"); if (file == NULL) { perror("result file fopen"); @@ -120,17 +123,25 @@ void dump_sqtt_trace(const uint32_t chunk, const void* data, const uint32_t& siz for (uint32_t i = 0; i < (size / sizeof(short)); ++i) { fprintf(file, "%04x\n", ptr[i]); } + + // Close SQTT file + fclose(file); } } +struct trace_data_arg_t { + FILE* file; + const char* label; +}; + // Trace data callback for getting trace data from GPU local mamory hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type, hsa_ven_amd_aqlprofile_info_data_t* info_data, void* data) { - FILE* file = reinterpret_cast(data); hsa_status_t status = HSA_STATUS_SUCCESS; + trace_data_arg_t* arg = reinterpret_cast(data); if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) { - fprintf(file, " SE(%u) size(%u)\n", info_data->sample_id, info_data->sqtt_data.size); - dump_sqtt_trace(info_data->sample_id, info_data->sqtt_data.ptr, info_data->sqtt_data.size); + fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, info_data->sqtt_data.size); + dump_sqtt_trace(arg->label, info_data->sample_id, info_data->sqtt_data.ptr, info_data->sqtt_data.size); } else status = HSA_STATUS_ERROR; @@ -144,8 +155,7 @@ unsigned align_size(unsigned size, unsigned alignment) { // Output profiling results for input features void output_results(FILE* file, const rocprofiler_feature_t* features, const unsigned feature_count, - rocprofiler_t* context, const char* str) { - if (str) fprintf(file, "%s:\n", str); + rocprofiler_t* context, const char* label) { for (unsigned i = 0; i < feature_count; ++i) { const rocprofiler_feature_t* p = &features[i]; fprintf(file, " %s ", p->name); @@ -163,7 +173,7 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) { const uint32_t chunk_size = *reinterpret_cast(ptr); const char* chunk_data = ptr + sizeof(uint64_t); - dump_sqtt_trace(i, chunk_data, chunk_size); + dump_sqtt_trace(label, i, chunk_data, chunk_size); const uint32_t off = align_size(chunk_size, sizeof(uint64_t)); ptr = chunk_data + off; @@ -175,24 +185,24 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns exit(1); } } else { - //fprintf(file, "iterate GPU local memory "); fprintf(file, "(\n"); - rocprofiler_iterate_trace_data(context, trace_data_cb, reinterpret_cast(file)); + trace_data_arg_t trace_data_arg{file, label}; + rocprofiler_iterate_trace_data(context, trace_data_cb, reinterpret_cast(&trace_data_arg)); fprintf(file, " )\n"); } break; } default: - fprintf(file, "undefined data kind(%u)\n", p->data.kind); + fprintf(stderr, "RPL-tool: undefined data kind(%u)\n", p->data.kind); + abort(); } } } // Output group intermeadate profiling results, created internally for complex metrics void output_group(FILE* file, const rocprofiler_group_t* group, const char* str) { - if (str) fprintf(file, "%s:\n", str); for (unsigned i = 0; i < group->feature_count; ++i) { - output_results(file, group->features[i], 1, group->context, NULL); + output_results(file, group->features[i], 1, group->context, str); } } @@ -216,7 +226,9 @@ void dump_context(context_entry_t* entry) { status = rocprofiler_get_metrics(group.context); check_status(status); - output_results(file_handle, features, feature_count, group.context, NULL); + std::ostringstream oss; + oss << index << "__" << entry->data.kernel_name; + output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str()); // Finishing cleanup // Deleting profiling context will delete all allocated resources @@ -315,17 +327,16 @@ CONSTRUCTOR_API void constructor() { // Set output file result_prefix = getenv("ROCP_OUTPUT_DIR"); - FILE* file_handle = NULL; if (result_prefix != NULL) { std::ostringstream oss; oss << result_prefix << "/results.txt"; - file_handle = fopen(oss.str().c_str(), "w"); - if (file_handle == NULL) { + result_file_handle = fopen(oss.str().c_str(), "w"); + if (result_file_handle == NULL) { perror("result file fopen"); exit(1); } } else - file_handle = stdout; + result_file_handle = stdout; // Getting input const char* xml_name = getenv("ROCP_INPUT"); @@ -419,7 +430,7 @@ CONSTRUCTOR_API void constructor() { dispatch_data->features = features; dispatch_data->feature_count = feature_count; dispatch_data->group_index = 0; - dispatch_data->file_handle = file_handle; + dispatch_data->file_handle = result_file_handle; rocprofiler_set_dispatch_callback(dispatch_callback, dispatch_data); } } @@ -434,4 +445,7 @@ DESTRUCTOR_API void destructor() { } // Dump stored profiling output data dump_context_array(); + + // Close output file + if (result_prefix != NULL) fclose(result_file_handle); }