initializing results on get-data; kernel name in sqtt output file name;

Cette révision appartient à :
Evgeny
2017-12-22 10:22:47 -06:00
Parent 06471db7eb
révision 77cec10648
2 fichiers modifiés avec 41 ajouts et 26 suppressions
+34 -20
Voir le fichier
@@ -22,6 +22,7 @@
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))
#define KERNEL_NAME_LEN_MAX 128
// Disoatch callback data type
struct dispatch_data_t {
@@ -50,6 +51,8 @@ context_array_t* context_array = NULL;
uint32_t context_count = 0;
// Profiling results output file name
const char* result_prefix = NULL;
// Global results file handle
FILE* result_file_handle = NULL;
// Check returned HSA API status
void check_status(hsa_status_t status) {
@@ -104,11 +107,11 @@ void dealloc_context_entry(context_entry_t* entry) {
}
// Dump trace data to file
void dump_sqtt_trace(const uint32_t chunk, const void* data, const uint32_t& size) {
void dump_sqtt_trace(const char* label, const uint32_t chunk, const void* data, const uint32_t& size) {
if (result_prefix != NULL) {
// Opening SQTT file
// Open SQTT file
std::ostringstream oss;
oss << result_prefix << "/thread_trace.se" << chunk << ".out";
oss << result_prefix << "/thread_trace_" << label << "_se" << chunk << ".out";
FILE* file = fopen(oss.str().c_str(), "w");
if (file == NULL) {
perror("result file fopen");
@@ -120,17 +123,25 @@ void dump_sqtt_trace(const uint32_t chunk, const void* data, const uint32_t& siz
for (uint32_t i = 0; i < (size / sizeof(short)); ++i) {
fprintf(file, "%04x\n", ptr[i]);
}
// Close SQTT file
fclose(file);
}
}
struct trace_data_arg_t {
FILE* file;
const char* label;
};
// Trace data callback for getting trace data from GPU local mamory
hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data, void* data) {
FILE* file = reinterpret_cast<FILE*>(data);
hsa_status_t status = HSA_STATUS_SUCCESS;
trace_data_arg_t* arg = reinterpret_cast<trace_data_arg_t*>(data);
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
fprintf(file, " SE(%u) size(%u)\n", info_data->sample_id, info_data->sqtt_data.size);
dump_sqtt_trace(info_data->sample_id, info_data->sqtt_data.ptr, info_data->sqtt_data.size);
fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, info_data->sqtt_data.size);
dump_sqtt_trace(arg->label, info_data->sample_id, info_data->sqtt_data.ptr, info_data->sqtt_data.size);
} else
status = HSA_STATUS_ERROR;
@@ -144,8 +155,7 @@ unsigned align_size(unsigned size, unsigned alignment) {
// Output profiling results for input features
void output_results(FILE* file, const rocprofiler_feature_t* features, const unsigned feature_count,
rocprofiler_t* context, const char* str) {
if (str) fprintf(file, "%s:\n", str);
rocprofiler_t* context, const char* label) {
for (unsigned i = 0; i < feature_count; ++i) {
const rocprofiler_feature_t* p = &features[i];
fprintf(file, " %s ", p->name);
@@ -163,7 +173,7 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns
for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) {
const uint32_t chunk_size = *reinterpret_cast<const uint64_t*>(ptr);
const char* chunk_data = ptr + sizeof(uint64_t);
dump_sqtt_trace(i, chunk_data, chunk_size);
dump_sqtt_trace(label, i, chunk_data, chunk_size);
const uint32_t off = align_size(chunk_size, sizeof(uint64_t));
ptr = chunk_data + off;
@@ -175,24 +185,24 @@ void output_results(FILE* file, const rocprofiler_feature_t* features, const uns
exit(1);
}
} else {
//fprintf(file, "iterate GPU local memory ");
fprintf(file, "(\n");
rocprofiler_iterate_trace_data(context, trace_data_cb, reinterpret_cast<void*>(file));
trace_data_arg_t trace_data_arg{file, label};
rocprofiler_iterate_trace_data(context, trace_data_cb, reinterpret_cast<void*>(&trace_data_arg));
fprintf(file, " )\n");
}
break;
}
default:
fprintf(file, "undefined data kind(%u)\n", p->data.kind);
fprintf(stderr, "RPL-tool: undefined data kind(%u)\n", p->data.kind);
abort();
}
}
}
// Output group intermeadate profiling results, created internally for complex metrics
void output_group(FILE* file, const rocprofiler_group_t* group, const char* str) {
if (str) fprintf(file, "%s:\n", str);
for (unsigned i = 0; i < group->feature_count; ++i) {
output_results(file, group->features[i], 1, group->context, NULL);
output_results(file, group->features[i], 1, group->context, str);
}
}
@@ -216,7 +226,9 @@ void dump_context(context_entry_t* entry) {
status = rocprofiler_get_metrics(group.context);
check_status(status);
output_results(file_handle, features, feature_count, group.context, NULL);
std::ostringstream oss;
oss << index << "__" << entry->data.kernel_name;
output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
// Finishing cleanup
// Deleting profiling context will delete all allocated resources
@@ -315,17 +327,16 @@ CONSTRUCTOR_API void constructor() {
// Set output file
result_prefix = getenv("ROCP_OUTPUT_DIR");
FILE* file_handle = NULL;
if (result_prefix != NULL) {
std::ostringstream oss;
oss << result_prefix << "/results.txt";
file_handle = fopen(oss.str().c_str(), "w");
if (file_handle == NULL) {
result_file_handle = fopen(oss.str().c_str(), "w");
if (result_file_handle == NULL) {
perror("result file fopen");
exit(1);
}
} else
file_handle = stdout;
result_file_handle = stdout;
// Getting input
const char* xml_name = getenv("ROCP_INPUT");
@@ -419,7 +430,7 @@ CONSTRUCTOR_API void constructor() {
dispatch_data->features = features;
dispatch_data->feature_count = feature_count;
dispatch_data->group_index = 0;
dispatch_data->file_handle = file_handle;
dispatch_data->file_handle = result_file_handle;
rocprofiler_set_dispatch_callback(dispatch_callback, dispatch_data);
}
}
@@ -434,4 +445,7 @@ DESTRUCTOR_API void destructor() {
}
// Dump stored profiling output data
dump_context_array();
// Close output file
if (result_prefix != NULL) fclose(result_file_handle);
}