Files
rocm-systems/test/ctrl/tool.cpp
T

471 regels
16 KiB
C++

2017-11-16 12:50:14 -06:00
///////////////////////////////////////////////////////////////////////////////
// //
// Test tool used as ROC profiler library demo //
// //
///////////////////////////////////////////////////////////////////////////////
2017-12-20 18:59:05 -06:00
#include <assert.h>
2018-01-02 18:09:37 -06:00
#include <dirent.h>
2017-11-09 17:26:19 -06:00
#include <hsa.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
2018-01-02 18:09:37 -06:00
#include <sys/types.h>
2017-11-09 17:26:19 -06:00
#include <iostream>
#include <map>
2017-11-27 14:41:05 -06:00
#include <sstream>
#include <string>
2017-11-09 17:26:19 -06:00
#include <vector>
#include "inc/rocprofiler.h"
#include "util/xml.h"
#define PUBLIC_API __attribute__((visibility("default")))
#define CONSTRUCTOR_API __attribute__((constructor))
#define DESTRUCTOR_API __attribute__((destructor))
#define KERNEL_NAME_LEN_MAX 128
2017-11-09 17:26:19 -06:00
2017-11-16 12:50:14 -06:00
// Disoatch callback data type
2017-11-09 17:26:19 -06:00
struct dispatch_data_t {
2017-11-27 16:51:03 -06:00
rocprofiler_feature_t* features;
unsigned feature_count;
2017-11-09 17:26:19 -06:00
unsigned group_index;
2017-11-15 20:59:24 -06:00
FILE* file_handle;
2017-11-09 17:26:19 -06:00
};
2017-11-16 12:50:14 -06:00
// Context stored entry type
2017-11-09 17:26:19 -06:00
struct context_entry_t {
2018-01-02 18:09:37 -06:00
int valid;
2017-11-15 20:59:24 -06:00
uint32_t index;
2017-11-27 16:51:03 -06:00
rocprofiler_group_t group;
rocprofiler_feature_t* features;
unsigned feature_count;
2017-11-09 17:26:19 -06:00
rocprofiler_callback_data_t data;
2017-11-15 20:59:24 -06:00
FILE* file_handle;
2017-11-09 17:26:19 -06:00
};
2017-11-16 12:50:14 -06:00
// Dispatch callbacks and context handlers synchronization
2017-12-22 00:12:23 -06:00
pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
2017-11-16 12:50:14 -06:00
// Stored contexts array
2017-12-22 00:12:23 -06:00
typedef std::map<uint32_t, context_entry_t> context_array_t;
context_array_t* context_array = NULL;
// Contexts collected count
uint32_t context_count = 0;
2017-11-27 14:41:05 -06:00
// Profiling results output file name
2017-11-27 16:51:03 -06:00
const char* result_prefix = NULL;
// Global results file handle
FILE* result_file_handle = NULL;
2017-11-09 17:26:19 -06:00
2017-11-16 12:50:14 -06:00
// Check returned HSA API status
2017-11-09 17:26:19 -06:00
void check_status(hsa_status_t status) {
if (status != HSA_STATUS_SUCCESS) {
const char* error_string = NULL;
rocprofiler_error_string(&error_string);
fprintf(stderr, "ERROR: %s\n", error_string);
exit(1);
}
}
2017-11-16 12:50:14 -06:00
// Allocate entry to store profiling context.
// Inserts a new zero-initialized entry, keyed by the running contexts counter,
// into the contexts map (created on first use) and returns a pointer to it.
// The returned pointer refers to the map element and stays usable until that
// element is erased.
// Terminates the process if the mutex cannot be locked/unlocked or if the key
// is already present in the map.
context_entry_t* alloc_context_entry() {
  if (pthread_mutex_lock(&mutex) != 0) {
    perror("pthread_mutex_lock");
    exit(1);
  }

  if (context_array == NULL) context_array = new context_array_t;
  const uint32_t index = context_count;

  // Set the key in the entry before it becomes reachable through the map, so
  // it is never observable in the map with an unset index
  context_entry_t blank{};
  blank.index = index;
  auto ret = context_array->insert({index, blank});
  if (ret.second == false) {
    fprintf(stderr, "context_array corruption, index repeated %u\n", index);
    abort();
  }
  ++context_count;

  // Take the element address while the map is still protected by the lock
  context_entry_t* entry = &(ret.first->second);

  if (pthread_mutex_unlock(&mutex) != 0) {
    perror("pthread_mutex_unlock");
    exit(1);
  }
  return entry;
}
// Deallocate a stored profiling context entry.
// Erases the contexts map element keyed by entry->index while holding the
// global mutex. If 'entry' points into the map it is dangling afterwards.
// Terminates the process if the mutex cannot be locked/unlocked.
void dealloc_context_entry(context_entry_t* entry) {
  const int lock_rc = pthread_mutex_lock(&mutex);
  if (lock_rc != 0) {
    perror("pthread_mutex_lock");
    exit(1);
  }

  assert(context_array != NULL);
  // Copy the key first: the entry itself may live inside the erased element
  const uint32_t key = entry->index;
  context_array->erase(key);

  const int unlock_rc = pthread_mutex_unlock(&mutex);
  if (unlock_rc != 0) {
    perror("pthread_mutex_unlock");
    exit(1);
  }
}
2017-11-27 14:41:05 -06:00
// Dump trace data to file
void dump_sqtt_trace(const char* label, const uint32_t chunk, const void* data, const uint32_t& size) {
2017-11-27 16:51:03 -06:00
if (result_prefix != NULL) {
// Open SQTT file
2017-11-27 16:51:03 -06:00
std::ostringstream oss;
oss << result_prefix << "/thread_trace_" << label << "_se" << chunk << ".out";
2017-11-27 16:51:03 -06:00
FILE* file = fopen(oss.str().c_str(), "w");
if (file == NULL) {
2018-01-02 18:09:37 -06:00
std::ostringstream errmsg;
errmsg << "fopen error, file '" << oss.str().c_str() << "'";
perror(errmsg.str().c_str());
2017-11-27 16:51:03 -06:00
exit(1);
}
2017-11-29 13:53:12 -06:00
2017-11-27 16:51:03 -06:00
// Write the buffer in terms of shorts (16 bits)
const unsigned short* ptr = reinterpret_cast<const unsigned short*>(data);
for (uint32_t i = 0; i < (size / sizeof(short)); ++i) {
fprintf(file, "%04x\n", ptr[i]);
}
// Close SQTT file
fclose(file);
2017-11-27 14:41:05 -06:00
}
}
// Argument bundle passed as the user data pointer of the trace data callback
struct trace_data_arg_t {
// Stream the per-sample summary lines are printed to
FILE* file;
// Label forwarded to the trace dump routine to compose the file names
const char* label;
};
2017-11-27 14:41:05 -06:00
// Trace data callback for getting trace data from GPU local mamory
2017-11-29 13:53:12 -06:00
hsa_status_t trace_data_cb(hsa_ven_amd_aqlprofile_info_type_t info_type,
hsa_ven_amd_aqlprofile_info_data_t* info_data, void* data) {
2017-11-09 17:26:19 -06:00
hsa_status_t status = HSA_STATUS_SUCCESS;
trace_data_arg_t* arg = reinterpret_cast<trace_data_arg_t*>(data);
2017-11-09 17:26:19 -06:00
if (info_type == HSA_VEN_AMD_AQLPROFILE_INFO_SQTT_DATA) {
fprintf(arg->file, " SE(%u) size(%u)\n", info_data->sample_id, info_data->sqtt_data.size);
dump_sqtt_trace(arg->label, info_data->sample_id, info_data->sqtt_data.ptr, info_data->sqtt_data.size);
2017-11-29 13:53:12 -06:00
} else
status = HSA_STATUS_ERROR;
2017-11-09 17:26:19 -06:00
return status;
}
2017-11-16 12:50:14 -06:00
// Align to specified alignment
2017-11-29 13:53:12 -06:00
// Rounds 'size' up to the nearest multiple of 'alignment'.
// The bit-mask arithmetic gives a multiple only for power-of-two alignments.
unsigned align_size(unsigned size, unsigned alignment) {
  const unsigned mask = alignment - 1;
  const unsigned bumped = size + mask;
  return bumped & ~mask;
}
2017-11-09 17:26:19 -06:00
2017-11-16 12:50:14 -06:00
// Output profiling results for input features
2017-11-29 13:53:12 -06:00
void output_results(FILE* file, const rocprofiler_feature_t* features, const unsigned feature_count,
rocprofiler_t* context, const char* label) {
2017-11-29 13:53:12 -06:00
for (unsigned i = 0; i < feature_count; ++i) {
2017-11-27 16:51:03 -06:00
const rocprofiler_feature_t* p = &features[i];
2017-11-09 17:26:19 -06:00
fprintf(file, " %s ", p->name);
switch (p->data.kind) {
2017-11-16 12:50:14 -06:00
// Output metrics results
2017-11-27 16:51:03 -06:00
case ROCPROFILER_DATA_KIND_INT64:
2017-11-09 17:26:19 -06:00
fprintf(file, "(%lu)\n", p->data.result_int64);
break;
2017-11-16 12:50:14 -06:00
// Output trace results
2017-11-27 16:51:03 -06:00
case ROCPROFILER_DATA_KIND_BYTES: {
2017-11-09 17:26:19 -06:00
if (p->data.result_bytes.copy) {
uint64_t size = 0;
const char* ptr = reinterpret_cast<const char*>(p->data.result_bytes.ptr);
2017-11-09 17:26:19 -06:00
for (unsigned i = 0; i < p->data.result_bytes.instance_count; ++i) {
2017-11-27 14:41:05 -06:00
const uint32_t chunk_size = *reinterpret_cast<const uint64_t*>(ptr);
const char* chunk_data = ptr + sizeof(uint64_t);
dump_sqtt_trace(label, i, chunk_data, chunk_size);
2017-11-27 14:41:05 -06:00
const uint32_t off = align_size(chunk_size, sizeof(uint64_t));
ptr = chunk_data + off;
size += chunk_size;
}
fprintf(file, "size(%lu)\n", size);
if (size > p->data.result_bytes.size) {
fprintf(stderr, "SQTT data size is out of the result buffer size\n");
exit(1);
}
2017-11-09 17:26:19 -06:00
} else {
2017-11-30 14:26:16 -06:00
fprintf(file, "(\n");
trace_data_arg_t trace_data_arg{file, label};
rocprofiler_iterate_trace_data(context, trace_data_cb, reinterpret_cast<void*>(&trace_data_arg));
fprintf(file, " )\n");
2017-11-09 17:26:19 -06:00
}
break;
}
default:
fprintf(stderr, "RPL-tool: undefined data kind(%u)\n", p->data.kind);
abort();
2017-11-09 17:26:19 -06:00
}
}
}
2017-11-16 12:50:14 -06:00
// Output group intermeadate profiling results, created internally for complex metrics
void output_group(FILE* file, const rocprofiler_group_t* group, const char* str) {
2017-11-29 13:53:12 -06:00
for (unsigned i = 0; i < group->feature_count; ++i) {
output_results(file, group->features[i], 1, group->context, str);
2017-11-09 17:26:19 -06:00
}
}
2017-11-16 12:50:14 -06:00
// Dump stored context profiling output data
2017-11-15 20:59:24 -06:00
void dump_context(context_entry_t* entry) {
2017-11-09 17:26:19 -06:00
hsa_status_t status = HSA_STATUS_ERROR;
2018-01-02 18:09:37 -06:00
if (entry->valid) {
entry->valid = 0;
const uint32_t index = entry->index;
FILE* file_handle = entry->file_handle;
const rocprofiler_feature_t* features = entry->features;
const unsigned feature_count = entry->feature_count;
fprintf(file_handle,
"dispatch[%u], queue_index(%lu), kernel_object(0x%lx), kernel_name(\"%s\"):\n", index,
entry->data.queue_index, entry->data.kernel_object, entry->data.kernel_name);
rocprofiler_group_t group = entry->group;
status = rocprofiler_group_get_data(&group);
check_status(status);
// output_group(file, group, "Group[0] data");
status = rocprofiler_get_metrics(group.context);
check_status(status);
std::ostringstream oss;
oss << index << "__" << entry->data.kernel_name;
output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
// Finishing cleanup
// Deleting profiling context will delete all allocated resources
rocprofiler_close(group.context);
}
2017-11-15 20:59:24 -06:00
}
2017-11-09 17:26:19 -06:00
2017-11-16 12:50:14 -06:00
// Dump all stored contexts profiling output data
void dump_context_array() {
2017-11-15 20:59:24 -06:00
if (pthread_mutex_lock(&mutex) != 0) {
perror("pthread_mutex_lock");
exit(1);
}
2017-11-09 17:26:19 -06:00
if (context_array) for (auto& v : *context_array) dump_context(&v.second);
2017-11-09 17:26:19 -06:00
2017-11-15 20:59:24 -06:00
if (pthread_mutex_unlock(&mutex) != 0) {
perror("pthread_mutex_unlock");
exit(1);
}
2017-11-09 17:26:19 -06:00
}
2017-11-16 12:50:14 -06:00
// Profiling completion handler
2017-11-15 20:59:24 -06:00
void handler(rocprofiler_group_t group, void* arg) {
2017-12-22 00:12:23 -06:00
context_entry_t* entry = reinterpret_cast<context_entry_t*>(arg);
2017-11-09 17:26:19 -06:00
if (pthread_mutex_lock(&mutex) != 0) {
perror("pthread_mutex_lock");
exit(1);
}
2017-12-22 00:12:23 -06:00
if (context_array->find(entry->index) != context_array->end()) {
dump_context(entry);
dealloc_context_entry(entry);
}
2017-11-09 17:26:19 -06:00
if (pthread_mutex_unlock(&mutex) != 0) {
perror("pthread_mutex_unlock");
exit(1);
}
}
2017-11-16 12:50:14 -06:00
// Kernel disoatch callback
2017-11-29 13:53:12 -06:00
hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data, void* user_data,
rocprofiler_group_t* group) {
2017-11-15 20:59:24 -06:00
// HSA status
2017-11-09 17:26:19 -06:00
hsa_status_t status = HSA_STATUS_ERROR;
// Passed tool data
dispatch_data_t* tool_data = reinterpret_cast<dispatch_data_t*>(user_data);
// Profiling context
rocprofiler_t* context = NULL;
2017-11-15 20:59:24 -06:00
// Context entry
2017-11-16 12:50:14 -06:00
context_entry_t* entry = alloc_context_entry();
2017-11-09 17:26:19 -06:00
// context properties
rocprofiler_properties_t properties{};
2017-11-27 16:51:03 -06:00
properties.handler = (result_prefix != NULL) ? handler : NULL;
2017-11-15 20:59:24 -06:00
properties.handler_arg = (void*)entry;
2017-11-09 17:26:19 -06:00
// Open profiling context
2017-11-29 13:53:12 -06:00
status = rocprofiler_open(callback_data->agent, tool_data->features, tool_data->feature_count,
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
2017-11-09 17:26:19 -06:00
check_status(status);
2017-11-27 16:51:03 -06:00
// Check that we have only one profiling group
2017-11-09 17:26:19 -06:00
uint32_t group_count = 0;
2017-11-27 16:51:03 -06:00
status = rocprofiler_group_count(context, &group_count);
2017-11-09 17:26:19 -06:00
check_status(status);
assert(group_count == 1);
2017-11-27 16:51:03 -06:00
// Get group[0]
const uint32_t group_index = 0;
status = rocprofiler_get_group(context, group_index, group);
check_status(status);
2017-11-09 17:26:19 -06:00
2017-11-27 16:51:03 -06:00
// Fill profiling context entry
2017-11-15 20:59:24 -06:00
entry->group = *group;
2017-11-27 16:51:03 -06:00
entry->features = tool_data->features;
entry->feature_count = tool_data->feature_count;
2017-11-15 20:59:24 -06:00
entry->data = *callback_data;
entry->file_handle = tool_data->file_handle;
2018-01-02 18:09:37 -06:00
entry->valid = 1;
2017-11-09 17:26:19 -06:00
return status;
}
2017-11-16 12:50:14 -06:00
// Tool constructor
2018-01-02 18:09:37 -06:00
CONSTRUCTOR_API void constructor()
{
2017-11-09 17:26:19 -06:00
std::map<std::string, hsa_ven_amd_aqlprofile_parameter_name_t> parameters_dict;
2017-11-29 13:53:12 -06:00
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET"] =
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK"] =
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_VM_ID_MASK;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK"] =
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_MASK;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK"] =
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK;
parameters_dict["HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2"] =
HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_TOKEN_MASK2;
2017-11-09 17:26:19 -06:00
// Set output file
2017-11-27 16:51:03 -06:00
result_prefix = getenv("ROCP_OUTPUT_DIR");
if (result_prefix != NULL) {
2018-01-02 18:09:37 -06:00
DIR* dir = opendir(result_prefix);
if (dir == NULL) {
std::ostringstream errmsg;
errmsg << "Cannot open output directory '" << result_prefix << "'";
perror(errmsg.str().c_str());
exit(1);
}
2017-11-27 16:51:03 -06:00
std::ostringstream oss;
oss << result_prefix << "/results.txt";
result_file_handle = fopen(oss.str().c_str(), "w");
if (result_file_handle == NULL) {
2018-01-02 18:09:37 -06:00
std::ostringstream errmsg;
errmsg << "fopen error, file '" << oss.str().c_str() << "'";
perror(errmsg.str().c_str());
2017-11-09 17:26:19 -06:00
exit(1);
}
2017-11-29 13:53:12 -06:00
} else
result_file_handle = stdout;
2017-11-09 17:26:19 -06:00
// Getting input
const char* xml_name = getenv("ROCP_INPUT");
if (xml_name == NULL) {
fprintf(stderr, "ROCProfiler: input is not specified, ROCP_INPUT env");
exit(1);
}
printf("ROCProfiler: input from \"%s\"\n", xml_name);
xml::Xml* xml = xml::Xml::Create(xml_name);
if (xml == NULL) {
fprintf(stderr, "Input file not found '%s'\n", xml_name);
exit(1);
}
2017-11-09 17:26:19 -06:00
// Getting metrics
auto metrics_list = xml->GetNodes("top.metric");
std::vector<std::string> metrics_vec;
for (auto* entry : metrics_list) {
const std::string entry_str = entry->opts["name"];
size_t pos1 = 0;
2017-11-29 13:53:12 -06:00
while (pos1 < entry_str.length()) {
2017-11-09 17:26:19 -06:00
const size_t pos2 = entry_str.find(",", pos1);
const std::string metric_name = entry_str.substr(pos1, pos2 - pos1);
metrics_vec.push_back(metric_name);
if (pos2 == std::string::npos) break;
pos1 = pos2 + 1;
}
}
// Getting traces
auto traces_list = xml->GetNodes("top.trace");
2017-11-27 16:51:03 -06:00
const unsigned feature_count = metrics_vec.size() + traces_list.size();
2017-11-29 13:53:12 -06:00
rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count];
2017-11-27 16:51:03 -06:00
memset(features, 0, feature_count * sizeof(rocprofiler_feature_t));
2017-11-09 17:26:19 -06:00
2017-11-29 13:53:12 -06:00
printf(" %d metrics\n", (int)metrics_vec.size());
2017-11-09 17:26:19 -06:00
for (unsigned i = 0; i < metrics_vec.size(); ++i) {
const std::string& name = metrics_vec[i];
printf("%s%s", (i == 0) ? " " : ", ", name.c_str());
2017-11-27 16:51:03 -06:00
features[i] = {};
features[i].kind = ROCPROFILER_FEATURE_KIND_METRIC;
features[i].name = strdup(name.c_str());
2017-11-09 17:26:19 -06:00
}
if (metrics_vec.size()) printf("\n");
2017-11-29 13:53:12 -06:00
printf(" %d traces\n", (int)traces_list.size());
2017-11-09 17:26:19 -06:00
unsigned index = metrics_vec.size();
for (auto* entry : traces_list) {
auto params_list = xml->GetNodes("top.trace.parameters");
if (params_list.size() != 1) {
fprintf(stderr, "ROCProfiler: Single input 'parameters' section is supported\n");
exit(1);
}
const std::string& name = entry->opts["name"];
const bool to_copy_data = (entry->opts["copy"] == "true");
printf(" %s (\n", name.c_str());
2017-11-27 16:51:03 -06:00
features[index] = {};
features[index].kind = ROCPROFILER_FEATURE_KIND_TRACE;
features[index].name = strdup(name.c_str());
features[index].data.result_bytes.copy = to_copy_data;
2017-11-09 17:26:19 -06:00
for (auto* params : params_list) {
const unsigned parameter_count = params->opts.size();
2017-11-29 13:53:12 -06:00
rocprofiler_parameter_t* parameters = new rocprofiler_parameter_t[parameter_count];
2017-11-09 17:26:19 -06:00
unsigned p_index = 0;
for (auto& v : params->opts) {
const std::string parameter_name = v.first;
if (parameters_dict.find(parameter_name) == parameters_dict.end()) {
fprintf(stderr, "ROCProfiler: unknown trace parameter %s\n", parameter_name.c_str());
exit(1);
}
const uint32_t value = strtol(v.second.c_str(), NULL, 0);
printf(" %s = 0x%x\n", parameter_name.c_str(), value);
parameters[p_index] = {};
parameters[p_index].parameter_name = parameters_dict[parameter_name];
parameters[p_index].value = value;
++p_index;
}
2017-11-27 16:51:03 -06:00
features[index].parameters = parameters;
features[index].parameter_count = parameter_count;
2017-11-09 17:26:19 -06:00
}
printf(" )\n");
++index;
}
2017-11-16 12:50:14 -06:00
// Adding dispatch observer
2017-11-27 16:51:03 -06:00
if (feature_count) {
2017-11-09 17:26:19 -06:00
dispatch_data_t* dispatch_data = new dispatch_data_t{};
2017-11-27 16:51:03 -06:00
dispatch_data->features = features;
dispatch_data->feature_count = feature_count;
2017-11-09 17:26:19 -06:00
dispatch_data->group_index = 0;
dispatch_data->file_handle = result_file_handle;
2017-11-27 16:51:03 -06:00
rocprofiler_set_dispatch_callback(dispatch_callback, dispatch_data);
2017-11-09 17:26:19 -06:00
}
}
2017-11-16 12:50:14 -06:00
// Tool destructor
2017-11-09 17:26:19 -06:00
DESTRUCTOR_API void destructor() {
2018-01-02 18:09:37 -06:00
const bool result_file_opened = (result_prefix != NULL) && (result_file_handle != NULL);
2017-12-22 00:12:23 -06:00
printf("\nROCPRofiler: %u contexts collected", context_count);
2018-01-02 18:09:37 -06:00
if (result_file_opened) printf(", output directory %s", result_prefix);
printf("\n");
2017-12-22 00:12:23 -06:00
// Dump stored profiling output data
dump_context_array();
// Close output file
2018-01-02 18:09:37 -06:00
if (result_file_opened) fclose(result_file_handle);
2017-11-09 17:26:19 -06:00
}