adding completion handler
Этот коммит содержится в:
@@ -138,17 +138,23 @@ typedef struct {
|
||||
rocprofiler_t* context; // context object
|
||||
} rocprofiler_group_t;
|
||||
|
||||
// Profiling mode
|
||||
// Profiling mode mask
|
||||
// Mode flags for a profiling context. The enumerators are distinct bits (1, 2, 4),
// matching the "mode mask" wording of the comment above this typedef.
typedef enum {
|
||||
ROCPROFILER_MODE_STANDALONE = 1, // the properties 'queue' field is documented as "queue for STANDALONE mode"
|
||||
ROCPROFILER_MODE_CREATEQUEUE = 2, // the properties 'queue' field is documented as created and returned in this mode
|
||||
ROCPROFILER_MODE_SINGLEGROUP = 4 // NOTE(review): presumably restricts the context to a single group - confirm
|
||||
} rocprofiler_mode_t;
|
||||
|
||||
// Profiling handler, called on profiling completion
|
||||
typedef void (*rocprofiler_handler_t)(rocprofiler_group_t group, void* arg);
|
||||
|
||||
// Profiling properties
|
||||
// Properties passed to rocprofiler_open(); the handler and handler_arg fields are
// forwarded to the Context constructor.
typedef struct {
|
||||
hsa_queue_t* queue; // queue for STANDALONE mode
|
||||
// the queue is created and returned in CREATEQUEUE mode
|
||||
uint32_t queue_depth; // depth of the queue created in CREATEQUEUE mode
|
||||
rocprofiler_handler_t handler; // completion handler; when NULL no completion handlers are registered
|
||||
void* handler_arg; // opaque pointer passed back as the second argument of 'handler'
|
||||
} rocprofiler_properties_t;
|
||||
|
||||
// Create new profiling context
|
||||
@@ -164,6 +170,11 @@ hsa_status_t rocprofiler_open(
|
||||
hsa_status_t rocprofiler_close(
|
||||
rocprofiler_t* context); // [in] profiling context
|
||||
|
||||
// Reset a context group before reusing it.
// The implementation re-arms the completion reference count of the selected group.
|
||||
hsa_status_t rocprofiler_reset(
|
||||
rocprofiler_t* context, // [in] profiling context
|
||||
uint32_t group_index); // [in] index of the group to reset
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Runtime API observer
|
||||
//
|
||||
@@ -239,7 +250,7 @@ hsa_status_t rocprofiler_get_group_data(
|
||||
rocprofiler_group_t* group); // [in/out] profiling group
|
||||
|
||||
// Get metrics data
|
||||
hsa_status_t rocprofiler_get_metrics_data(
|
||||
hsa_status_t rocprofiler_get_metrics(
|
||||
const rocprofiler_t* context); // [in/out] profiling context
|
||||
|
||||
// Definition of output data iterator callback
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include "inc/rocprofiler.h"
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_amd.h>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
@@ -67,6 +68,8 @@ class Group {
|
||||
// Builds a group bound to 'context' at position 'index'.
// Both profiles are constructed from the same agent info; the profile count starts
// at 0 and the completion reference count starts at 1.
Group(const util::AgentInfo* agent_info, Context *context, const uint32_t& index) :
|
||||
pmc_profile_(agent_info),
|
||||
sqtt_profile_(agent_info),
|
||||
n_profiles_(0), // incremented in Finalize() for each non-empty profile
|
||||
refs_(1), // overwritten with n_profiles_ by ResetRefs()
|
||||
context_(context),
|
||||
index_(index)
|
||||
{}
|
||||
@@ -91,6 +94,10 @@ class Group {
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
status = sqtt_profile_.Finalize(start_vector_, stop_vector_);
|
||||
}
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
if (!pmc_profile_.Empty()) ++n_profiles_;
|
||||
if (!sqtt_profile_.Empty()) ++n_profiles_;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -109,12 +116,23 @@ class Group {
|
||||
// Returns the context pointer this group was constructed with.
Context* GetContext() { return context_; }
|
||||
// Returns the index this group was constructed with.
uint32_t GetIndex() const { return index_; }
|
||||
|
||||
// Builds the public group descriptor: group index, pointer to the first info entry,
// number of info entries, and the owning context.
// NOTE(review): '&info_vector_[0]' assumes info_vector_ is non-empty - confirm callers guarantee this.
rocprofiler_group_t GetGroup() {
|
||||
return rocprofiler_group_t{index_, &info_vector_[0], (uint32_t)info_vector_.size(), context_};
|
||||
}
|
||||
// Re-arms the completion counter: one reference per profile counted in n_profiles_.
void ResetRefs() { refs_ = n_profiles_; }
|
||||
// Drops one completion reference and returns the remaining count.
// No locking is done here; Context::Handler calls it while holding the context mutex.
// NOTE(review): refs_ is unsigned, so calling this when it is already 0 wraps around.
uint32_t DecrRefs() {
|
||||
--refs_;
|
||||
return refs_;
|
||||
}
|
||||
|
||||
private:
|
||||
PmcProfile pmc_profile_;
|
||||
SqttProfile sqtt_profile_;
|
||||
info_vector_t info_vector_;
|
||||
pkt_vector_t start_vector_;
|
||||
pkt_vector_t stop_vector_;
|
||||
uint32_t n_profiles_;
|
||||
uint32_t refs_;
|
||||
Context* const context_;
|
||||
const uint32_t index_;
|
||||
};
|
||||
@@ -122,19 +140,38 @@ class Group {
|
||||
// Profiling context
|
||||
class Context {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
typedef std::map<std::string, rocprofiler_info_t*> info_map_t;
|
||||
|
||||
Context(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_info_t* info, const uint32_t info_count) :
|
||||
Context(const util::AgentInfo* agent_info, Queue* queue, rocprofiler_info_t* info, const uint32_t info_count, rocprofiler_handler_t handler, void* handler_arg) :
|
||||
agent_(agent_info->dev_id),
|
||||
agent_info_(agent_info),
|
||||
queue_(queue),
|
||||
hsa_rsrc_(&util::HsaRsrcFactory::Instance()),
|
||||
api_(hsa_rsrc_->AqlProfileApi())
|
||||
api_(hsa_rsrc_->AqlProfileApi()),
|
||||
handler_(handler),
|
||||
handler_arg_(handler_arg)
|
||||
{
|
||||
metrics_ = MetricsDict::Create(agent_info);
|
||||
if (metrics_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "MetricsDict create failed");
|
||||
Initialize(info, info_count);
|
||||
Finalize();
|
||||
|
||||
if (handler != NULL) {
|
||||
for (unsigned group_index = 0; group_index < set_.size(); ++group_index) {
|
||||
set_[group_index].ResetRefs();
|
||||
const profile_vector_t profile_vector = GetProfiles(group_index);
|
||||
for (auto& tuple : profile_vector) {
|
||||
// Handler for stop packet completion
|
||||
hsa_amd_signal_async_handler(
|
||||
tuple.completion_signal,
|
||||
HSA_SIGNAL_CONDITION_LT,
|
||||
1,
|
||||
Handler,
|
||||
&set_[group_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
~Context() {
|
||||
@@ -232,6 +269,10 @@ class Context {
|
||||
}
|
||||
}
|
||||
|
||||
// Re-arms the completion reference count of the group at 'group_index'.
// NOTE(review): the index is used to subscript set_ without a visible range check here.
void Reset(const uint32_t& group_index) {
|
||||
set_[group_index].ResetRefs();
|
||||
}
|
||||
|
||||
uint32_t GetGroupCount() const { return set_.size(); }
|
||||
|
||||
rocprofiler_group_t GetGroupInfo(const uint32_t& index) {
|
||||
@@ -322,6 +363,16 @@ class Context {
|
||||
return vec;
|
||||
}
|
||||
|
||||
// Completion callback registered via hsa_amd_signal_async_handler for each profile
// completion signal of a group; 'arg' is the Group* the signal belongs to.
// Under the context mutex it drops one reference of the group and, once the count
// reaches zero, calls the user completion handler with the group descriptor and the
// user argument. The 'value' parameter is not used.
static bool Handler(hsa_signal_value_t value, void* arg) {
|
||||
Group* group = reinterpret_cast<Group*>(arg);
|
||||
// Serializes the reference counting; the lock is held until the function returns,
// so the user handler below runs with the context mutex locked.
std::lock_guard<mutex_t> lck(group->GetContext()->mutex_);
|
||||
uint32_t r = group->DecrRefs();
|
||||
if (r == 0) {
|
||||
group->GetContext()->handler_(group->GetGroup(), group->GetContext()->handler_arg_);
|
||||
}
|
||||
// NOTE(review): presumably 'false' tells the HSA runtime not to re-arm this handler - confirm
return false;
|
||||
}
|
||||
|
||||
static hsa_status_t DataCallback(hsa_ven_amd_aqlprofile_info_type_t ainfo_type,
|
||||
hsa_ven_amd_aqlprofile_info_data_t* ainfo_data,
|
||||
void* data) {
|
||||
@@ -402,6 +453,10 @@ class Context {
|
||||
info_map_t info_map_;
|
||||
// Metrics map
|
||||
std::map<std::string, const Metric*> metrics_map_;
|
||||
// Context completion handler
|
||||
rocprofiler_handler_t handler_;
|
||||
void* handler_arg_;
|
||||
mutex_t mutex_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -143,6 +143,8 @@ class Profile {
|
||||
}
|
||||
}
|
||||
|
||||
bool Empty() const { return info_vector_.empty(); }
|
||||
|
||||
protected:
|
||||
virtual hsa_status_t Allocate(util::HsaRsrcFactory* rsrc) = 0;
|
||||
|
||||
|
||||
@@ -127,7 +127,7 @@ PUBLIC_API hsa_status_t rocprofiler_open(
|
||||
}
|
||||
}
|
||||
|
||||
*handle = (void*) new rocprofiler::Context(agent_info, queue, info, info_count);
|
||||
*handle = (void*) new rocprofiler::Context(agent_info, queue, info, info_count, properties->handler, properties->handler_arg);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
@@ -140,6 +140,15 @@ PUBLIC_API hsa_status_t rocprofiler_close(rocprofiler_t* handle)
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Reset context: re-arms the completion reference count of group 'group_index'
// in the context referenced by 'handle'.
// NOTE(review): the return value and error mapping presumably come from the
// API_METHOD_PREFIX / API_METHOD_SUFFIX macros, which are not visible here - confirm.
|
||||
PUBLIC_API hsa_status_t rocprofiler_reset(rocprofiler_t* handle, uint32_t group_index)
|
||||
{
|
||||
API_METHOD_PREFIX
|
||||
// The opaque handle is the Context object created by rocprofiler_open()
rocprofiler::Context* context = reinterpret_cast<rocprofiler::Context*>(handle);
|
||||
context->Reset(group_index);
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Get profiling groups
|
||||
PUBLIC_API hsa_status_t rocprofiler_get_groups(rocprofiler_t* handle, rocprofiler_group_t** group_array, uint32_t* group_count) {
|
||||
API_METHOD_PREFIX
|
||||
@@ -199,7 +208,7 @@ PUBLIC_API hsa_status_t rocprofiler_get_group_data(rocprofiler_group_t* group) {
|
||||
}
|
||||
|
||||
// Get metrics data
|
||||
PUBLIC_API hsa_status_t rocprofiler_get_metrics_data(const rocprofiler_t* handle) {
|
||||
PUBLIC_API hsa_status_t rocprofiler_get_metrics(const rocprofiler_t* handle) {
|
||||
API_METHOD_PREFIX
|
||||
const rocprofiler::Context* context = reinterpret_cast<const rocprofiler::Context*>(handle);
|
||||
context->GetMetricsData();
|
||||
|
||||
@@ -124,7 +124,7 @@ void dump_context(FILE *file, unsigned index) {
|
||||
check_status(status);
|
||||
//print_group(file, group, "Group[0] data");
|
||||
|
||||
status = rocprofiler_get_metrics_data(group->context);
|
||||
status = rocprofiler_get_metrics(group->context);
|
||||
check_status(status);
|
||||
print_info(file, info, info_count, NULL);
|
||||
|
||||
|
||||
+66
-34
@@ -21,23 +21,24 @@ struct dispatch_data_t {
|
||||
rocprofiler_info_t* info;
|
||||
unsigned info_count;
|
||||
unsigned group_index;
|
||||
FILE* file_handle;
|
||||
};
|
||||
|
||||
// Per-dispatch record kept by the tool: one entry per profiling context opened
// in dispatch_callback() and later consumed by dump_context().
struct context_entry_t {
|
||||
uint32_t index; // position of the entry in context_array, set by alloc_entry()
|
||||
rocprofiler_group_t* group; // profiling group of the dispatch; set to NULL once dumped
|
||||
rocprofiler_info_t* info; // info array copied from the tool dispatch data
|
||||
unsigned info_count; // number of elements in 'info'
|
||||
rocprofiler_callback_data_t data; // copy of the dispatch callback data
|
||||
FILE* file_handle; // output file the entry is dumped to
|
||||
};
|
||||
|
||||
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
|
||||
unsigned context_array_size = 1;
|
||||
context_entry_t* context_array = NULL;
|
||||
unsigned context_array_index = 0;
|
||||
unsigned dump_index = 0;
|
||||
unsigned context_array_count = 0;
|
||||
|
||||
const char* file_name = NULL;
|
||||
FILE* file_handle = NULL;
|
||||
|
||||
void check_status(hsa_status_t status) {
|
||||
if (status != HSA_STATUS_SUCCESS) {
|
||||
@@ -121,46 +122,56 @@ void print_group(FILE* file, const rocprofiler_group_t* group, const char* str)
|
||||
}
|
||||
}
|
||||
|
||||
void store_entry(const context_entry_t& context_entry) {
|
||||
context_entry_t* alloc_entry() {
|
||||
context_entry_t* ptr = 0;
|
||||
|
||||
if(pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ((context_array == NULL) || (context_array_index >= context_array_size)) {
|
||||
if ((context_array == NULL) || (context_array_count >= context_array_size)) {
|
||||
context_array_size *= 2;
|
||||
context_array = reinterpret_cast<context_entry_t*>(realloc(context_array, context_array_size * sizeof(context_entry_t)));
|
||||
}
|
||||
context_array[context_array_index] = context_entry;
|
||||
context_array_index += 1;
|
||||
ptr = &context_array[context_array_count];
|
||||
*ptr = {};
|
||||
ptr->index = context_array_count;
|
||||
context_array_count += 1;
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void dump_context(FILE *file, context_entry_t* entry, unsigned index) {
|
||||
void dump_context(context_entry_t* entry) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
rocprofiler_group_t* group = entry->group;
|
||||
const rocprofiler_info_t* info = entry->info;
|
||||
const unsigned info_count = entry->info_count;
|
||||
fprintf(file, "Dispatch[%u], kernel_object(0x%lx):\n", index, entry->data.kernel_object);
|
||||
|
||||
status = rocprofiler_get_group_data(group);
|
||||
check_status(status);
|
||||
//print_group(file, group, "Group[0] data");
|
||||
if (group) {
|
||||
uint32_t index = entry->index;
|
||||
const rocprofiler_info_t* info = entry->info;
|
||||
const unsigned info_count = entry->info_count;
|
||||
FILE* file_handle = entry->file_handle;
|
||||
|
||||
status = rocprofiler_get_metrics_data(group->context);
|
||||
check_status(status);
|
||||
print_info(file, info, info_count, group->context, NULL);
|
||||
|
||||
// Finishing cleanup
|
||||
// Deleting profiling context will delete all allocated resources
|
||||
rocprofiler_close(group->context);
|
||||
|
||||
dump_index = index;
|
||||
fprintf(file_handle, "Dispatch[%u], kernel_object(0x%lx):\n", index, entry->data.kernel_object);
|
||||
|
||||
status = rocprofiler_get_group_data(group);
|
||||
check_status(status);
|
||||
//print_group(file, group, "Group[0] data");
|
||||
|
||||
status = rocprofiler_get_metrics(group->context);
|
||||
check_status(status);
|
||||
print_info(file_handle, info, info_count, group->context, NULL);
|
||||
|
||||
// Finishing cleanup
|
||||
// Deleting profiling context will delete all allocated resources
|
||||
rocprofiler_close(group->context);
|
||||
entry->group = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void dumping_data() {
|
||||
@@ -169,8 +180,8 @@ void dumping_data() {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (unsigned index = 0; index < context_array_index; ++index) {
|
||||
dump_context(file_handle, &context_array[index], index);
|
||||
for (unsigned index = 0; index < context_array_count; ++index) {
|
||||
dump_context(&context_array[index]);
|
||||
}
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
@@ -179,21 +190,41 @@ void dumping_data() {
|
||||
}
|
||||
}
|
||||
|
||||
// Completion handler installed through rocprofiler_properties_t::handler.
// 'arg' is the context_entry_t* that was set as handler_arg; the entry is dumped
// while holding the global tool mutex. The 'group' parameter is not used.
// A failing mutex lock/unlock is reported with perror() and terminates the process.
void handler(rocprofiler_group_t group, void* arg) {
|
||||
if (pthread_mutex_lock(&mutex) != 0) {
|
||||
perror("pthread_mutex_lock");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
context_entry_t* entry = reinterpret_cast<context_entry_t*>(arg);
|
||||
dump_context(entry);
|
||||
|
||||
if (pthread_mutex_unlock(&mutex) != 0) {
|
||||
perror("pthread_mutex_unlock");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// profiling callback
|
||||
hsa_status_t dispatch_callback(
|
||||
const rocprofiler_callback_data_t* callback_data,
|
||||
void* user_data,
|
||||
rocprofiler_group_t** group) {
|
||||
// HSA status
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
// Passed tool data
|
||||
dispatch_data_t* tool_data = reinterpret_cast<dispatch_data_t*>(user_data);
|
||||
// Profiling context
|
||||
rocprofiler_t* context = NULL;
|
||||
// Context entry
|
||||
context_entry_t* entry = alloc_entry();
|
||||
// context properties
|
||||
rocprofiler_properties_t properties{};
|
||||
properties.handler = (file_name != NULL) ? handler : NULL;
|
||||
properties.handler_arg = (void*)entry;
|
||||
|
||||
// Open profiling context
|
||||
status = rocprofiler_open(0, tool_data->info, tool_data->info_count, &context, 0, &properties);
|
||||
status = rocprofiler_open(0, tool_data->info, tool_data->info_count, &context, 0/*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
|
||||
check_status(status);
|
||||
|
||||
rocprofiler_group_t* groups = NULL;
|
||||
@@ -203,12 +234,11 @@ hsa_status_t dispatch_callback(
|
||||
assert(group_count == 1);
|
||||
|
||||
*group = &groups[0];
|
||||
context_entry_t entry;
|
||||
entry.group = *group;
|
||||
entry.info = tool_data->info;
|
||||
entry.info_count = tool_data->info_count;
|
||||
entry.data = *callback_data;
|
||||
store_entry(entry);
|
||||
entry->group = *group;
|
||||
entry->info = tool_data->info;
|
||||
entry->info_count = tool_data->info_count;
|
||||
entry->data = *callback_data;
|
||||
entry->file_handle = tool_data->file_handle;
|
||||
|
||||
return status;
|
||||
}
|
||||
@@ -223,6 +253,7 @@ CONSTRUCTOR_API void constructor() {
|
||||
|
||||
// Set output file
|
||||
file_name = getenv("ROCP_OUTPUT");
|
||||
FILE* file_handle = NULL;
|
||||
if (file_name != NULL) {
|
||||
file_handle = fopen(file_name, "w");
|
||||
if (file_handle == NULL) {
|
||||
@@ -319,12 +350,13 @@ CONSTRUCTOR_API void constructor() {
|
||||
dispatch_data->info = info;
|
||||
dispatch_data->info_count = info_count;
|
||||
dispatch_data->group_index = 0;
|
||||
dispatch_data->file_handle = file_handle;
|
||||
rocprofiler_set_dispatch_observer(dispatch_callback, dispatch_data);
|
||||
}
|
||||
}
|
||||
|
||||
DESTRUCTOR_API void destructor() {
|
||||
printf("\nROCPRofiler: %u contexts collected", context_array_index);
|
||||
printf("\nROCPRofiler: %u contexts collected", context_array_count);
|
||||
if (file_name == NULL) {
|
||||
printf("\n");
|
||||
} else {
|
||||
|
||||
+2
-1
@@ -13,9 +13,10 @@ export ROCPROFILER_LOG=1
|
||||
export HSA_TOOLS_LIB=librocprofiler64.so
|
||||
export ROCP_TOOL_LIB=test/libtool.so
|
||||
export ROCP_HSA_INTERCEPT=1
|
||||
unset ROCP_PROXY_QUEUE
|
||||
export ROCP_METRICS=metrics.xml
|
||||
export ROCP_INPUT=input.xml
|
||||
unset ROCP_PROXY_QUEUE
|
||||
export ROCP_OUTPUT=output.txt
|
||||
|
||||
echo "Run simple profiling test"
|
||||
if [ -n "$1" ] ; then
|
||||
|
||||
Ссылка в новой задаче
Block a user