adding timestamps dispatch/begin/end/complete
Change-Id: I8c23e5f7ff75174243224ff008ba266553ee30c8
[ROCm/rocprofiler commit: 363700581a]
Este commit está contenido en:
@@ -212,14 +212,23 @@ hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling contex
|
||||
// Queue callbacks for initiating profiling per kernel dispatch and to wait
|
||||
// the profiling data on the queue destroy.
|
||||
|
||||
// Dispatch record
|
||||
typedef struct {
|
||||
uint64_t dispatch; // dispatch timestamp
|
||||
uint64_t begin; // begin timestamp
|
||||
uint64_t end; // end timestamp
|
||||
uint64_t complete; // completion signal timestamp
|
||||
} rocprofiler_dispatch_record_t;
|
||||
|
||||
// Profiling callback data
|
||||
typedef struct {
|
||||
hsa_agent_t agent;
|
||||
uint32_t agent_index;
|
||||
const hsa_queue_t* queue;
|
||||
uint64_t queue_index;
|
||||
uint64_t kernel_object;
|
||||
const char* kernel_name;
|
||||
hsa_agent_t agent; // GPU agent handle
|
||||
uint32_t agent_index; // GPU index
|
||||
const hsa_queue_t* queue; // HSA queue
|
||||
uint64_t queue_index; // Index in the queue
|
||||
uint64_t kernel_object; // Kernel object handle
|
||||
const char* kernel_name; // Kernel name
|
||||
const rocprofiler_dispatch_record_t* record; // Dispatch record
|
||||
} rocprofiler_callback_data_t;
|
||||
|
||||
// Profiling callback type
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "core/types.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
struct rocprofiler_contex_t;
|
||||
@@ -155,7 +156,8 @@ class Context {
|
||||
hsa_rsrc_(&util::HsaRsrcFactory::Instance()),
|
||||
api_(hsa_rsrc_->AqlProfileApi()),
|
||||
handler_(handler),
|
||||
handler_arg_(handler_arg) {
|
||||
handler_arg_(handler_arg)
|
||||
{
|
||||
metrics_ = MetricsDict::Create(agent_info);
|
||||
if (metrics_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "MetricsDict create failed");
|
||||
Initialize(info, info_count);
|
||||
@@ -334,8 +336,8 @@ class Context {
|
||||
while (!complete) {
|
||||
const hsa_signal_value_t signal_value = hsa_signal_wait_scacquire(tuple.completion_signal, HSA_SIGNAL_CONDITION_LT, 1, timeout,
|
||||
HSA_WAIT_STATE_BLOCKED);
|
||||
complete = (signal_value == 0);
|
||||
if (!complete) printf("ROCProfiler: Signal timeout, signal(%d) timeout(0x%lx)\n", (int)signal_value, timeout);
|
||||
complete = (signal_value < 1);
|
||||
if (!complete) WARN_LOGGING("timeout");
|
||||
}
|
||||
for (rocprofiler_feature_t* rinfo : *(tuple.info_vector)) rinfo->data.kind = ROCPROFILER_DATA_KIND_UNINIT;
|
||||
callback_data_t callback_data{tuple.info_vector, tuple.info_vector->size(), NULL};
|
||||
|
||||
@@ -12,4 +12,6 @@ InterceptQueue::queue_callback_t InterceptQueue::destroy_callback_ = NULL;
|
||||
void* InterceptQueue::callback_data_ = NULL;
|
||||
InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL;
|
||||
const char* InterceptQueue::kernel_none_ = "";
|
||||
uint64_t InterceptQueue::timeout_ = UINT64_MAX;
|
||||
Tracker* InterceptQueue::tracker_ = NULL;
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -12,7 +12,9 @@
|
||||
|
||||
#include "core/context.h"
|
||||
#include "core/proxy_queue.h"
|
||||
#include "core/tracker.h"
|
||||
#include "core/types.h"
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
@@ -35,18 +37,19 @@ class InterceptQueue {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
|
||||
if (!obj_map_) obj_map_ = new obj_map_t;
|
||||
|
||||
ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size,
|
||||
group_segment_size, queue, &status);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
InterceptQueue* obj = new InterceptQueue(agent, *queue, proxy);
|
||||
(*obj_map_)[(uint64_t)(*queue)] = obj;
|
||||
status = proxy->SetInterceptCB(OnSubmitCB, obj);
|
||||
}
|
||||
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
|
||||
if (!tracker_) tracker_ = new Tracker(timeout_);
|
||||
status = hsa_amd_profiling_set_profiler_enabled(*queue, true);
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
|
||||
if (!obj_map_) obj_map_ = new obj_map_t;
|
||||
InterceptQueue* obj = new InterceptQueue(agent, *queue, proxy);
|
||||
(*obj_map_)[(uint64_t)(*queue)] = obj;
|
||||
status = proxy->SetInterceptCB(OnSubmitCB, obj);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -86,12 +89,15 @@ class InterceptQueue {
|
||||
const hsa_kernel_dispatch_packet_t* dispatch_packet =
|
||||
reinterpret_cast<const hsa_kernel_dispatch_packet_t*>(packet);
|
||||
const char* kernel_name = GetKernelName(dispatch_packet);
|
||||
const auto* entry = tracker_->Add(obj->agent_info_->dev_id, dispatch_packet->completion_signal);
|
||||
const_cast<hsa_kernel_dispatch_packet_t*>(dispatch_packet)->completion_signal = entry->signal;
|
||||
rocprofiler_callback_data_t data = {obj->agent_info_->dev_id,
|
||||
obj->agent_info_->dev_index,
|
||||
obj->queue_,
|
||||
user_que_idx,
|
||||
dispatch_packet->kernel_object,
|
||||
kernel_name};
|
||||
kernel_name,
|
||||
entry->record};
|
||||
hsa_status_t status = dispatch_callback_(&data, callback_data_, &group);
|
||||
free(const_cast<char*>(kernel_name));
|
||||
if ((status == HSA_STATUS_SUCCESS) && (group.context != NULL)) {
|
||||
@@ -130,6 +136,8 @@ class InterceptQueue {
|
||||
destroy_callback_ = destroy_callback;
|
||||
}
|
||||
|
||||
static void SetTimeout(uint64_t timeout) { timeout_ = timeout; }
|
||||
|
||||
private:
|
||||
InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) :
|
||||
queue_(queue),
|
||||
@@ -178,6 +186,9 @@ class InterceptQueue {
|
||||
static void* callback_data_;
|
||||
static obj_map_t* obj_map_;
|
||||
static const char* kernel_none_;
|
||||
static uint64_t timeout_;
|
||||
static Tracker* tracker_;
|
||||
static const bool tracker_on_ = true;
|
||||
|
||||
hsa_queue_t* const queue_;
|
||||
ProxyQueue* const proxy_;
|
||||
|
||||
@@ -146,6 +146,7 @@ CONSTRUCTOR_API void constructor() {
|
||||
if (timeout_str != NULL) {
|
||||
const uint64_t timeout_val = strtoull(timeout_str, NULL, 0);
|
||||
Context::SetTimeout(timeout_val);
|
||||
InterceptQueue::SetTimeout(timeout_val);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,113 @@
|
||||
#ifndef SRC_CORE_TRACKER_H_
|
||||
#define SRC_CORE_TRACKER_H_
|
||||
|
||||
#include <assert.h>
|
||||
#include <hsa.h>
|
||||
#include <hsa_ext_amd.h>
|
||||
|
||||
#include <list>
|
||||
|
||||
#include "inc/rocprofiler.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
class Tracker {
|
||||
public:
|
||||
typedef rocprofiler_dispatch_record_t record_t;
|
||||
struct entry_t;
|
||||
typedef std::list<entry_t*> sig_list_t;
|
||||
struct entry_t {
|
||||
Tracker* tracker;
|
||||
sig_list_t::iterator it;
|
||||
hsa_agent_t agent;
|
||||
hsa_signal_t orig;
|
||||
hsa_signal_t signal;
|
||||
record_t* record;
|
||||
};
|
||||
|
||||
Tracker(uint64_t timeout = UINT64_MAX) : timeout_(timeout) {}
|
||||
~Tracker() {
|
||||
for (entry_t* entry : sig_list_) {
|
||||
assert(entry != NULL);
|
||||
while (1) {
|
||||
const hsa_signal_value_t signal_value = hsa_signal_wait_scacquire(
|
||||
entry->signal,
|
||||
HSA_SIGNAL_CONDITION_LT,
|
||||
1,
|
||||
timeout_,
|
||||
HSA_WAIT_STATE_BLOCKED);
|
||||
if (signal_value < 1) break;
|
||||
else WARN_LOGGING("tracker timeout");
|
||||
}
|
||||
Del(entry);
|
||||
}
|
||||
}
|
||||
|
||||
// Add tracker entry
|
||||
entry_t* Add(const hsa_agent_t& agent, const hsa_signal_t& orig) {
|
||||
entry_t* entry = new entry_t{};
|
||||
assert(entry);
|
||||
entry->tracker = this;
|
||||
entry->it = sig_list_.insert(sig_list_.begin(), entry);
|
||||
|
||||
entry->agent = agent;
|
||||
entry->orig = orig;
|
||||
hsa_status_t status = hsa_signal_create(1, 0, NULL, &(entry->signal));
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_signal_create");
|
||||
|
||||
record_t* record = new record_t{};
|
||||
assert(record);
|
||||
entry->record = record;
|
||||
status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &record->dispatch);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)");
|
||||
|
||||
hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler");
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
private:
|
||||
// Delete tracker entry
|
||||
void Del(entry_t* entry) {
|
||||
hsa_signal_destroy(entry->signal);
|
||||
sig_list_.erase(entry->it);
|
||||
delete entry;
|
||||
}
|
||||
|
||||
// Handler for packet completion
|
||||
static bool Handler(hsa_signal_value_t value, void* arg) {
|
||||
entry_t* entry = reinterpret_cast<entry_t*>(arg);
|
||||
record_t* record = entry->record;
|
||||
|
||||
hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &record->complete);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)");
|
||||
|
||||
hsa_amd_profiling_dispatch_time_t dispatch_time{};
|
||||
status = hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_dispatch_time");
|
||||
|
||||
record->begin = dispatch_time.start;
|
||||
record->end = dispatch_time.end;
|
||||
|
||||
hsa_signal_t orig = entry->orig;
|
||||
if (orig.handle) {
|
||||
const hsa_signal_value_t value = hsa_signal_load_relaxed(orig);
|
||||
hsa_signal_store_relaxed(orig, value - 1);
|
||||
}
|
||||
entry->tracker->Del(entry);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Timeout for wait on destruction
|
||||
uint64_t timeout_;
|
||||
// Tracked signals list
|
||||
sig_list_t sig_list_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // SRC_CORE_TRACKER_H_
|
||||
@@ -92,7 +92,8 @@ class Logger {
|
||||
if (messaging) {
|
||||
message_[GetTid()] = "";
|
||||
} else if (streaming_) {
|
||||
//Put("\n");
|
||||
Put("\n");
|
||||
dirty_ = false;
|
||||
}
|
||||
messaging_ = messaging;
|
||||
streaming_ = messaging;
|
||||
@@ -148,6 +149,13 @@ class Logger {
|
||||
<< rocprofiler::util::Logger::endl; \
|
||||
}
|
||||
|
||||
#define WARN_LOGGING(stream) \
|
||||
{ \
|
||||
std::cerr << "ROCProfiler: " << stream << std::endl; \
|
||||
rocprofiler::util::Logger::Instance() << "warning: " << rocprofiler::util::Logger::begm << stream \
|
||||
<< rocprofiler::util::Logger::endl; \
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
#define DBG_LOGGING(stream) \
|
||||
{ \
|
||||
|
||||
@@ -254,24 +254,36 @@ void dump_context(context_entry_t* entry) {
|
||||
const rocprofiler_feature_t* features = entry->features;
|
||||
const unsigned feature_count = entry->feature_count;
|
||||
|
||||
fprintf(file_handle,
|
||||
"dispatch[%u], queue_index(%lu), kernel_name(\"%s\"):\n", index, entry->data.queue_index, entry->data.kernel_name);
|
||||
fprintf(file_handle, "dispatch[%u], queue_index(%lu), kernel_name(\"%s\"), time(%lu,%lu,%lu,%lu)\n",
|
||||
index,
|
||||
entry->data.queue_index,
|
||||
entry->data.kernel_name,
|
||||
entry->data.record->dispatch,
|
||||
entry->data.record->begin,
|
||||
entry->data.record->end,
|
||||
entry->data.record->complete);
|
||||
fflush(file_handle);
|
||||
|
||||
rocprofiler_group_t group = entry->group;
|
||||
status = rocprofiler_group_get_data(&group);
|
||||
check_status(status);
|
||||
// output_group(file, group, "Group[0] data");
|
||||
|
||||
status = rocprofiler_get_metrics(group.context);
|
||||
check_status(status);
|
||||
std::ostringstream oss;
|
||||
oss << index << "__" << entry->data.kernel_name;
|
||||
output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
|
||||
free(const_cast<char*>(entry->data.kernel_name));
|
||||
|
||||
// Finishing cleanup
|
||||
// Deleting profiling context will delete all allocated resources
|
||||
rocprofiler_close(group.context);
|
||||
delete entry->data.record;
|
||||
entry->data.record = NULL;
|
||||
|
||||
rocprofiler_group_t& group = entry->group;
|
||||
if (group.context != NULL) {
|
||||
status = rocprofiler_group_get_data(&group);
|
||||
check_status(status);
|
||||
// output_group(file, group, "Group[0] data");
|
||||
|
||||
status = rocprofiler_get_metrics(group.context);
|
||||
check_status(status);
|
||||
std::ostringstream oss;
|
||||
oss << index << "__" << entry->data.kernel_name;
|
||||
output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
|
||||
free(const_cast<char*>(entry->data.kernel_name));
|
||||
|
||||
// Finishing cleanup
|
||||
// Deleting profiling context will delete all allocated resources
|
||||
rocprofiler_close(group.context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -362,20 +374,22 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
|
||||
properties.handler = (result_prefix != NULL) ? handler : NULL;
|
||||
properties.handler_arg = (void*)entry;
|
||||
|
||||
// Open profiling context
|
||||
status = rocprofiler_open(callback_data->agent, tool_data->features, tool_data->feature_count,
|
||||
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
|
||||
check_status(status);
|
||||
|
||||
// Check that we have only one profiling group
|
||||
uint32_t group_count = 0;
|
||||
status = rocprofiler_group_count(context, &group_count);
|
||||
check_status(status);
|
||||
assert(group_count == 1);
|
||||
// Get group[0]
|
||||
const uint32_t group_index = 0;
|
||||
status = rocprofiler_get_group(context, group_index, group);
|
||||
check_status(status);
|
||||
if (tool_data->feature_count > 0) {
|
||||
// Open profiling context
|
||||
status = rocprofiler_open(callback_data->agent, tool_data->features, tool_data->feature_count,
|
||||
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
|
||||
check_status(status);
|
||||
|
||||
// Check that we have only one profiling group
|
||||
uint32_t group_count = 0;
|
||||
status = rocprofiler_group_count(context, &group_count);
|
||||
check_status(status);
|
||||
assert(group_count == 1);
|
||||
// Get group[0]
|
||||
const uint32_t group_index = 0;
|
||||
status = rocprofiler_get_group(context, group_index, group);
|
||||
check_status(status);
|
||||
}
|
||||
|
||||
// Fill profiling context entry
|
||||
entry->group = *group;
|
||||
@@ -575,22 +589,20 @@ extern "C" PUBLIC_API void OnLoadTool()
|
||||
|
||||
|
||||
// Adding dispatch observer
|
||||
if (feature_count) {
|
||||
rocprofiler_queue_callbacks_t callbacks_ptrs{0};
|
||||
callbacks_ptrs.dispatch = dispatch_callback;
|
||||
callbacks_ptrs.destroy = destroy_callback;
|
||||
rocprofiler_queue_callbacks_t callbacks_ptrs{0};
|
||||
callbacks_ptrs.dispatch = dispatch_callback;
|
||||
callbacks_ptrs.destroy = destroy_callback;
|
||||
|
||||
callbacks_data = new callbacks_data_t{};
|
||||
callbacks_data->features = features;
|
||||
callbacks_data->feature_count = feature_count;
|
||||
callbacks_data->group_index = 0;
|
||||
callbacks_data->file_handle = result_file_handle;
|
||||
callbacks_data->gpu_index = (gpu_index_vec->empty()) ? NULL : gpu_index_vec;
|
||||
callbacks_data->kernel_string = (kernel_string_vec->empty()) ? NULL : kernel_string_vec;
|
||||
callbacks_data->range = (range_vec->empty()) ? NULL : range_vec;;
|
||||
callbacks_data = new callbacks_data_t{};
|
||||
callbacks_data->features = features;
|
||||
callbacks_data->feature_count = feature_count;
|
||||
callbacks_data->group_index = 0;
|
||||
callbacks_data->file_handle = result_file_handle;
|
||||
callbacks_data->gpu_index = (gpu_index_vec->empty()) ? NULL : gpu_index_vec;
|
||||
callbacks_data->kernel_string = (kernel_string_vec->empty()) ? NULL : kernel_string_vec;
|
||||
callbacks_data->range = (range_vec->empty()) ? NULL : range_vec;;
|
||||
|
||||
rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_data);
|
||||
}
|
||||
rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_data);
|
||||
|
||||
xml::Xml::Destroy(xml);
|
||||
}
|
||||
|
||||
Referencia en una nueva incidencia
Block a user