adding timestamps dispatch/begin/end/complete

Change-Id: I8c23e5f7ff75174243224ff008ba266553ee30c8


[ROCm/rocprofiler commit: 363700581a]
Este commit está contenido en:
Evgeny
2018-03-05 20:59:01 -06:00
padre 7b1d142d54
commit 777eb116d4
Se han modificado 8 ficheros con 222 adiciones y 64 borrados
+15 -6
Ver fichero
@@ -212,14 +212,23 @@ hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling contex
// Queue callbacks for initiating profiling per kernel dispatch and to wait
// the profiling data on the queue destroy.
// Dispatch record
typedef struct {
uint64_t dispatch; // dispatch timestamp
uint64_t begin; // begin timestamp
uint64_t end; // end timestamp
uint64_t complete; // completion signal timestamp
} rocprofiler_dispatch_record_t;
// Profiling callback data
typedef struct {
hsa_agent_t agent;
uint32_t agent_index;
const hsa_queue_t* queue;
uint64_t queue_index;
uint64_t kernel_object;
const char* kernel_name;
hsa_agent_t agent; // GPU agent handle
uint32_t agent_index; // GPU index
const hsa_queue_t* queue; // HSA queue
uint64_t queue_index; // Index in the queue
uint64_t kernel_object; // Kernel object handle
const char* kernel_name; // Kernel name
const rocprofiler_dispatch_record_t* record; // Dispatch record
} rocprofiler_callback_data_t;
// Profiling callback type
+5 -3
Ver fichero
@@ -14,6 +14,7 @@
#include "core/types.h"
#include "util/exception.h"
#include "util/hsa_rsrc_factory.h"
#include "util/logger.h"
namespace rocprofiler {
struct rocprofiler_contex_t;
@@ -155,7 +156,8 @@ class Context {
hsa_rsrc_(&util::HsaRsrcFactory::Instance()),
api_(hsa_rsrc_->AqlProfileApi()),
handler_(handler),
handler_arg_(handler_arg) {
handler_arg_(handler_arg)
{
metrics_ = MetricsDict::Create(agent_info);
if (metrics_ == NULL) EXC_RAISING(HSA_STATUS_ERROR, "MetricsDict create failed");
Initialize(info, info_count);
@@ -334,8 +336,8 @@ class Context {
while (!complete) {
const hsa_signal_value_t signal_value = hsa_signal_wait_scacquire(tuple.completion_signal, HSA_SIGNAL_CONDITION_LT, 1, timeout,
HSA_WAIT_STATE_BLOCKED);
complete = (signal_value == 0);
if (!complete) printf("ROCProfiler: Signal timeout, signal(%d) timeout(0x%lx)\n", (int)signal_value, timeout);
complete = (signal_value < 1);
if (!complete) WARN_LOGGING("timeout");
}
for (rocprofiler_feature_t* rinfo : *(tuple.info_vector)) rinfo->data.kind = ROCPROFILER_DATA_KIND_UNINIT;
callback_data_t callback_data{tuple.info_vector, tuple.info_vector->size(), NULL};
@@ -12,4 +12,6 @@ InterceptQueue::queue_callback_t InterceptQueue::destroy_callback_ = NULL;
void* InterceptQueue::callback_data_ = NULL;
InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL;
const char* InterceptQueue::kernel_none_ = "";
uint64_t InterceptQueue::timeout_ = UINT64_MAX;
Tracker* InterceptQueue::tracker_ = NULL;
} // namespace rocprofiler
+20 -9
Ver fichero
@@ -12,7 +12,9 @@
#include "core/context.h"
#include "core/proxy_queue.h"
#include "core/tracker.h"
#include "core/types.h"
#include "inc/rocprofiler.h"
#include "util/hsa_rsrc_factory.h"
namespace rocprofiler {
@@ -35,18 +37,19 @@ class InterceptQueue {
hsa_status_t status = HSA_STATUS_ERROR;
std::lock_guard<mutex_t> lck(mutex_);
if (!obj_map_) obj_map_ = new obj_map_t;
ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size,
group_segment_size, queue, &status);
if (status == HSA_STATUS_SUCCESS) {
InterceptQueue* obj = new InterceptQueue(agent, *queue, proxy);
(*obj_map_)[(uint64_t)(*queue)] = obj;
status = proxy->SetInterceptCB(OnSubmitCB, obj);
}
if (status != HSA_STATUS_SUCCESS) abort();
if (!tracker_) tracker_ = new Tracker(timeout_);
status = hsa_amd_profiling_set_profiler_enabled(*queue, true);
if (status != HSA_STATUS_SUCCESS) abort();
if (!obj_map_) obj_map_ = new obj_map_t;
InterceptQueue* obj = new InterceptQueue(agent, *queue, proxy);
(*obj_map_)[(uint64_t)(*queue)] = obj;
status = proxy->SetInterceptCB(OnSubmitCB, obj);
return status;
}
@@ -86,12 +89,15 @@ class InterceptQueue {
const hsa_kernel_dispatch_packet_t* dispatch_packet =
reinterpret_cast<const hsa_kernel_dispatch_packet_t*>(packet);
const char* kernel_name = GetKernelName(dispatch_packet);
const auto* entry = tracker_->Add(obj->agent_info_->dev_id, dispatch_packet->completion_signal);
const_cast<hsa_kernel_dispatch_packet_t*>(dispatch_packet)->completion_signal = entry->signal;
rocprofiler_callback_data_t data = {obj->agent_info_->dev_id,
obj->agent_info_->dev_index,
obj->queue_,
user_que_idx,
dispatch_packet->kernel_object,
kernel_name};
kernel_name,
entry->record};
hsa_status_t status = dispatch_callback_(&data, callback_data_, &group);
free(const_cast<char*>(kernel_name));
if ((status == HSA_STATUS_SUCCESS) && (group.context != NULL)) {
@@ -130,6 +136,8 @@ class InterceptQueue {
destroy_callback_ = destroy_callback;
}
static void SetTimeout(uint64_t timeout) { timeout_ = timeout; }
private:
InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) :
queue_(queue),
@@ -178,6 +186,9 @@ class InterceptQueue {
static void* callback_data_;
static obj_map_t* obj_map_;
static const char* kernel_none_;
static uint64_t timeout_;
static Tracker* tracker_;
static const bool tracker_on_ = true;
hsa_queue_t* const queue_;
ProxyQueue* const proxy_;
@@ -146,6 +146,7 @@ CONSTRUCTOR_API void constructor() {
if (timeout_str != NULL) {
const uint64_t timeout_val = strtoull(timeout_str, NULL, 0);
Context::SetTimeout(timeout_val);
InterceptQueue::SetTimeout(timeout_val);
}
}
+113
Ver fichero
@@ -0,0 +1,113 @@
#ifndef SRC_CORE_TRACKER_H_
#define SRC_CORE_TRACKER_H_
#include <assert.h>
#include <hsa.h>
#include <hsa_ext_amd.h>
#include <list>
#include "inc/rocprofiler.h"
#include "util/exception.h"
#include "util/logger.h"
namespace rocprofiler {
class Tracker {
public:
typedef rocprofiler_dispatch_record_t record_t;
struct entry_t;
typedef std::list<entry_t*> sig_list_t;
struct entry_t {
Tracker* tracker;
sig_list_t::iterator it;
hsa_agent_t agent;
hsa_signal_t orig;
hsa_signal_t signal;
record_t* record;
};
Tracker(uint64_t timeout = UINT64_MAX) : timeout_(timeout) {}
~Tracker() {
for (entry_t* entry : sig_list_) {
assert(entry != NULL);
while (1) {
const hsa_signal_value_t signal_value = hsa_signal_wait_scacquire(
entry->signal,
HSA_SIGNAL_CONDITION_LT,
1,
timeout_,
HSA_WAIT_STATE_BLOCKED);
if (signal_value < 1) break;
else WARN_LOGGING("tracker timeout");
}
Del(entry);
}
}
// Add tracker entry
entry_t* Add(const hsa_agent_t& agent, const hsa_signal_t& orig) {
entry_t* entry = new entry_t{};
assert(entry);
entry->tracker = this;
entry->it = sig_list_.insert(sig_list_.begin(), entry);
entry->agent = agent;
entry->orig = orig;
hsa_status_t status = hsa_signal_create(1, 0, NULL, &(entry->signal));
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_signal_create");
record_t* record = new record_t{};
assert(record);
entry->record = record;
status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &record->dispatch);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)");
hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler");
return entry;
}
private:
// Delete tracker entry
void Del(entry_t* entry) {
hsa_signal_destroy(entry->signal);
sig_list_.erase(entry->it);
delete entry;
}
// Handler for packet completion
static bool Handler(hsa_signal_value_t value, void* arg) {
entry_t* entry = reinterpret_cast<entry_t*>(arg);
record_t* record = entry->record;
hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &record->complete);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)");
hsa_amd_profiling_dispatch_time_t dispatch_time{};
status = hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_dispatch_time");
record->begin = dispatch_time.start;
record->end = dispatch_time.end;
hsa_signal_t orig = entry->orig;
if (orig.handle) {
const hsa_signal_value_t value = hsa_signal_load_relaxed(orig);
hsa_signal_store_relaxed(orig, value - 1);
}
entry->tracker->Del(entry);
return false;
}
// Timeout for wait on destruction
uint64_t timeout_;
// Tracked signals list
sig_list_t sig_list_;
};
} // namespace rocprofiler
#endif // SRC_CORE_TRACKER_H_
+9 -1
Ver fichero
@@ -92,7 +92,8 @@ class Logger {
if (messaging) {
message_[GetTid()] = "";
} else if (streaming_) {
//Put("\n");
Put("\n");
dirty_ = false;
}
messaging_ = messaging;
streaming_ = messaging;
@@ -148,6 +149,13 @@ class Logger {
<< rocprofiler::util::Logger::endl; \
}
#define WARN_LOGGING(stream) \
{ \
std::cerr << "ROCProfiler: " << stream << std::endl; \
rocprofiler::util::Logger::Instance() << "warning: " << rocprofiler::util::Logger::begm << stream \
<< rocprofiler::util::Logger::endl; \
}
#ifdef DEBUG
#define DBG_LOGGING(stream) \
{ \
+57 -45
Ver fichero
@@ -254,24 +254,36 @@ void dump_context(context_entry_t* entry) {
const rocprofiler_feature_t* features = entry->features;
const unsigned feature_count = entry->feature_count;
fprintf(file_handle,
"dispatch[%u], queue_index(%lu), kernel_name(\"%s\"):\n", index, entry->data.queue_index, entry->data.kernel_name);
fprintf(file_handle, "dispatch[%u], queue_index(%lu), kernel_name(\"%s\"), time(%lu,%lu,%lu,%lu)\n",
index,
entry->data.queue_index,
entry->data.kernel_name,
entry->data.record->dispatch,
entry->data.record->begin,
entry->data.record->end,
entry->data.record->complete);
fflush(file_handle);
rocprofiler_group_t group = entry->group;
status = rocprofiler_group_get_data(&group);
check_status(status);
// output_group(file, group, "Group[0] data");
status = rocprofiler_get_metrics(group.context);
check_status(status);
std::ostringstream oss;
oss << index << "__" << entry->data.kernel_name;
output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
free(const_cast<char*>(entry->data.kernel_name));
// Finishing cleanup
// Deleting profiling context will delete all allocated resources
rocprofiler_close(group.context);
delete entry->data.record;
entry->data.record = NULL;
rocprofiler_group_t& group = entry->group;
if (group.context != NULL) {
status = rocprofiler_group_get_data(&group);
check_status(status);
// output_group(file, group, "Group[0] data");
status = rocprofiler_get_metrics(group.context);
check_status(status);
std::ostringstream oss;
oss << index << "__" << entry->data.kernel_name;
output_results(file_handle, features, feature_count, group.context, oss.str().substr(0, KERNEL_NAME_LEN_MAX).c_str());
free(const_cast<char*>(entry->data.kernel_name));
// Finishing cleanup
// Deleting profiling context will delete all allocated resources
rocprofiler_close(group.context);
}
}
}
@@ -362,20 +374,22 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
properties.handler = (result_prefix != NULL) ? handler : NULL;
properties.handler_arg = (void*)entry;
// Open profiling context
status = rocprofiler_open(callback_data->agent, tool_data->features, tool_data->feature_count,
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
check_status(status);
// Check that we have only one profiling group
uint32_t group_count = 0;
status = rocprofiler_group_count(context, &group_count);
check_status(status);
assert(group_count == 1);
// Get group[0]
const uint32_t group_index = 0;
status = rocprofiler_get_group(context, group_index, group);
check_status(status);
if (tool_data->feature_count > 0) {
// Open profiling context
status = rocprofiler_open(callback_data->agent, tool_data->features, tool_data->feature_count,
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
check_status(status);
// Check that we have only one profiling group
uint32_t group_count = 0;
status = rocprofiler_group_count(context, &group_count);
check_status(status);
assert(group_count == 1);
// Get group[0]
const uint32_t group_index = 0;
status = rocprofiler_get_group(context, group_index, group);
check_status(status);
}
// Fill profiling context entry
entry->group = *group;
@@ -575,22 +589,20 @@ extern "C" PUBLIC_API void OnLoadTool()
// Adding dispatch observer
if (feature_count) {
rocprofiler_queue_callbacks_t callbacks_ptrs{0};
callbacks_ptrs.dispatch = dispatch_callback;
callbacks_ptrs.destroy = destroy_callback;
rocprofiler_queue_callbacks_t callbacks_ptrs{0};
callbacks_ptrs.dispatch = dispatch_callback;
callbacks_ptrs.destroy = destroy_callback;
callbacks_data = new callbacks_data_t{};
callbacks_data->features = features;
callbacks_data->feature_count = feature_count;
callbacks_data->group_index = 0;
callbacks_data->file_handle = result_file_handle;
callbacks_data->gpu_index = (gpu_index_vec->empty()) ? NULL : gpu_index_vec;
callbacks_data->kernel_string = (kernel_string_vec->empty()) ? NULL : kernel_string_vec;
callbacks_data->range = (range_vec->empty()) ? NULL : range_vec;;
callbacks_data = new callbacks_data_t{};
callbacks_data->features = features;
callbacks_data->feature_count = feature_count;
callbacks_data->group_index = 0;
callbacks_data->file_handle = result_file_handle;
callbacks_data->gpu_index = (gpu_index_vec->empty()) ? NULL : gpu_index_vec;
callbacks_data->kernel_string = (kernel_string_vec->empty()) ? NULL : kernel_string_vec;
callbacks_data->range = (range_vec->empty()) ? NULL : range_vec;;
rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_data);
}
rocprofiler_set_queue_callbacks(callbacks_ptrs, callbacks_data);
xml::Xml::Destroy(xml);
}