queue profiled API

Change-Id: I08f496fa76903738e937d2f29dc558ed9def3e0a
Este commit está contenido en:
Evgeny
2018-07-09 20:33:58 -05:00
padre bc7a052294
commit 7ba92e192b
Se han modificado 7 ficheros con 127 adiciones y 35 borrados
+11 -4
Ver fichero
@@ -207,10 +207,10 @@ hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling contex
// Dispatch record
typedef struct {
uint64_t dispatch; // dispatch timestamp
uint64_t begin; // begin timestamp
uint64_t end; // end timestamp
uint64_t complete; // completion signal timestamp
uint64_t dispatch; // dispatch timestamp, ns
uint64_t begin; // kernel begin timestamp, ns
uint64_t end; // kernel end timestamp, ns
uint64_t complete; // completion signal timestamp, ns
} rocprofiler_dispatch_record_t;
// Profiling callback data
@@ -371,6 +371,13 @@ hsa_status_t rocprofiler_query_info(
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void *data), // callback
void *data); // [in/out] data passed to callback
// Creates a profiled queue. All dispatches on this queue will be profiled.
// The definition of this function forwards every argument unchanged to
// rocprofiler::InterceptQueue::QueueCreateTracked() and returns its status.
// NOTE(review): the parameter list appears to mirror hsa_queue_create() -
// confirm against the HSA runtime headers before documenting each argument.
hsa_status_t rocprofiler_queue_create_profiled(
hsa_agent_t agent_handle,uint32_t size, hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
void* data, uint32_t private_segment_size, uint32_t group_segment_size,
hsa_queue_t** queue);
#ifdef __cplusplus
} // extern "C" block
#endif // __cplusplus
+2
Ver fichero
@@ -39,4 +39,6 @@ const char* InterceptQueue::kernel_none_ = "";
// Value passed to the Tracker constructor when the tracker is first created
uint64_t InterceptQueue::timeout_ = UINT64_MAX;
// Tracker instance; allocated by InterceptQueueCreate() the first time
// tracking is requested, NULL until then
Tracker* InterceptQueue::tracker_ = NULL;
// Class-wide switch: when true, queues created through InterceptQueueCreate()
// get profiling enabled even if the caller did not request it
bool InterceptQueue::tracker_on_ = false;
// Set to true while InterceptQueueCreate() is executing and reset on exit;
// used there to abort on a recursive call
bool InterceptQueue::in_constr_call_ = false;
} // namespace rocprofiler
+70 -27
Ver fichero
@@ -53,22 +53,26 @@ class InterceptQueue {
static void HsaIntercept(HsaApiTable* table);
static hsa_status_t QueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
static hsa_status_t InterceptQueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
void (*callback)(hsa_status_t status, hsa_queue_t* source,
void* data),
void* data, uint32_t private_segment_size,
uint32_t group_segment_size, hsa_queue_t** queue) {
hsa_status_t status = HSA_STATUS_ERROR;
uint32_t group_segment_size, hsa_queue_t** queue,
const bool& tracker_on) {
std::lock_guard<mutex_t> lck(mutex_);
hsa_status_t status = HSA_STATUS_ERROR;
if (in_constr_call_) EXC_ABORT(status, "recursive InterceptQueueCreate()");
in_constr_call_ = true;
ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size,
group_segment_size, queue, &status);
if (status != HSA_STATUS_SUCCESS) abort();
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "ProxyQueue::Create()");
if (tracker_on_) {
if (tracker_on || tracker_on_) {
if (tracker_ == NULL) tracker_ = new Tracker(timeout_);
status = hsa_amd_profiling_set_profiler_enabled(*queue, true);
if (status != HSA_STATUS_SUCCESS) abort();
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_set_profiler_enabled()");
}
if (!obj_map_) obj_map_ = new obj_map_t;
@@ -76,25 +80,36 @@ class InterceptQueue {
(*obj_map_)[(uint64_t)(*queue)] = obj;
status = proxy->SetInterceptCB(OnSubmitCB, obj);
in_constr_call_ = false;
return status;
}
// Intercepted queue creation that does not force tracking: all arguments are
// handed to InterceptQueueCreate() with tracker_on == false, so profiling is
// enabled only when the class-wide tracker_on_ flag is set.
// Returns the status produced by InterceptQueueCreate().
static hsa_status_t QueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
                                void (*callback)(hsa_status_t status, hsa_queue_t* source,
                                                 void* data),
                                void* data, uint32_t private_segment_size,
                                uint32_t group_segment_size, hsa_queue_t** queue) {
  const bool force_tracking = false;
  return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size,
                              group_segment_size, queue, force_tracking);
}
// Intercepted queue creation with tracking forced on: all arguments are
// handed to InterceptQueueCreate() with tracker_on == true, regardless of the
// class-wide tracker_on_ flag.
// Returns the status produced by InterceptQueueCreate().
static hsa_status_t QueueCreateTracked(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
                                       void (*callback)(hsa_status_t status, hsa_queue_t* source,
                                                        void* data),
                                       void* data, uint32_t private_segment_size,
                                       uint32_t group_segment_size, hsa_queue_t** queue) {
  const bool force_tracking = true;
  return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size,
                              group_segment_size, queue, force_tracking);
}
static hsa_status_t QueueDestroy(hsa_queue_t* queue) {
std::lock_guard<mutex_t> lck(mutex_);
hsa_status_t status = HSA_STATUS_ERROR;
if (destroy_callback_ != NULL) {
status = destroy_callback_(queue, callback_data_);
if (status != HSA_STATUS_SUCCESS) return status;
}
if (destroy_callback_ != NULL) {
status = destroy_callback_(queue, callback_data_);
}
obj_map_t::iterator it = obj_map_->find((uint64_t)queue);
if (it != obj_map_->end()) {
const InterceptQueue* obj = it->second;
assert(queue == obj->queue_);
delete obj;
obj_map_->erase(it);
status = HSA_STATUS_SUCCESS;
if (status == HSA_STATUS_SUCCESS) {
status = DelObj(queue);
}
return status;
@@ -179,17 +194,9 @@ class InterceptQueue {
// Reports the current value of the class-wide tracker_on_ flag
static bool IsTrackerOn() {
  return tracker_on_;
}
private:
// Binds the intercept object to its HSA queue and proxy queue, and stores the
// agent info obtained from util::HsaRsrcFactory::Instance() for 'agent'
InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) :
queue_(queue),
proxy_(proxy)
{
agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent);
}
// Passes the proxy queue held in proxy_ to ProxyQueue::Destroy()
~InterceptQueue() { ProxyQueue::Destroy(proxy_); }
static packet_word_t GetHeaderType(const packet_t* packet) {
static hsa_packet_type_t GetHeaderType(const packet_t* packet) {
const packet_word_t* header = reinterpret_cast<const packet_word_t*>(packet);
return (*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask;
return static_cast<hsa_packet_type_t>((*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask);
}
static const char* GetKernelName(const hsa_kernel_dispatch_packet_t* dispatch_packet) {
@@ -219,6 +226,41 @@ class InterceptQueue {
return funcname;
}
// Looks up the intercept queue object registered for an HSA queue.
//   queue - HSA queue pointer; its address is the key into obj_map_
// Returns the registered InterceptQueue, or NULL when the queue is not in the
// map or when no queue has been registered yet.
static InterceptQueue* GetObj(const hsa_queue_t* queue) {
  std::lock_guard<mutex_t> lck(mutex_);
  // obj_map_ is allocated lazily by InterceptQueueCreate(), so it may still be
  // NULL here; report "not found" instead of dereferencing a null pointer
  if (obj_map_ == NULL) return NULL;
  InterceptQueue* obj = NULL;
  obj_map_t::const_iterator it = obj_map_->find((uint64_t)queue);
  if (it != obj_map_->end()) {
    obj = it->second;
    // The stored object must refer to the same queue it is keyed by
    assert(queue == obj->queue_);
  }
  return obj;
}
// Deletes the intercept queue object registered for an HSA queue and removes
// its entry from obj_map_.
//   queue - HSA queue pointer; its address is the key into obj_map_
// Returns HSA_STATUS_SUCCESS when an entry was found and deleted, and
// HSA_STATUS_ERROR when the queue is unknown or nothing was registered yet.
// NOTE(review): QueueDestroy() calls this while already holding mutex_, which
// relies on mutex_t being a recursive mutex - confirm.
static hsa_status_t DelObj(const hsa_queue_t* queue) {
  std::lock_guard<mutex_t> lck(mutex_);
  hsa_status_t status = HSA_STATUS_ERROR;
  // obj_map_ is allocated lazily by InterceptQueueCreate(), so it may still be
  // NULL here; treat that as "not found" instead of dereferencing it
  if (obj_map_ == NULL) return status;
  obj_map_t::const_iterator it = obj_map_->find((uint64_t)queue);
  if (it != obj_map_->end()) {
    const InterceptQueue* obj = it->second;
    // The stored object must refer to the same queue it is keyed by
    assert(queue == obj->queue_);
    delete obj;
    obj_map_->erase(it);
    status = HSA_STATUS_SUCCESS;
  }
  return status;
}
// Ties this intercept object to the given HSA queue and its proxy queue, and
// stores the agent info that util::HsaRsrcFactory returns for 'agent'
InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy)
    : queue_(queue), proxy_(proxy) {
  auto&& rsrc_factory = util::HsaRsrcFactory::Instance();
  agent_info_ = rsrc_factory.GetAgentInfo(agent);
}
// Hands the proxy queue held in proxy_ to ProxyQueue::Destroy()
~InterceptQueue() {
  ProxyQueue::Destroy(proxy_);
}
static mutex_t mutex_;
static const packet_word_t header_type_mask = (1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1;
static rocprofiler_callback_t dispatch_callback_;
@@ -229,6 +271,7 @@ class InterceptQueue {
static uint64_t timeout_;
static Tracker* tracker_;
static bool tracker_on_;
static bool in_constr_call_;
hsa_queue_t* const queue_;
ProxyQueue* const proxy_;
+36 -2
Ver fichero
@@ -232,11 +232,35 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa
rocprofiler::SaveHsaApi(table);
rocprofiler::ProxyQueue::InitFactory();
bool intercept_mode = false;
// Checking environment to enable intercept mode
const char* intercept_env = getenv("ROCP_HSA_INTERCEPT");
if (intercept_env != NULL) {
if (strncmp(intercept_env, "1", 1) == 0) intercept_mode = true;
switch (atoi(intercept_env)) {
// Intercepting disabled
case 0:
intercept_mode = false;
rocprofiler::InterceptQueue::TrackerOn(false);
break;
// Intercepting enabled without timestamping
case 1:
intercept_mode = true;
rocprofiler::InterceptQueue::TrackerOn(false);
break;
// Intercepting enabled with timestamping
case 2:
intercept_mode = true;
rocprofiler::InterceptQueue::TrackerOn(true);
break;
default:
ERR_LOGGING("Bad ROCP_HSA_INTERCEPT env var value (" << intercept_env << ")");
return false;
}
}
if (rocprofiler::LoadTool()) intercept_mode = true;
// Loading a tool lib and setting of intercept mode
intercept_mode = rocprofiler::LoadTool();
// HSA intercepting
if (intercept_mode) {
rocprofiler::ProxyQueue::HsaIntercept(table);
@@ -558,4 +582,14 @@ PUBLIC_API hsa_status_t rocprofiler_query_info(
API_METHOD_SUFFIX
}
// Creates a profiled queue. All dispatches on this queue will be profiled.
// Every argument is passed through unchanged to
// rocprofiler::InterceptQueue::QueueCreateTracked(), whose status is returned.
// NOTE(review): rocprofiler_query_info() above closes with API_METHOD_SUFFIX;
// this function has no such wrapper - confirm that is intentional.
PUBLIC_API hsa_status_t rocprofiler_queue_create_profiled(
    hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
    void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
    void* data, uint32_t private_segment_size, uint32_t group_segment_size,
    hsa_queue_t** queue)
{
  const hsa_status_t status = rocprofiler::InterceptQueue::QueueCreateTracked(
      agent, size, type, callback, data, private_segment_size, group_segment_size, queue);
  return status;
}
} // extern "C"
+5 -1
Ver fichero
@@ -99,9 +99,13 @@ class Tracker {
record_t* record = new record_t{};
assert(record);
entry->record = record;
status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &record->dispatch);
timestamp_t dispatch_timestamp = 0;
status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &dispatch_timestamp);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)");
record->dispatch = timestamp2ns(dispatch_timestamp);
status = hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler");
+2
Ver fichero
@@ -38,6 +38,8 @@ export ROCP_TOOL_LIB=libtool.so
unset ROCP_PROXY_QUEUE
# ROC profiler metrics config file
export ROCP_METRICS=metrics.xml
# ROC profiler kernel timing
export ROCP_TIMESTAMP_ON=1
# output directory for the tool library, for metrics results file 'results.txt'
# and SQTT trace files 'thread_trace.se<n>.out'
export ROCP_OUTPUT_DIR=./RESULTS
+1 -1
Ver fichero
@@ -600,7 +600,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
feature_count = next_offset - set_offset;
}
if (tool_data->feature_count > 0) {
if (feature_count > 0) {
// Open profiling context
status = rocprofiler_open(callback_data->agent, features, feature_count,
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);