queue profiled API
Change-Id: I08f496fa76903738e937d2f29dc558ed9def3e0a
This commit is contained in:
+11
-4
@@ -207,10 +207,10 @@ hsa_status_t rocprofiler_reset(rocprofiler_t* context, // [in] profiling contex
|
||||
|
||||
// Dispatch record
|
||||
typedef struct {
|
||||
uint64_t dispatch; // dispatch timestamp
|
||||
uint64_t begin; // begin timestamp
|
||||
uint64_t end; // end timestamp
|
||||
uint64_t complete; // completion signal timestamp
|
||||
uint64_t dispatch; // dispatch timestamp, ns
|
||||
uint64_t begin; // kernel begin timestamp, ns
|
||||
uint64_t end; // kernel end timestamp, ns
|
||||
uint64_t complete; // completion signal timestamp, ns
|
||||
} rocprofiler_dispatch_record_t;
|
||||
|
||||
// Profiling callback data
|
||||
@@ -371,6 +371,13 @@ hsa_status_t rocprofiler_query_info(
|
||||
hsa_status_t (*callback)(const rocprofiler_info_data_t info, void *data), // callback
|
||||
void *data); // [in/out] data passed to callback
|
||||
|
||||
// Creates a profiled queue. All dispatches on this queue will be profiled
|
||||
hsa_status_t rocprofiler_queue_create_profiled(
|
||||
hsa_agent_t agent_handle,uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
|
||||
void* data, uint32_t private_segment_size, uint32_t group_segment_size,
|
||||
hsa_queue_t** queue);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
@@ -39,4 +39,6 @@ const char* InterceptQueue::kernel_none_ = "";
|
||||
uint64_t InterceptQueue::timeout_ = UINT64_MAX;  // handed to the Tracker constructor when the tracker is created
|
||||
Tracker* InterceptQueue::tracker_ = NULL;  // created on first need inside InterceptQueueCreate()
|
||||
bool InterceptQueue::tracker_on_ = false;  // when set, InterceptQueueCreate() enables profiling on each new queue
|
||||
bool InterceptQueue::in_constr_call_ = false;  // set for the duration of InterceptQueueCreate(); a nested call aborts
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -53,22 +53,26 @@ class InterceptQueue {
|
||||
|
||||
static void HsaIntercept(HsaApiTable* table);
|
||||
|
||||
static hsa_status_t QueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
static hsa_status_t InterceptQueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source,
|
||||
void* data),
|
||||
void* data, uint32_t private_segment_size,
|
||||
uint32_t group_segment_size, hsa_queue_t** queue) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
uint32_t group_segment_size, hsa_queue_t** queue,
|
||||
const bool& tracker_on) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
if (in_constr_call_) EXC_ABORT(status, "recursive InterceptQueueCreate()");
|
||||
in_constr_call_ = true;
|
||||
|
||||
ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, callback, data, private_segment_size,
|
||||
group_segment_size, queue, &status);
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "ProxyQueue::Create()");
|
||||
|
||||
if (tracker_on_) {
|
||||
if (tracker_on || tracker_on_) {
|
||||
if (tracker_ == NULL) tracker_ = new Tracker(timeout_);
|
||||
status = hsa_amd_profiling_set_profiler_enabled(*queue, true);
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_set_profiler_enabled()");
|
||||
}
|
||||
|
||||
if (!obj_map_) obj_map_ = new obj_map_t;
|
||||
@@ -76,25 +80,36 @@ class InterceptQueue {
|
||||
(*obj_map_)[(uint64_t)(*queue)] = obj;
|
||||
status = proxy->SetInterceptCB(OnSubmitCB, obj);
|
||||
|
||||
in_constr_call_ = false;
|
||||
return status;
|
||||
}
|
||||
|
||||
// Queue-create entry point that does not force the tracker on.
// All arguments are forwarded unchanged to InterceptQueueCreate(); profiling
// of the new queue is then enabled only if the class-wide tracker_on_ flag is set.
static hsa_status_t QueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
                                void (*callback)(hsa_status_t status, hsa_queue_t* source,
                                                 void* data),
                                void* data, uint32_t private_segment_size,
                                uint32_t group_segment_size, hsa_queue_t** queue) {
  const bool force_tracker = false;
  const hsa_status_t result =
      InterceptQueueCreate(agent, size, type, callback, data, private_segment_size,
                           group_segment_size, queue, force_tracker);
  return result;
}
|
||||
|
||||
// Queue-create entry point that always turns the tracker on for the new queue.
// All arguments are forwarded unchanged to InterceptQueueCreate() with the
// tracker request set, regardless of the class-wide tracker_on_ flag.
static hsa_status_t QueueCreateTracked(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
                                       void (*callback)(hsa_status_t status, hsa_queue_t* source,
                                                        void* data),
                                       void* data, uint32_t private_segment_size,
                                       uint32_t group_segment_size, hsa_queue_t** queue) {
  const bool force_tracker = true;
  const hsa_status_t result =
      InterceptQueueCreate(agent, size, type, callback, data, private_segment_size,
                           group_segment_size, queue, force_tracker);
  return result;
}
|
||||
|
||||
static hsa_status_t QueueDestroy(hsa_queue_t* queue) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
if (destroy_callback_ != NULL) {
|
||||
status = destroy_callback_(queue, callback_data_);
|
||||
if (status != HSA_STATUS_SUCCESS) return status;
|
||||
}
|
||||
if (destroy_callback_ != NULL) {
|
||||
status = destroy_callback_(queue, callback_data_);
|
||||
}
|
||||
|
||||
obj_map_t::iterator it = obj_map_->find((uint64_t)queue);
|
||||
if (it != obj_map_->end()) {
|
||||
const InterceptQueue* obj = it->second;
|
||||
assert(queue == obj->queue_);
|
||||
delete obj;
|
||||
obj_map_->erase(it);
|
||||
status = HSA_STATUS_SUCCESS;
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
status = DelObj(queue);
|
||||
}
|
||||
|
||||
return status;
|
||||
@@ -179,17 +194,9 @@ class InterceptQueue {
|
||||
// Returns the current value of the class-wide tracker_on_ flag.
static bool IsTrackerOn() { return tracker_on_; }
|
||||
|
||||
private:
|
||||
InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) :
|
||||
queue_(queue),
|
||||
proxy_(proxy)
|
||||
{
|
||||
agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent);
|
||||
}
|
||||
~InterceptQueue() { ProxyQueue::Destroy(proxy_); }
|
||||
|
||||
static packet_word_t GetHeaderType(const packet_t* packet) {
|
||||
static hsa_packet_type_t GetHeaderType(const packet_t* packet) {
|
||||
const packet_word_t* header = reinterpret_cast<const packet_word_t*>(packet);
|
||||
return (*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask;
|
||||
return static_cast<hsa_packet_type_t>((*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask);
|
||||
}
|
||||
|
||||
static const char* GetKernelName(const hsa_kernel_dispatch_packet_t* dispatch_packet) {
|
||||
@@ -219,6 +226,41 @@ class InterceptQueue {
|
||||
return funcname;
|
||||
}
|
||||
|
||||
// method to get an intercept queue object
|
||||
static InterceptQueue* GetObj(const hsa_queue_t* queue) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
InterceptQueue* obj = NULL;
|
||||
obj_map_t::const_iterator it = obj_map_->find((uint64_t)queue);
|
||||
if (it != obj_map_->end()) {
|
||||
obj = it->second;
|
||||
assert(queue == obj->queue_);
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
// method to delete an intercept queue object
|
||||
static hsa_status_t DelObj(const hsa_queue_t* queue) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
obj_map_t::const_iterator it = obj_map_->find((uint64_t)queue);
|
||||
if (it != obj_map_->end()) {
|
||||
const InterceptQueue* obj = it->second;
|
||||
assert(queue == obj->queue_);
|
||||
delete obj;
|
||||
obj_map_->erase(it);
|
||||
status = HSA_STATUS_SUCCESS;;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) :
|
||||
queue_(queue),
|
||||
proxy_(proxy)
|
||||
{
|
||||
agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent);
|
||||
}
|
||||
// Destroys the proxy queue that was passed to the constructor.
~InterceptQueue() { ProxyQueue::Destroy(proxy_); }
|
||||
|
||||
static mutex_t mutex_;
|
||||
static const packet_word_t header_type_mask = (1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1;
|
||||
static rocprofiler_callback_t dispatch_callback_;
|
||||
@@ -229,6 +271,7 @@ class InterceptQueue {
|
||||
static uint64_t timeout_;
|
||||
static Tracker* tracker_;
|
||||
static bool tracker_on_;
|
||||
static bool in_constr_call_;
|
||||
|
||||
hsa_queue_t* const queue_;
|
||||
ProxyQueue* const proxy_;
|
||||
|
||||
@@ -232,11 +232,35 @@ PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t fa
|
||||
rocprofiler::SaveHsaApi(table);
|
||||
rocprofiler::ProxyQueue::InitFactory();
|
||||
bool intercept_mode = false;
|
||||
|
||||
// Checking environment to enable intercept mode
|
||||
const char* intercept_env = getenv("ROCP_HSA_INTERCEPT");
|
||||
if (intercept_env != NULL) {
|
||||
if (strncmp(intercept_env, "1", 1) == 0) intercept_mode = true;
|
||||
switch (atoi(intercept_env)) {
|
||||
// Intercepting disabled
|
||||
case 0:
|
||||
intercept_mode = false;
|
||||
rocprofiler::InterceptQueue::TrackerOn(false);
|
||||
break;
|
||||
// Intercepting enabled without timestamping
|
||||
case 1:
|
||||
intercept_mode = true;
|
||||
rocprofiler::InterceptQueue::TrackerOn(false);
|
||||
break;
|
||||
// Intercepting enabled with timestamping
|
||||
case 2:
|
||||
intercept_mode = true;
|
||||
rocprofiler::InterceptQueue::TrackerOn(true);
|
||||
break;
|
||||
default:
|
||||
ERR_LOGGING("Bad ROCP_HSA_INTERCEPT env var value (" << intercept_env << ")");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (rocprofiler::LoadTool()) intercept_mode = true;
|
||||
|
||||
// Loading a tool lib and setting of intercept mode
|
||||
intercept_mode = rocprofiler::LoadTool();
|
||||
|
||||
// HSA intercepting
|
||||
if (intercept_mode) {
|
||||
rocprofiler::ProxyQueue::HsaIntercept(table);
|
||||
@@ -558,4 +582,14 @@ PUBLIC_API hsa_status_t rocprofiler_query_info(
|
||||
API_METHOD_SUFFIX
|
||||
}
|
||||
|
||||
// Creates a profiled queue. All dispatches on this queue will be profiled
|
||||
PUBLIC_API hsa_status_t rocprofiler_queue_create_profiled(
|
||||
hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
|
||||
void* data, uint32_t private_segment_size, uint32_t group_segment_size,
|
||||
hsa_queue_t** queue)
|
||||
{
|
||||
return rocprofiler::InterceptQueue::QueueCreateTracked(agent, size, type, callback, data, private_segment_size, group_segment_size, queue);
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -99,9 +99,13 @@ class Tracker {
|
||||
record_t* record = new record_t{};
|
||||
assert(record);
|
||||
entry->record = record;
|
||||
status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &record->dispatch);
|
||||
|
||||
timestamp_t dispatch_timestamp = 0;
|
||||
status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &dispatch_timestamp);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)");
|
||||
|
||||
record->dispatch = timestamp2ns(dispatch_timestamp);
|
||||
|
||||
status = hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler");
|
||||
|
||||
|
||||
@@ -38,6 +38,8 @@ export ROCP_TOOL_LIB=libtool.so
|
||||
unset ROCP_PROXY_QUEUE
|
||||
# ROC profiler metrics config file
|
||||
export ROCP_METRICS=metrics.xml
|
||||
# ROC profiler kernels timing
|
||||
export ROCP_TIMESTAMP_ON=1
|
||||
# output directory for the tool library, for metrics results file 'results.txt'
|
||||
# and SQTT trace files 'thread_trace.se<n>.out'
|
||||
export ROCP_OUTPUT_DIR=./RESULTS
|
||||
|
||||
@@ -600,7 +600,7 @@ hsa_status_t dispatch_callback(const rocprofiler_callback_data_t* callback_data,
|
||||
feature_count = next_offset - set_offset;
|
||||
}
|
||||
|
||||
if (tool_data->feature_count > 0) {
|
||||
if (feature_count > 0) {
|
||||
// Open profiling context
|
||||
status = rocprofiler_open(callback_data->agent, features, feature_count,
|
||||
&context, 0 /*ROCPROFILER_MODE_SINGLEGROUP*/, &properties);
|
||||
|
||||
Reference in a new issue
Block a user