@@ -90,11 +90,6 @@ roctracer_status_t roctracer_op_code(
|
||||
uint32_t* op, // [out] op code
|
||||
uint32_t* kind = NULL); // [out] op kind code
|
||||
|
||||
// Set properties
|
||||
roctracer_status_t roctracer_set_properties(
|
||||
roctracer_domain_t domain, // tracing domain
|
||||
void* propertes); // tracing properties
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Callback API
|
||||
//
|
||||
@@ -214,6 +209,21 @@ roctracer_status_t roctracer_disable_activity();
|
||||
roctracer_status_t roctracer_flush_activity(
|
||||
roctracer_pool_t* pool = NULL); // memory pool, NULL is a default one
|
||||
|
||||
// Load/Un;oad methods
|
||||
// Set properties
|
||||
roctracer_status_t roctracer_set_properties(
|
||||
roctracer_domain_t domain, // tracing domain
|
||||
void* propertes); // tracing properties
|
||||
|
||||
struct HsaApiTable;
|
||||
bool roctracer_load(
|
||||
HsaApiTable* table,
|
||||
uint64_t runtime_version,
|
||||
uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names);
|
||||
|
||||
void roctracer_unload();
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C" block
|
||||
#endif // __cplusplus
|
||||
|
||||
@@ -74,8 +74,10 @@ extern AmdExtTable AmdExtTable_saved;
|
||||
extern ImageExtTable ImageExtTable_saved;
|
||||
|
||||
struct ops_properties_t {
|
||||
void* table;
|
||||
activity_async_callback_t async_copy_callback_fun;
|
||||
void* async_copy_callback_arg;
|
||||
const char* output_prefix;
|
||||
};
|
||||
|
||||
}; // namespace hsa_support
|
||||
|
||||
@@ -4,6 +4,9 @@
|
||||
set ( TARGET_LIB "${TARGET_NAME}" )
|
||||
set ( LIB_SRC
|
||||
${LIB_DIR}/core/roctracer.cpp
|
||||
${LIB_DIR}/proxy/proxy_queue.cpp
|
||||
${LIB_DIR}/proxy/simple_proxy_queue.cpp
|
||||
${LIB_DIR}/proxy/intercept_queue.cpp
|
||||
${LIB_DIR}/util/hsa_rsrc_factory.cpp
|
||||
)
|
||||
add_library ( ${TARGET_LIB} SHARED ${LIB_SRC} )
|
||||
|
||||
@@ -35,12 +35,18 @@ THE SOFTWARE.
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include "core/loader.h"
|
||||
#include "core/trace_buffer.h"
|
||||
#include "proxy/tracker.h"
|
||||
#include "ext/hsa_rt_utils.hpp"
|
||||
#include "util/exception.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/logger.h"
|
||||
|
||||
#include "proxy/hsa_queue.h"
|
||||
#include "proxy/intercept_queue.h"
|
||||
#include "proxy/proxy_queue.h"
|
||||
#include "proxy/simple_proxy_queue.h"
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
#define CONSTRUCTOR_API __attribute__((constructor))
|
||||
#define DESTRUCTOR_API __attribute__((destructor))
|
||||
@@ -85,17 +91,86 @@ THE SOFTWARE.
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Internal library methods
|
||||
//
|
||||
namespace rocprofiler {
|
||||
decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
|
||||
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
|
||||
decltype(hsa_signal_store_relaxed)* hsa_signal_store_screlease_fn;
|
||||
|
||||
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
|
||||
decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
|
||||
|
||||
decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn;
|
||||
decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn;
|
||||
decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn;
|
||||
|
||||
decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
|
||||
decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
|
||||
|
||||
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
|
||||
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
|
||||
|
||||
::HsaApiTable* kHsaApiTable;
|
||||
|
||||
void SaveHsaApi(::HsaApiTable* table) {
|
||||
util::HsaRsrcFactory::InitHsaApiTable(table);
|
||||
|
||||
kHsaApiTable = table;
|
||||
hsa_queue_create_fn = table->core_->hsa_queue_create_fn;
|
||||
hsa_queue_destroy_fn = table->core_->hsa_queue_destroy_fn;
|
||||
|
||||
hsa_signal_store_relaxed_fn = table->core_->hsa_signal_store_relaxed_fn;
|
||||
hsa_signal_store_screlease_fn = table->core_->hsa_signal_store_screlease_fn;
|
||||
|
||||
hsa_queue_load_write_index_relaxed_fn = table->core_->hsa_queue_load_write_index_relaxed_fn;
|
||||
hsa_queue_store_write_index_relaxed_fn = table->core_->hsa_queue_store_write_index_relaxed_fn;
|
||||
hsa_queue_load_read_index_relaxed_fn = table->core_->hsa_queue_load_read_index_relaxed_fn;
|
||||
|
||||
hsa_queue_load_write_index_scacquire_fn = table->core_->hsa_queue_load_write_index_scacquire_fn;
|
||||
hsa_queue_store_write_index_screlease_fn = table->core_->hsa_queue_store_write_index_screlease_fn;
|
||||
hsa_queue_load_read_index_scacquire_fn = table->core_->hsa_queue_load_read_index_scacquire_fn;
|
||||
|
||||
hsa_amd_queue_intercept_create_fn = table->amd_ext_->hsa_amd_queue_intercept_create_fn;
|
||||
hsa_amd_queue_intercept_register_fn = table->amd_ext_->hsa_amd_queue_intercept_register_fn;
|
||||
}
|
||||
|
||||
void RestoreHsaApi() {
|
||||
::HsaApiTable* table = kHsaApiTable;
|
||||
table->core_->hsa_queue_create_fn = hsa_queue_create_fn;
|
||||
table->core_->hsa_queue_destroy_fn = hsa_queue_destroy_fn;
|
||||
|
||||
table->core_->hsa_signal_store_relaxed_fn = hsa_signal_store_relaxed_fn;
|
||||
table->core_->hsa_signal_store_screlease_fn = hsa_signal_store_screlease_fn;
|
||||
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn = hsa_queue_load_write_index_relaxed_fn;
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn = hsa_queue_store_write_index_relaxed_fn;
|
||||
table->core_->hsa_queue_load_read_index_relaxed_fn = hsa_queue_load_read_index_relaxed_fn;
|
||||
|
||||
table->core_->hsa_queue_load_write_index_scacquire_fn = hsa_queue_load_write_index_scacquire_fn;
|
||||
table->core_->hsa_queue_store_write_index_screlease_fn = hsa_queue_store_write_index_screlease_fn;
|
||||
table->core_->hsa_queue_load_read_index_scacquire_fn = hsa_queue_load_read_index_scacquire_fn;
|
||||
|
||||
table->amd_ext_->hsa_amd_queue_intercept_create_fn = hsa_amd_queue_intercept_create_fn;
|
||||
table->amd_ext_->hsa_amd_queue_intercept_register_fn = hsa_amd_queue_intercept_register_fn;
|
||||
}
|
||||
}
|
||||
|
||||
namespace roctracer {
|
||||
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy_fn;
|
||||
decltype(hsa_amd_memory_async_copy_rect)* hsa_amd_memory_async_copy_rect_fn;
|
||||
|
||||
TraceBuffer<trace_entry_t> trace_buffer(0x200000);
|
||||
|
||||
namespace hsa_support {
|
||||
// callbacks table
|
||||
cb_table_t cb_table;
|
||||
// asyc copy activity callback
|
||||
bool async_copy_callback_enabled = false;
|
||||
activity_async_callback_t async_copy_callback_fun = NULL;
|
||||
void* async_copy_callback_arg = NULL;
|
||||
bool async_copy_callback_enabled = false;
|
||||
const char* output_prefix = NULL;
|
||||
// Table of function pointers to HSA Core Runtime
|
||||
CoreApiTable CoreApiTable_saved{};
|
||||
// Table of function pointers to AMD extensions
|
||||
@@ -288,15 +363,6 @@ class MemoryPool {
|
||||
pthread_cond_t read_cond_;
|
||||
};
|
||||
|
||||
CONSTRUCTOR_API void constructor() {
|
||||
util::Logger::Create();
|
||||
}
|
||||
|
||||
DESTRUCTOR_API void destructor() {
|
||||
::util::HsaRsrcFactory::Destroy();
|
||||
util::Logger::Destroy();
|
||||
}
|
||||
|
||||
// Correlation id storage
|
||||
static thread_local activity_correlation_id_t correlation_id_tls = 0;
|
||||
typedef std::map<activity_correlation_id_t, activity_correlation_id_t> correlation_id_map_t;
|
||||
@@ -367,18 +433,73 @@ void HCC_AsyncActivityCallback(uint32_t op_id, void* record, void* arg) {
|
||||
pool->Write(*record_ptr);
|
||||
}
|
||||
|
||||
bool hsa_async_copy_handler(hsa_signal_value_t value, void* arg) {
|
||||
::proxy::Tracker::entry_t* entry = reinterpret_cast<::proxy::Tracker::entry_t*>(arg);
|
||||
// Open output file
|
||||
FILE* open_output_file(const char* prefix, const char* name) {
|
||||
FILE* file_handle = NULL;
|
||||
if (prefix != NULL) {
|
||||
std::ostringstream oss;
|
||||
oss << prefix << "/" << name;
|
||||
file_handle = fopen(oss.str().c_str(), "w");
|
||||
if (file_handle == NULL) {
|
||||
std::ostringstream errmsg;
|
||||
errmsg << "ROCTracer: fopen error, file '" << oss.str().c_str() << "'";
|
||||
perror(errmsg.str().c_str());
|
||||
abort();
|
||||
}
|
||||
} else file_handle = stdout;
|
||||
return file_handle;
|
||||
}
|
||||
|
||||
FILE* kernel_file_handle = NULL;
|
||||
void hsa_kernel_handler(::proxy::Tracker::entry_t* entry) {
|
||||
static uint64_t index = 0;
|
||||
if (index == 0) {
|
||||
kernel_file_handle = open_output_file(hsa_support::output_prefix, "results.txt");
|
||||
}
|
||||
fprintf(kernel_file_handle, "dispatch[%lu], gpu-id(%u), tid(%u), kernel-name(\"%s\"), time(%lu,%lu,%lu,%lu)\n",
|
||||
index,
|
||||
::util::HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index,
|
||||
entry->kernel.tid,
|
||||
entry->kernel.name,
|
||||
entry->dispatch,
|
||||
entry->begin,
|
||||
entry->end,
|
||||
entry->complete);
|
||||
#if 0
|
||||
fprintf(file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), tid(%lu), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), kernel-name(\"%s\")",
|
||||
index,
|
||||
HsaRsrcFactory::Instance().GetAgentInfo(entry->agent)->dev_index,
|
||||
entry->data.queue_id,
|
||||
entry->data.queue_index,
|
||||
entry->data.thread_id,
|
||||
entry->kernel_properties.grid_size,
|
||||
entry->kernel_properties.workgroup_size,
|
||||
entry->kernel_properties.lds_size,
|
||||
entry->kernel_properties.scratch_size,
|
||||
entry->kernel_properties.vgpr_count,
|
||||
entry->kernel_properties.sgpr_count,
|
||||
entry->kernel_properties.fbarrier_count,
|
||||
entry->kernel_properties.signal.handle,
|
||||
nik_name.c_str());
|
||||
if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)",
|
||||
record->dispatch,
|
||||
record->begin,
|
||||
record->end,
|
||||
record->complete);
|
||||
fprintf(file_handle, "\n");
|
||||
fflush(file_handle);
|
||||
#endif
|
||||
index++;
|
||||
}
|
||||
|
||||
void hsa_async_copy_handler(::proxy::Tracker::entry_t* entry) {
|
||||
activity_record_t record{};
|
||||
record.domain = ACTIVITY_DOMAIN_HSA_OPS; // activity domain id
|
||||
record.correlation_id = entry->index; // activity ID
|
||||
record.begin_ns = entry->record->begin; // host begin timestamp
|
||||
record.end_ns = entry->record->end; // host end timestamp
|
||||
record.begin_ns = entry->begin; // host begin timestamp
|
||||
record.end_ns = entry->end; // host end timestamp
|
||||
record.device_id = 0; // device id
|
||||
|
||||
hsa_support::async_copy_callback_fun(hsa_support::HSA_OP_ID_async_copy, &record, hsa_support::async_copy_callback_arg);
|
||||
return false;
|
||||
}
|
||||
|
||||
hsa_status_t hsa_amd_memory_async_copy_interceptor(
|
||||
@@ -388,17 +509,15 @@ hsa_status_t hsa_amd_memory_async_copy_interceptor(
|
||||
{
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
if (hsa_support::async_copy_callback_enabled) {
|
||||
::proxy::Tracker* tracker = &::proxy::Tracker::Instance();
|
||||
::proxy::Tracker::entry_t* tracker_entry = tracker->Alloc(hsa_agent_t{}, completion_signal);
|
||||
trace_entry_t* entry = trace_buffer.GetEntry();
|
||||
::proxy::Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src,
|
||||
src_agent, size, num_dep_signals,
|
||||
dep_signals, tracker_entry->signal);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
tracker->EnableMemcopy(tracker_entry, hsa_async_copy_handler, reinterpret_cast<void*>(tracker_entry));
|
||||
} else {
|
||||
tracker->Delete(tracker_entry);
|
||||
}
|
||||
} else {
|
||||
dep_signals, entry->signal);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_memory_async_copy interceptor");
|
||||
}
|
||||
else
|
||||
{
|
||||
status = hsa_amd_memory_async_copy_fn(dst, dst_agent, src,
|
||||
src_agent, size, num_dep_signals,
|
||||
dep_signals, completion_signal);
|
||||
@@ -414,18 +533,16 @@ hsa_status_t hsa_amd_memory_async_copy_rect_interceptor(
|
||||
{
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
if (hsa_support::async_copy_callback_enabled) {
|
||||
::proxy::Tracker* tracker = &::proxy::Tracker::Instance();
|
||||
::proxy::Tracker::entry_t* tracker_entry = tracker->Alloc(hsa_agent_t{}, completion_signal);
|
||||
trace_entry_t* entry = trace_buffer.GetEntry();
|
||||
::proxy::Tracker::Enable(COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src,
|
||||
src_offset, range, copy_agent,
|
||||
dir, num_dep_signals, dep_signals,
|
||||
tracker_entry->signal);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
tracker->EnableMemcopy(tracker_entry, hsa_async_copy_handler, reinterpret_cast<void*>(tracker_entry));
|
||||
} else {
|
||||
tracker->Delete(tracker_entry);
|
||||
}
|
||||
} else {
|
||||
entry->signal);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_memory_async_copy interceptor");
|
||||
}
|
||||
else
|
||||
{
|
||||
status = hsa_amd_memory_async_copy_rect_fn(dst, dst_offset, src,
|
||||
src_offset, range, copy_agent,
|
||||
dir, num_dep_signals, dep_signals,
|
||||
@@ -443,10 +560,6 @@ memory_pool_mutex_t memory_pool_mutex;
|
||||
|
||||
LOADER_INSTANTIATE();
|
||||
|
||||
std::atomic<proxy::Tracker*> proxy::Tracker::instance_{};
|
||||
proxy::Tracker::mutex_t proxy::Tracker::glob_mutex_;
|
||||
proxy::Tracker::counter_t proxy::Tracker::counter_ = 0;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Public library methods
|
||||
//
|
||||
@@ -685,6 +798,7 @@ static void roctracer_enable_activity_impl(
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
roctracer::hsa_support::async_copy_callback_enabled = true;
|
||||
rocprofiler::InterceptQueue::Enable(true);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
@@ -747,6 +861,7 @@ static void roctracer_disable_activity_impl(
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
roctracer::hsa_support::async_copy_callback_enabled = false;
|
||||
rocprofiler::InterceptQueue::Enable(false);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: break;
|
||||
@@ -812,8 +927,25 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
// HSA OPS properties
|
||||
roctracer::hsa_ops_properties_t* ops_properties = reinterpret_cast<roctracer::hsa_ops_properties_t*>(properties);
|
||||
HsaApiTable* table = reinterpret_cast<HsaApiTable*>(ops_properties->table);
|
||||
roctracer::hsa_support::async_copy_callback_fun = ops_properties->async_copy_callback_fun;
|
||||
roctracer::hsa_support::async_copy_callback_arg = ops_properties->async_copy_callback_arg;
|
||||
roctracer::hsa_support::output_prefix = ops_properties->output_prefix;
|
||||
|
||||
// HSA dispatches intercepting
|
||||
rocprofiler::SaveHsaApi(table);
|
||||
rocprofiler::ProxyQueue::InitFactory();
|
||||
rocprofiler::ProxyQueue::HsaIntercept(table);
|
||||
rocprofiler::InterceptQueue::HsaIntercept(table);
|
||||
|
||||
// HSA async-copy tracing
|
||||
hsa_status_t status = hsa_amd_profiling_async_copy_enable(true);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_async_copy_enable");
|
||||
roctracer::hsa_amd_memory_async_copy_fn = table->amd_ext_->hsa_amd_memory_async_copy_fn;
|
||||
roctracer::hsa_amd_memory_async_copy_rect_fn = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_fn = roctracer::hsa_amd_memory_async_copy_interceptor;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = roctracer::hsa_amd_memory_async_copy_rect_interceptor;
|
||||
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_API: {
|
||||
@@ -834,17 +966,39 @@ PUBLIC_API roctracer_status_t roctracer_set_properties(
|
||||
}
|
||||
|
||||
// HSA-runtime tool on-load method
|
||||
PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
PUBLIC_API bool roctracer_load(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
// enabled HSA async-copy tracing
|
||||
hsa_status_t status = hsa_amd_profiling_async_copy_enable(true);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_async_copy_enable");
|
||||
roctracer::hsa_amd_memory_async_copy_fn = table->amd_ext_->hsa_amd_memory_async_copy_fn;
|
||||
roctracer::hsa_amd_memory_async_copy_rect_fn = table->amd_ext_->hsa_amd_memory_async_copy_rect_fn;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_fn = roctracer::hsa_amd_memory_async_copy_interceptor;
|
||||
table->amd_ext_->hsa_amd_memory_async_copy_rect_fn = roctracer::hsa_amd_memory_async_copy_rect_interceptor;
|
||||
static bool is_loaded = false;
|
||||
if (is_loaded) return true;
|
||||
is_loaded = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
PUBLIC_API void roctracer_unload() {
|
||||
static bool is_unloaded = false;
|
||||
if (is_unloaded) return;
|
||||
is_unloaded = true;
|
||||
|
||||
roctracer::trace_buffer.Flush(roctracer::COPY_ENTRY_TYPE, roctracer::hsa_async_copy_handler);
|
||||
roctracer::trace_buffer.Flush(roctracer::KERNEL_ENTRY_TYPE, roctracer::hsa_kernel_handler);
|
||||
if ((roctracer::hsa_support::output_prefix != NULL) && (roctracer::kernel_file_handle != NULL)) fclose(roctracer::kernel_file_handle);
|
||||
}
|
||||
|
||||
PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version, uint64_t failed_tool_count,
|
||||
const char* const* failed_tool_names) {
|
||||
return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names);
|
||||
}
|
||||
PUBLIC_API void OnUnload() { roctracer_unload(); }
|
||||
|
||||
CONSTRUCTOR_API void constructor() {
|
||||
roctracer::util::Logger::Create();
|
||||
}
|
||||
|
||||
DESTRUCTOR_API void destructor() {
|
||||
roctracer_unload();
|
||||
util::HsaRsrcFactory::Destroy();
|
||||
roctracer::util::Logger::Destroy();
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
#ifndef SRC_CORE_TRACE_BUFFER_H_
|
||||
#define SRC_CORE_TRACE_BUFFER_H_
|
||||
|
||||
namespace roctracer {
|
||||
enum {
|
||||
TRACE_ENTRY_INV = 0,
|
||||
TRACE_ENTRY_INIT = 1,
|
||||
TRACE_ENTRY_COMPL = 2
|
||||
};
|
||||
enum {
|
||||
API_ENTRY_TYPE,
|
||||
COPY_ENTRY_TYPE,
|
||||
KERNEL_ENTRY_TYPE
|
||||
};
|
||||
|
||||
struct trace_entry_t {
|
||||
std::atomic<uint32_t> valid;
|
||||
uint32_t type;
|
||||
uint64_t dispatch;
|
||||
uint64_t begin; // kernel begin timestamp, ns
|
||||
uint64_t end; // kernel end timestamp, ns
|
||||
uint64_t complete;
|
||||
hsa_agent_t agent;
|
||||
hsa_signal_t orig;
|
||||
hsa_signal_t signal;
|
||||
union {
|
||||
struct {
|
||||
} copy;
|
||||
struct {
|
||||
const char* name;
|
||||
hsa_agent_t agent;
|
||||
uint32_t tid;
|
||||
} kernel;
|
||||
};
|
||||
};
|
||||
|
||||
template <typename Entry>
|
||||
class TraceBuffer {
|
||||
public:
|
||||
typedef void (*callabck_t)(Entry*);
|
||||
|
||||
TraceBuffer(uint32_t size) {
|
||||
size_ = size;
|
||||
data_ = new Entry[size_];;
|
||||
memset(data_, 0, size_ * sizeof(Entry));
|
||||
read_pointer_ = data_;
|
||||
}
|
||||
|
||||
Entry* GetEntry() {
|
||||
Entry* ptr = read_pointer_.fetch_add(1);
|
||||
if (ptr >= (data_ + size_)) {
|
||||
fprintf(stderr, "GetEntry: trace buffer is out of range\n");
|
||||
abort();
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void Flush(uint32_t type, callabck_t fun) {
|
||||
Entry* ptr = data_;
|
||||
for (; (ptr < read_pointer_) && (ptr < (data_ + size_)); ptr++) {
|
||||
if (ptr->type == type) {
|
||||
if (ptr->valid == TRACE_ENTRY_COMPL) {
|
||||
fun(ptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ptr >= (data_ + size_)) {
|
||||
fprintf(stderr, "Flush: trace buffer is out of range\n");
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Entry* data_;
|
||||
uint32_t size_;
|
||||
std::atomic<Entry*> read_pointer_;
|
||||
};
|
||||
} // namespace roctracer
|
||||
|
||||
#endif // SRC_CORE_TRACE_BUFFER_H_
|
||||
@@ -0,0 +1,67 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _SRC_CORE_HSA_PROXY_QUEUE_H
|
||||
#define _SRC_CORE_HSA_PROXY_QUEUE_H
|
||||
|
||||
#include <hsa.h>
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
#include "proxy/proxy_queue.h"
|
||||
#include "util/exception.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
extern decltype(hsa_amd_queue_intercept_create)* hsa_amd_queue_intercept_create_fn;
|
||||
extern decltype(hsa_amd_queue_intercept_register)* hsa_amd_queue_intercept_register_fn;
|
||||
|
||||
class HsaProxyQueue : public ProxyQueue {
|
||||
public:
|
||||
hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) {
|
||||
return hsa_amd_queue_intercept_register_fn(queue_, on_submit_cb, data);
|
||||
}
|
||||
|
||||
void Submit(const packet_t* packet) {
|
||||
EXC_RAISING(HSA_STATUS_ERROR, "HsaProxyQueue::Submit() is not supported");
|
||||
}
|
||||
|
||||
private:
|
||||
hsa_status_t Init(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
|
||||
void* data, uint32_t private_segment_size, uint32_t group_segment_size,
|
||||
hsa_queue_t** queue) {
|
||||
const auto status = hsa_amd_queue_intercept_create_fn(
|
||||
agent, size, type, callback, data, private_segment_size, group_segment_size, &queue_);
|
||||
*queue = queue_;
|
||||
return status;
|
||||
}
|
||||
|
||||
hsa_status_t Cleanup() const { return hsa_queue_destroy_fn(queue_); }
|
||||
|
||||
hsa_queue_t* queue_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_HSA_PROXY_QUEUE_H
|
||||
@@ -0,0 +1,46 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _SRC_CORE_HSA_QUEUE_H
|
||||
#define _SRC_CORE_HSA_QUEUE_H
|
||||
|
||||
#include <atomic>
|
||||
|
||||
#include "proxy/queue.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
class HsaQueue : public Queue {
|
||||
public:
|
||||
HsaQueue(const util::AgentInfo* agent_info, hsa_queue_t* queue) : queue_(queue) {}
|
||||
|
||||
void Submit(const packet_t* packet) {
|
||||
util::HsaRsrcFactory::Instance().Submit(queue_, packet);
|
||||
}
|
||||
|
||||
private:
|
||||
hsa_queue_t* queue_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_HSA_QUEUE_H
|
||||
@@ -0,0 +1,42 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#include "proxy/intercept_queue.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
void InterceptQueue::HsaIntercept(HsaApiTable* table) {
|
||||
table->core_->hsa_queue_create_fn = rocprofiler::InterceptQueue::QueueCreate;
|
||||
table->core_->hsa_queue_destroy_fn = rocprofiler::InterceptQueue::QueueDestroy;
|
||||
}
|
||||
|
||||
InterceptQueue::mutex_t InterceptQueue::mutex_;
|
||||
//rocprofiler_callback_t InterceptQueue::dispatch_callback_ = NULL;
|
||||
//InterceptQueue::queue_callback_t InterceptQueue::create_callback_ = NULL;
|
||||
//InterceptQueue::queue_callback_t InterceptQueue::destroy_callback_ = NULL;
|
||||
//void* InterceptQueue::callback_data_ = NULL;
|
||||
InterceptQueue::obj_map_t* InterceptQueue::obj_map_ = NULL;
|
||||
const char* InterceptQueue::kernel_none_ = "";
|
||||
bool InterceptQueue::in_create_call_ = false;
|
||||
InterceptQueue::queue_id_t InterceptQueue::current_queue_id = 0;
|
||||
bool InterceptQueue::is_enabled = false;
|
||||
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,301 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _SRC_CORE_INTERCEPT_QUEUE_H
|
||||
#define _SRC_CORE_INTERCEPT_QUEUE_H
|
||||
|
||||
#include <amd_hsa_kernel_code.h>
|
||||
#include <cxxabi.h>
|
||||
#include <dlfcn.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
#include "core/trace_buffer.h"
|
||||
#include "proxy/tracker.h"
|
||||
#include "proxy/proxy_queue.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/exception.h"
|
||||
|
||||
namespace roctracer { extern TraceBuffer<trace_entry_t> trace_buffer; }
|
||||
|
||||
namespace rocprofiler {
|
||||
extern decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
|
||||
class InterceptQueue {
|
||||
public:
|
||||
typedef std::recursive_mutex mutex_t;
|
||||
typedef std::map<uint64_t, InterceptQueue*> obj_map_t;
|
||||
typedef hsa_status_t (*queue_callback_t)(hsa_queue_t*, void* data);
|
||||
typedef void (*queue_event_callback_t)(hsa_status_t status, hsa_queue_t *queue, void *arg);
|
||||
typedef uint32_t queue_id_t;
|
||||
|
||||
static void HsaIntercept(HsaApiTable* table);
|
||||
|
||||
static hsa_status_t InterceptQueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source,
|
||||
void* data),
|
||||
void* data, uint32_t private_segment_size,
|
||||
uint32_t group_segment_size, hsa_queue_t** queue,
|
||||
const bool& tracker_on) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
|
||||
if (in_create_call_) EXC_ABORT(status, "recursive InterceptQueueCreate()");
|
||||
in_create_call_ = true;
|
||||
|
||||
ProxyQueue* proxy = ProxyQueue::Create(agent, size, type, queue_event_callback, data, private_segment_size,
|
||||
group_segment_size, queue, &status);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "ProxyQueue::Create()");
|
||||
|
||||
status = util::HsaRsrcFactory::HsaApi()->hsa_amd_profiling_set_profiler_enabled(*queue, true);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "hsa_amd_profiling_set_profiler_enabled()");
|
||||
|
||||
if (!obj_map_) obj_map_ = new obj_map_t;
|
||||
InterceptQueue* obj = new InterceptQueue(agent, *queue, proxy);
|
||||
obj->queue_event_callback_ = callback;
|
||||
obj->queue_id = current_queue_id;
|
||||
(*obj_map_)[(uint64_t)(*queue)] = obj;
|
||||
|
||||
status = (is_enabled) ? proxy->SetInterceptCB(OnSubmitCB, obj) : proxy->SetInterceptCB(OnSubmitCB_dummy, obj);
|
||||
|
||||
#if 0
|
||||
if (create_callback_ != NULL) {
|
||||
status = create_callback_(*queue, callback_data_);
|
||||
}
|
||||
#endif
|
||||
|
||||
++current_queue_id;
|
||||
in_create_call_ = false;
|
||||
return status;
|
||||
}
|
||||
|
||||
static hsa_status_t QueueCreate(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source,
|
||||
void* data),
|
||||
void* data, uint32_t private_segment_size,
|
||||
uint32_t group_segment_size, hsa_queue_t** queue) {
|
||||
return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, false);
|
||||
}
|
||||
|
||||
static hsa_status_t QueueCreateTracked(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source,
|
||||
void* data),
|
||||
void* data, uint32_t private_segment_size,
|
||||
uint32_t group_segment_size, hsa_queue_t** queue) {
|
||||
return InterceptQueueCreate(agent, size, type, callback, data, private_segment_size, group_segment_size, queue, true);
|
||||
}
|
||||
|
||||
static hsa_status_t QueueDestroy(hsa_queue_t* queue) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
hsa_status_t status = HSA_STATUS_SUCCESS;
|
||||
#if 0
|
||||
if (destroy_callback_ != NULL) {
|
||||
status = destroy_callback_(queue, callback_data_);
|
||||
}
|
||||
#endif
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
status = DelObj(queue);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void OnSubmitCB_dummy(const void* in_packets, uint64_t count, uint64_t user_que_idx, void* data,
|
||||
hsa_amd_queue_intercept_packet_writer writer) {
|
||||
const packet_t* packets_arr = reinterpret_cast<const packet_t*>(in_packets);
|
||||
|
||||
// Submitting the original packets if profiling was not enabled
|
||||
if (writer != NULL) {
|
||||
writer(packets_arr, count);
|
||||
} else {
|
||||
InterceptQueue* obj = reinterpret_cast<InterceptQueue*>(data);
|
||||
Queue* proxy = obj->proxy_;
|
||||
proxy->Submit(packets_arr, count);
|
||||
}
|
||||
}
|
||||
|
||||
static void OnSubmitCB(const void* in_packets, uint64_t count, uint64_t user_que_idx, void* data,
|
||||
hsa_amd_queue_intercept_packet_writer writer) {
|
||||
const packet_t* packets_arr = reinterpret_cast<const packet_t*>(in_packets);
|
||||
InterceptQueue* obj = reinterpret_cast<InterceptQueue*>(data);
|
||||
Queue* proxy = obj->proxy_;
|
||||
|
||||
// Travers input packets
|
||||
for (uint64_t j = 0; j < count; ++j) {
|
||||
const packet_t* packet = &packets_arr[j];
|
||||
|
||||
// Checking for dispatch packet type
|
||||
if (GetHeaderType(packet) == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
|
||||
const hsa_kernel_dispatch_packet_t* dispatch_packet =
|
||||
reinterpret_cast<const hsa_kernel_dispatch_packet_t*>(packet);
|
||||
|
||||
// Prepareing dispatch callback data
|
||||
const hsa_signal_t completion_signal = dispatch_packet->completion_signal;
|
||||
const amd_kernel_code_t* kernel_code = GetKernelCode(dispatch_packet);
|
||||
const uint64_t kernel_symbol = kernel_code->runtime_loader_kernel_symbol;
|
||||
const char* kernel_name = GetKernelName(kernel_symbol);
|
||||
|
||||
// Adding kernel timing tracker
|
||||
::proxy::Tracker::entry_t* entry = roctracer::trace_buffer.GetEntry();
|
||||
entry->kernel.tid = syscall(__NR_gettid);
|
||||
entry->kernel.name = kernel_name;
|
||||
::proxy::Tracker::Enable(roctracer::KERNEL_ENTRY_TYPE, obj->agent_info_->dev_id, completion_signal, entry);
|
||||
const_cast<hsa_kernel_dispatch_packet_t*>(dispatch_packet)->completion_signal = entry->signal;
|
||||
}
|
||||
}
|
||||
|
||||
// Submitting the original packets if profiling was not enabled
|
||||
if (writer != NULL) {
|
||||
writer(packets_arr, count);
|
||||
} else {
|
||||
proxy->Submit(packets_arr, count);
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
static void SetCallbacks(rocprofiler_callback_t dispatch_callback,
|
||||
queue_callback_t create_callback,
|
||||
queue_callback_t destroy_callback,
|
||||
void* data)
|
||||
{
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
callback_data_ = data;
|
||||
dispatch_callback_ = dispatch_callback;
|
||||
create_callback_ = create_callback;
|
||||
destroy_callback_ = destroy_callback;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void Enable(bool val) { is_enabled = val; }
|
||||
|
||||
private:
|
||||
static void queue_event_callback(hsa_status_t status, hsa_queue_t *queue, void *arg) {
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_ABORT(status, "queue error handling is not supported");
|
||||
InterceptQueue* obj = GetObj(queue);
|
||||
if (obj->queue_event_callback_) obj->queue_event_callback_(status, obj->queue_, arg);
|
||||
}
|
||||
|
||||
static hsa_packet_type_t GetHeaderType(const packet_t* packet) {
|
||||
const packet_word_t* header = reinterpret_cast<const packet_word_t*>(packet);
|
||||
return static_cast<hsa_packet_type_t>((*header >> HSA_PACKET_HEADER_TYPE) & header_type_mask);
|
||||
}
|
||||
|
||||
static const amd_kernel_code_t* GetKernelCode(const hsa_kernel_dispatch_packet_t* dispatch_packet) {
|
||||
const amd_kernel_code_t* kernel_code = NULL;
|
||||
hsa_status_t status =
|
||||
util::HsaRsrcFactory::Instance().LoaderApi()->hsa_ven_amd_loader_query_host_address(
|
||||
reinterpret_cast<const void*>(dispatch_packet->kernel_object),
|
||||
reinterpret_cast<const void**>(&kernel_code));
|
||||
if (HSA_STATUS_SUCCESS != status) {
|
||||
kernel_code = reinterpret_cast<amd_kernel_code_t*>(dispatch_packet->kernel_object);
|
||||
}
|
||||
return kernel_code;
|
||||
}
|
||||
|
||||
static const char* GetKernelName(const uint64_t kernel_symbol) {
|
||||
amd_runtime_loader_debug_info_t* dbg_info =
|
||||
reinterpret_cast<amd_runtime_loader_debug_info_t*>(kernel_symbol);
|
||||
const char* kernel_name = (dbg_info != NULL) ? dbg_info->kernel_name : NULL;
|
||||
return (kernel_name != NULL) ? strdup(kernel_name) : strdup(kernel_none_);
|
||||
#if 0
|
||||
// Kernel name is mangled name
|
||||
// apply __cxa_demangle() to demangle it
|
||||
const char* funcname = NULL;
|
||||
if (kernel_name != NULL) {
|
||||
size_t funcnamesize = 0;
|
||||
int status;
|
||||
const char* ret = abi::__cxa_demangle(kernel_name, NULL, &funcnamesize, &status);
|
||||
funcname = (ret != 0) ? ret : strdup(kernel_name);
|
||||
}
|
||||
if (funcname == NULL) funcname = strdup(kernel_none_);
|
||||
|
||||
return funcname;
|
||||
#endif
|
||||
}
|
||||
|
||||
// method to get an intercept queue object
|
||||
static InterceptQueue* GetObj(const hsa_queue_t* queue) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
InterceptQueue* obj = NULL;
|
||||
obj_map_t::const_iterator it = obj_map_->find((uint64_t)queue);
|
||||
if (it != obj_map_->end()) {
|
||||
obj = it->second;
|
||||
assert(queue == obj->queue_);
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
// method to delete an intercept queue object
|
||||
static hsa_status_t DelObj(const hsa_queue_t* queue) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
obj_map_t::const_iterator it = obj_map_->find((uint64_t)queue);
|
||||
if (it != obj_map_->end()) {
|
||||
const InterceptQueue* obj = it->second;
|
||||
assert(queue == obj->queue_);
|
||||
delete obj;
|
||||
obj_map_->erase(it);
|
||||
status = HSA_STATUS_SUCCESS;;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
InterceptQueue(const hsa_agent_t& agent, hsa_queue_t* const queue, ProxyQueue* proxy) :
|
||||
queue_(queue),
|
||||
proxy_(proxy)
|
||||
{
|
||||
agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent);
|
||||
queue_event_callback_ = NULL;
|
||||
}
|
||||
|
||||
~InterceptQueue() {
|
||||
ProxyQueue::Destroy(proxy_);
|
||||
}
|
||||
|
||||
static bool is_enabled;
|
||||
|
||||
static mutex_t mutex_;
|
||||
static const packet_word_t header_type_mask = (1ul << HSA_PACKET_HEADER_WIDTH_TYPE) - 1;
|
||||
#if 0
|
||||
static queue_callback_t create_callback_;
|
||||
static queue_callback_t destroy_callback_;
|
||||
static void* callback_data_;
|
||||
#endif
|
||||
static obj_map_t* obj_map_;
|
||||
static const char* kernel_none_;
|
||||
static bool in_create_call_;
|
||||
static queue_id_t current_queue_id;
|
||||
|
||||
hsa_queue_t* const queue_;
|
||||
ProxyQueue* const proxy_;
|
||||
const util::AgentInfo* agent_info_;
|
||||
queue_event_callback_t queue_event_callback_;
|
||||
queue_id_t queue_id;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_INTERCEPT_QUEUE_H
|
||||
@@ -0,0 +1,63 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#include "proxy/proxy_queue.h"
|
||||
|
||||
#include "proxy/hsa_proxy_queue.h"
|
||||
#include "proxy/simple_proxy_queue.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
void ProxyQueue::HsaIntercept(HsaApiTable* table) {
|
||||
if (rocp_type_) SimpleProxyQueue::HsaIntercept(table);
|
||||
}
|
||||
|
||||
ProxyQueue* ProxyQueue::Create(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source,
|
||||
void* data),
|
||||
void* data, uint32_t private_segment_size,
|
||||
uint32_t group_segment_size, hsa_queue_t** queue,
|
||||
hsa_status_t* status) {
|
||||
hsa_status_t suc = HSA_STATUS_ERROR;
|
||||
ProxyQueue* instance =
|
||||
(rocp_type_) ? (ProxyQueue*) new SimpleProxyQueue() : (ProxyQueue*) new HsaProxyQueue();
|
||||
if (instance != NULL) {
|
||||
suc = instance->Init(agent, size, type, callback, data, private_segment_size,
|
||||
group_segment_size, queue);
|
||||
if (suc != HSA_STATUS_SUCCESS) {
|
||||
delete instance;
|
||||
instance = NULL;
|
||||
}
|
||||
}
|
||||
*status = suc;
|
||||
assert(*status == HSA_STATUS_SUCCESS);
|
||||
return instance;
|
||||
}
|
||||
|
||||
hsa_status_t ProxyQueue::Destroy(const ProxyQueue* obj) {
|
||||
assert(obj != NULL);
|
||||
auto suc = obj->Cleanup();
|
||||
delete obj;
|
||||
return suc;
|
||||
}
|
||||
|
||||
bool ProxyQueue::rocp_type_ = false;
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,77 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _SRC_CORE_PROXY_QUEUE_H
|
||||
#define _SRC_CORE_PROXY_QUEUE_H
|
||||
|
||||
#include <hsa.h>
|
||||
#include <hsa_api_trace.h>
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
#include "proxy/queue.h"
|
||||
#include "proxy/types.h"
|
||||
|
||||
struct HsaApiTable;
|
||||
|
||||
namespace rocprofiler {
|
||||
typedef void (*hsa_amd_queue_intercept_packet_writer)(const void* packets, uint64_t count);
|
||||
typedef void (*on_submit_cb_t)(const void* packet, uint64_t count, uint64_t que_idx, void* data,
|
||||
hsa_amd_queue_intercept_packet_writer writer);
|
||||
|
||||
class ProxyQueue : public Queue {
|
||||
public:
|
||||
static void InitFactory() {
|
||||
const char* type = getenv("ROCP_PROXY_QUEUE");
|
||||
if (type != NULL) {
|
||||
if (strncmp(type, "rocp", 4) == 0) rocp_type_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
static void HsaIntercept(HsaApiTable* table);
|
||||
|
||||
static ProxyQueue* Create(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
|
||||
void* data, uint32_t private_segment_size, uint32_t group_segment_size,
|
||||
hsa_queue_t** queue, hsa_status_t* status);
|
||||
|
||||
static hsa_status_t Destroy(const ProxyQueue* obj);
|
||||
|
||||
virtual hsa_status_t Init(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
|
||||
void* data, uint32_t private_segment_size, uint32_t group_segment_size,
|
||||
hsa_queue_t** queue) = 0;
|
||||
virtual hsa_status_t Cleanup() const = 0;
|
||||
virtual hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) = 0;
|
||||
virtual void Submit(const packet_t* packet) = 0;
|
||||
|
||||
protected:
|
||||
virtual ~ProxyQueue(){};
|
||||
|
||||
private:
|
||||
static bool rocp_type_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_PROXY_QUEUE_H
|
||||
@@ -0,0 +1,42 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _SRC_CORE_QUEUE_H
|
||||
#define _SRC_CORE_QUEUE_H
|
||||
|
||||
#include "proxy/types.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
|
||||
class Queue {
|
||||
public:
|
||||
Queue() {}
|
||||
virtual ~Queue() {}
|
||||
virtual void Submit(const packet_t* packet) = 0;
|
||||
virtual void Submit(const packet_t* packet, const size_t& count) {
|
||||
for (const packet_t* p = packet; p < packet + count; ++p) Submit(p);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_QUEUE_H
|
||||
@@ -0,0 +1,40 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#include "proxy/simple_proxy_queue.h"
|
||||
|
||||
namespace rocprofiler {
|
||||
void SimpleProxyQueue::HsaIntercept(HsaApiTable* table) {
|
||||
table->core_->hsa_signal_store_relaxed_fn = rocprofiler::SimpleProxyQueue::SignalStore;
|
||||
table->core_->hsa_signal_store_screlease_fn = rocprofiler::SimpleProxyQueue::SignalStore;
|
||||
|
||||
table->core_->hsa_queue_load_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::GetQueueIndex;
|
||||
table->core_->hsa_queue_store_write_index_relaxed_fn = rocprofiler::SimpleProxyQueue::SetQueueIndex;
|
||||
table->core_->hsa_queue_load_read_index_relaxed_fn = rocprofiler::SimpleProxyQueue::GetSubmitIndex;
|
||||
|
||||
table->core_->hsa_queue_load_write_index_scacquire_fn = rocprofiler::SimpleProxyQueue::GetQueueIndex;
|
||||
table->core_->hsa_queue_store_write_index_screlease_fn = rocprofiler::SimpleProxyQueue::SetQueueIndex;
|
||||
table->core_->hsa_queue_load_read_index_scacquire_fn = rocprofiler::SimpleProxyQueue::GetSubmitIndex;
|
||||
}
|
||||
|
||||
SimpleProxyQueue::queue_map_t* SimpleProxyQueue::queue_map_ = NULL;
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,261 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef _SRC_CORE_SIMPLE_PROXY_QUEUE_H
|
||||
#define _SRC_CORE_SIMPLE_PROXY_QUEUE_H
|
||||
|
||||
#include <hsa.h>
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
#include "proxy/proxy_queue.h"
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
|
||||
#ifndef ROCP_PROXY_LOCK
|
||||
# define ROCP_PROXY_LOCK 1
|
||||
#endif
|
||||
|
||||
namespace rocprofiler {
|
||||
extern decltype(hsa_queue_create)* hsa_queue_create_fn;
|
||||
extern decltype(hsa_queue_destroy)* hsa_queue_destroy_fn;
|
||||
|
||||
extern decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed_fn;
|
||||
extern decltype(hsa_signal_store_relaxed)* hsa_signal_store_screlease_fn;
|
||||
|
||||
extern decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed_fn;
|
||||
extern decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed_fn;
|
||||
extern decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed_fn;
|
||||
|
||||
extern decltype(hsa_queue_load_write_index_scacquire)* hsa_queue_load_write_index_scacquire_fn;
|
||||
extern decltype(hsa_queue_store_write_index_screlease)* hsa_queue_store_write_index_screlease_fn;
|
||||
extern decltype(hsa_queue_load_read_index_scacquire)* hsa_queue_load_read_index_scacquire_fn;
|
||||
|
||||
typedef decltype(hsa_signal_t::handle) signal_handle_t;
|
||||
|
||||
|
||||
class SimpleProxyQueue : public ProxyQueue {
|
||||
public:
|
||||
static void HsaIntercept(HsaApiTable* table);
|
||||
|
||||
static void SignalStore(hsa_signal_t signal, hsa_signal_value_t que_idx) {
|
||||
auto it = queue_map_->find(signal.handle);
|
||||
if (it != queue_map_->end()) {
|
||||
SimpleProxyQueue* instance = it->second;
|
||||
instance->mutex_lock();
|
||||
const uint64_t begin = instance->submit_index_;
|
||||
const uint64_t end = que_idx + 1;
|
||||
instance->submit_index_ = end;
|
||||
instance->mutex_unlock();
|
||||
for (uint64_t j = begin; j < end; ++j) {
|
||||
// Submited packet
|
||||
const uint32_t idx = j & instance->queue_mask_;
|
||||
packet_t* packet = reinterpret_cast<packet_t*>(instance->queue_->base_address) + idx;
|
||||
if (instance->on_submit_cb_ != NULL)
|
||||
instance->on_submit_cb_(packet, 1, j, instance->on_submit_cb_data_, NULL);
|
||||
else
|
||||
instance->Submit(packet);
|
||||
}
|
||||
} else {
|
||||
hsa_signal_store_relaxed_fn(signal, que_idx);
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t GetSubmitIndex(const hsa_queue_t* queue) {
|
||||
uint64_t index = 0;
|
||||
auto it = queue_map_->find(queue->doorbell_signal.handle);
|
||||
if (it != queue_map_->end()) {
|
||||
SimpleProxyQueue* instance = it->second;
|
||||
index = instance->submit_index_;
|
||||
} else {
|
||||
index = hsa_queue_load_read_index_relaxed_fn(queue);
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
static uint64_t GetQueueIndex(const hsa_queue_t* queue) {
|
||||
uint64_t index = 0;
|
||||
auto it = queue_map_->find(queue->doorbell_signal.handle);
|
||||
if (it != queue_map_->end()) {
|
||||
SimpleProxyQueue* instance = it->second;
|
||||
instance->mutex_lock();
|
||||
index = instance->queue_index_;
|
||||
} else {
|
||||
index = hsa_queue_load_write_index_relaxed_fn(queue);
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
static void SetQueueIndex(const hsa_queue_t* queue, uint64_t value) {
|
||||
auto it = queue_map_->find(queue->doorbell_signal.handle);
|
||||
if (it != queue_map_->end()) {
|
||||
SimpleProxyQueue* instance = it->second;
|
||||
instance->queue_index_ = value;
|
||||
instance->mutex_unlock();
|
||||
} else {
|
||||
hsa_queue_store_write_index_relaxed_fn(queue, value);
|
||||
}
|
||||
}
|
||||
|
||||
hsa_status_t SetInterceptCB(on_submit_cb_t on_submit_cb, void* data) {
|
||||
on_submit_cb_ = on_submit_cb;
|
||||
on_submit_cb_data_ = data;
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
void Submit(const packet_t* packet) {
|
||||
// Compute the write index of queue
|
||||
const uint64_t que_idx = hsa_queue_load_write_index_relaxed_fn(queue_);
|
||||
|
||||
// Waiting untill there is a free space in the queue
|
||||
while (que_idx >= (hsa_queue_load_read_index_relaxed_fn(queue_) + size_));
|
||||
|
||||
// Increment the write index
|
||||
hsa_queue_store_write_index_relaxed_fn(queue_, que_idx + 1);
|
||||
|
||||
const uint32_t mask = queue_->size - 1;
|
||||
const uint32_t idx = que_idx & mask;
|
||||
|
||||
// Copy packet to the queue
|
||||
const packet_word_t* src = reinterpret_cast<const packet_word_t*>(packet);
|
||||
packet_word_t* dst = reinterpret_cast<packet_word_t*>(base_address_ + idx);
|
||||
for (unsigned i = 1; i < sizeof(packet_t) / sizeof(packet_word_t); ++i) {
|
||||
dst[i] = src[i];
|
||||
}
|
||||
|
||||
// To maintain global order to ensure the prior copy of the packet contents is made visible
|
||||
// before the header is updated.
|
||||
// With in-order CP it will wait until the first packet in the blob will be valid.
|
||||
std::atomic<packet_word_t>* header_atomic_ptr =
|
||||
reinterpret_cast<std::atomic<packet_word_t>*>(&dst[0]);
|
||||
header_atomic_ptr->store(src[0], std::memory_order_release);
|
||||
|
||||
// Doorbell signaling to submit the packet
|
||||
hsa_signal_store_relaxed_fn(doorbell_signal_, que_idx);
|
||||
}
|
||||
|
||||
SimpleProxyQueue()
|
||||
: agent_info_(NULL),
|
||||
queue_(NULL),
|
||||
base_address_(NULL),
|
||||
doorbell_signal_({}),
|
||||
queue_index_(0),
|
||||
queue_mask_(0),
|
||||
submit_index_(0),
|
||||
on_submit_cb_(NULL),
|
||||
on_submit_cb_data_(NULL)
|
||||
{
|
||||
printf("ROCProfiler: SimpleProxyQueue is enabled\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
~SimpleProxyQueue() {}
|
||||
|
||||
private:
|
||||
typedef std::map<signal_handle_t, SimpleProxyQueue*> queue_map_t;
|
||||
|
||||
hsa_status_t Init(hsa_agent_t agent, uint32_t size, hsa_queue_type32_t type,
|
||||
void (*callback)(hsa_status_t status, hsa_queue_t* source, void* data),
|
||||
void* data, uint32_t private_segment_size, uint32_t group_segment_size,
|
||||
hsa_queue_t** queue) {
|
||||
size_ = size;
|
||||
auto status = Init(agent, size);
|
||||
*queue = queue_;
|
||||
return status;
|
||||
}
|
||||
|
||||
hsa_status_t Init(hsa_agent_t agent, uint32_t size) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
agent_info_ = util::HsaRsrcFactory::Instance().GetAgentInfo(agent);
|
||||
if (agent_info_ != NULL) {
|
||||
if (agent_info_->dev_type == HSA_DEVICE_TYPE_GPU) {
|
||||
status = hsa_queue_create_fn(agent, size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, UINT32_MAX,
|
||||
UINT32_MAX, &queue_);
|
||||
if (status == HSA_STATUS_SUCCESS) {
|
||||
base_address_ = reinterpret_cast<packet_t*>(queue_->base_address);
|
||||
doorbell_signal_ = queue_->doorbell_signal;
|
||||
data_array_ = calloc(size + 1, sizeof(packet_t));
|
||||
uintptr_t addr = (uintptr_t)data_array_;
|
||||
queue_->base_address = (void*)((addr + align_mask_) & ~align_mask_);
|
||||
status = hsa_signal_create(1, 0, NULL, &(queue_->doorbell_signal));
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
queue_mask_ = size - 1;
|
||||
|
||||
if (queue_map_ == NULL) queue_map_ = new queue_map_t;
|
||||
(*queue_map_)[queue_->doorbell_signal.handle] = this;
|
||||
}
|
||||
else abort();
|
||||
}
|
||||
}
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
return status;
|
||||
}
|
||||
|
||||
hsa_status_t Cleanup() const {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
hsa_signal_t queue_signal = queue_->doorbell_signal;
|
||||
|
||||
// Destroy original HSA queue
|
||||
queue_->base_address = base_address_;
|
||||
queue_->doorbell_signal = doorbell_signal_;
|
||||
status = hsa_queue_destroy_fn(queue_);
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
|
||||
// Destroy overloaded virtual queue data and signal
|
||||
free(data_array_);
|
||||
status = hsa_signal_destroy(queue_signal);
|
||||
if (status != HSA_STATUS_SUCCESS) abort();
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void mutex_lock() {
|
||||
#if ROCP_PROXY_LOCK
|
||||
mutex_.lock();
|
||||
#endif
|
||||
}
|
||||
|
||||
void mutex_unlock() {
|
||||
#if ROCP_PROXY_LOCK
|
||||
mutex_.unlock();
|
||||
#endif
|
||||
}
|
||||
|
||||
uint32_t size_;
|
||||
static queue_map_t* queue_map_;
|
||||
const util::AgentInfo* agent_info_;
|
||||
hsa_queue_t* queue_;
|
||||
static const uintptr_t align_mask_ = sizeof(packet_t) - 1;
|
||||
packet_t* base_address_;
|
||||
hsa_signal_t doorbell_signal_;
|
||||
uint64_t queue_index_;
|
||||
uint64_t queue_mask_;
|
||||
uint64_t submit_index_;
|
||||
std::mutex mutex_;
|
||||
on_submit_cb_t on_submit_cb_;
|
||||
void* on_submit_cb_data_;
|
||||
void* data_array_;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // _SRC_CORE_SIMPLE_PROXY_QUEUE_H
|
||||
@@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef SRC_CORE_TRACKER_H_
|
||||
#define SRC_CORE_TRACKER_H_
|
||||
#ifndef SRC_PROXY_TRACKER_H_
|
||||
#define SRC_PROXY_TRACKER_H_
|
||||
|
||||
#include <amd_hsa_signal.h>
|
||||
#include <assert.h>
|
||||
@@ -29,180 +29,58 @@ THE SOFTWARE.
|
||||
#include <hsa_ext_amd.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
|
||||
#include "util/hsa_rsrc_factory.h"
|
||||
#include "util/exception.h"
|
||||
#include "util/logger.h"
|
||||
#include "core/trace_buffer.h"
|
||||
|
||||
namespace proxy {
|
||||
// Dispatch record
|
||||
typedef struct {
|
||||
uint64_t dispatch; // dispatch timestamp, ns
|
||||
uint64_t begin; // kernel begin timestamp, ns
|
||||
uint64_t end; // kernel end timestamp, ns
|
||||
uint64_t complete; // completion signal timestamp, ns
|
||||
} async_record_t;
|
||||
|
||||
class Tracker {
|
||||
public:
|
||||
typedef std::mutex mutex_t;
|
||||
typedef util::HsaRsrcFactory::timestamp_t timestamp_t;
|
||||
typedef async_record_t record_t;
|
||||
struct entry_t;
|
||||
typedef std::list<entry_t*> sig_list_t;
|
||||
typedef sig_list_t::iterator sig_list_it_t;
|
||||
typedef uint64_t counter_t;
|
||||
|
||||
struct entry_t {
|
||||
counter_t index;
|
||||
std::atomic<bool> valid;
|
||||
Tracker* tracker;
|
||||
sig_list_t::iterator it;
|
||||
hsa_agent_t agent;
|
||||
hsa_signal_t orig;
|
||||
hsa_signal_t signal;
|
||||
record_t* record;
|
||||
std::atomic<void*> handler;
|
||||
void* arg;
|
||||
bool is_memcopy;
|
||||
};
|
||||
|
||||
static Tracker* Create() {
|
||||
std::lock_guard<mutex_t> lck(glob_mutex_);
|
||||
Tracker* obj = instance_.load(std::memory_order_relaxed);
|
||||
if (obj == NULL) {
|
||||
obj = new Tracker;
|
||||
if (obj == NULL) EXC_ABORT(HSA_STATUS_ERROR, "Tracker creation failed");
|
||||
instance_.store(obj, std::memory_order_release);
|
||||
}
|
||||
return obj;
|
||||
}
|
||||
|
||||
static Tracker& Instance() {
|
||||
Tracker* obj = instance_.load(std::memory_order_acquire);
|
||||
if (obj == NULL) obj = Create();
|
||||
return *obj;
|
||||
}
|
||||
|
||||
static void Destroy() {
|
||||
std::lock_guard<mutex_t> lck(glob_mutex_);
|
||||
if (instance_ != NULL) delete instance_;
|
||||
instance_ = NULL;
|
||||
}
|
||||
typedef roctracer::trace_entry_t entry_t;
|
||||
|
||||
// Add tracker entry
|
||||
entry_t* Alloc(const hsa_agent_t& agent, const hsa_signal_t& orig) {
|
||||
inline static void Enable(uint32_t type, const hsa_agent_t& agent, const hsa_signal_t& signal, entry_t* entry) {
|
||||
hsa_status_t status = HSA_STATUS_ERROR;
|
||||
util::HsaRsrcFactory* hsa_rsrc = &(util::HsaRsrcFactory::Instance());
|
||||
|
||||
// Creating a new tracker entry
|
||||
entry_t* entry = new entry_t{};
|
||||
assert(entry);
|
||||
entry->tracker = this;
|
||||
entry->type = type;
|
||||
entry->agent = agent;
|
||||
entry->orig = orig;
|
||||
|
||||
// Creating a record with the dispatch timestamps
|
||||
record_t* record = new record_t{};
|
||||
assert(record);
|
||||
record->dispatch = hsa_rsrc_->TimestampNs();
|
||||
entry->record = record;
|
||||
entry->orig = signal;
|
||||
entry->dispatch = hsa_rsrc->TimestampNs();
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_INIT, std::memory_order_release);
|
||||
|
||||
// Creating a proxy signal
|
||||
status = hsa_signal_create(1, 0, NULL, &(entry->signal));
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_signal_create");
|
||||
status = hsa_amd_signal_async_handler(entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_signal_async_handler");
|
||||
|
||||
// Adding antry to the list
|
||||
mutex_.lock();
|
||||
entry->it = sig_list_.insert(sig_list_.end(), entry);
|
||||
entry->index = counter_++;
|
||||
mutex_.unlock();
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
// Delete tracker entry
|
||||
void Delete(entry_t* entry) {
|
||||
hsa_signal_destroy(entry->signal);
|
||||
mutex_.lock();
|
||||
sig_list_.erase(entry->it);
|
||||
mutex_.unlock();
|
||||
delete entry;
|
||||
}
|
||||
|
||||
// Enable tracker entry
|
||||
void Enable(entry_t* entry, void* handler, void* arg) {
|
||||
// Set entry handler and release the entry
|
||||
entry->arg = arg;
|
||||
entry->handler.store(handler, std::memory_order_release);
|
||||
|
||||
// Debug trace
|
||||
if (trace_on_) {
|
||||
auto outstanding = outstanding_.fetch_add(1);
|
||||
fprintf(stdout, "Tracker::Add: entry %p, record %p, outst %lu\n", entry, entry->record, outstanding);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
void EnableDispatch(entry_t* entry, hsa_amd_signal_handler handler, void* arg) {
|
||||
entry->is_memcopy = false;
|
||||
Enable(entry, reinterpret_cast<void*>(handler), arg);
|
||||
}
|
||||
void EnableMemcopy(entry_t* entry, hsa_amd_signal_handler handler, void* arg) {
|
||||
entry->is_memcopy = true;
|
||||
Enable(entry, reinterpret_cast<void*>(handler), arg);
|
||||
}
|
||||
|
||||
private:
|
||||
Tracker() :
|
||||
outstanding_(0),
|
||||
hsa_rsrc_(&(util::HsaRsrcFactory::Instance()))
|
||||
{}
|
||||
|
||||
~Tracker() {
|
||||
auto it = sig_list_.begin();
|
||||
auto end = sig_list_.end();
|
||||
while (it != end) {
|
||||
auto cur = it++;
|
||||
hsa_rsrc_->SignalWait((*cur)->signal);
|
||||
Erase(cur);
|
||||
}
|
||||
}
|
||||
|
||||
// Delete an entry by iterator
|
||||
void Erase(const sig_list_it_t& it) { Delete(*it); }
|
||||
|
||||
// Entry completion
|
||||
inline void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
|
||||
record_t* record = entry->record;
|
||||
|
||||
// Debug trace
|
||||
if (trace_on_) {
|
||||
auto outstanding = outstanding_.fetch_sub(1);
|
||||
fprintf(stdout, "Tracker::Handler: entry %p, record %p, outst %lu\n", entry, entry->record, outstanding);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
inline static void Complete(hsa_signal_value_t signal_value, entry_t* entry) {
|
||||
// Query begin/end and complete timestamps
|
||||
if (entry->is_memcopy) {
|
||||
util::HsaRsrcFactory* hsa_rsrc = &(util::HsaRsrcFactory::Instance());
|
||||
if (entry->type == roctracer::COPY_ENTRY_TYPE) {
|
||||
hsa_amd_profiling_async_copy_time_t async_copy_time{};
|
||||
hsa_status_t status = hsa_amd_profiling_get_async_copy_time(entry->signal, &async_copy_time);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_async_copy_time");
|
||||
record->begin = hsa_rsrc_->SysclockToNs(async_copy_time.start);
|
||||
record->end = hsa_rsrc_->SysclockToNs(async_copy_time.end);
|
||||
entry->begin = hsa_rsrc->SysclockToNs(async_copy_time.start);
|
||||
entry->end = hsa_rsrc->SysclockToNs(async_copy_time.end);
|
||||
} else {
|
||||
hsa_amd_profiling_dispatch_time_t dispatch_time{};
|
||||
hsa_status_t status = hsa_amd_profiling_get_dispatch_time(entry->agent, entry->signal, &dispatch_time);
|
||||
if (status != HSA_STATUS_SUCCESS) EXC_RAISING(status, "hsa_amd_profiling_get_dispatch_time");
|
||||
record->begin = hsa_rsrc_->SysclockToNs(dispatch_time.start);
|
||||
record->end = hsa_rsrc_->SysclockToNs(dispatch_time.end);
|
||||
entry->begin = hsa_rsrc->SysclockToNs(dispatch_time.start);
|
||||
entry->end = hsa_rsrc->SysclockToNs(dispatch_time.end);
|
||||
}
|
||||
|
||||
record->complete = hsa_rsrc_->TimestampNs();
|
||||
entry->valid.store(true, std::memory_order_release);
|
||||
entry->complete = hsa_rsrc->TimestampNs();
|
||||
entry->valid.store(roctracer::TRACE_ENTRY_COMPL, std::memory_order_release);
|
||||
|
||||
// Original intercepted signal completion
|
||||
hsa_signal_t orig = entry->orig;
|
||||
@@ -216,72 +94,22 @@ class Tracker {
|
||||
if (signal_value != new_value) EXC_ABORT(HSA_STATUS_ERROR, "Tracker::Complete bad signal value");
|
||||
hsa_signal_store_screlease(orig, signal_value);
|
||||
}
|
||||
}
|
||||
|
||||
inline static void HandleEntry(hsa_signal_value_t signal_value, entry_t* entry) {
|
||||
// Call entry handler
|
||||
void* handler = static_cast<void*>(entry->handler);
|
||||
reinterpret_cast<hsa_amd_signal_handler>(handler)(signal_value, entry->arg);
|
||||
// Delete tracker entry
|
||||
entry->tracker->Delete(entry);
|
||||
hsa_signal_destroy(entry->signal);
|
||||
}
|
||||
|
||||
// Handler for packet completion
|
||||
static bool Handler(hsa_signal_value_t signal_value, void* arg) {
|
||||
// Acquire entry
|
||||
entry_t* entry = reinterpret_cast<entry_t*>(arg);
|
||||
volatile std::atomic<void*>* ptr = &entry->handler;
|
||||
while (ptr->load(std::memory_order_acquire) == NULL) sched_yield();
|
||||
while (entry->valid.load(std::memory_order_acquire) != roctracer::TRACE_ENTRY_INIT) sched_yield();
|
||||
|
||||
// Complete entry
|
||||
Tracker* tracker = entry->tracker;
|
||||
tracker->Complete(signal_value, entry);
|
||||
|
||||
if (ordering_enabled_ == false) {
|
||||
HandleEntry(signal_value, entry);
|
||||
} else {
|
||||
// Acquire last entry
|
||||
entry_t* back = tracker->sig_list_.back();
|
||||
volatile std::atomic<void*>* ptr = &back->handler;
|
||||
while (ptr->load(std::memory_order_acquire) == NULL) sched_yield();
|
||||
|
||||
tracker->handler_mutex_.lock();
|
||||
sig_list_it_t it = tracker->sig_list_.begin();
|
||||
sig_list_it_t end = back->it;
|
||||
while (it != end) {
|
||||
entry = *(it++);
|
||||
if (entry->valid.load(std::memory_order_acquire)) {
|
||||
HandleEntry(signal_value, entry);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tracker->handler_mutex_.unlock();
|
||||
}
|
||||
Tracker::Complete(signal_value, entry);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// instance
|
||||
static std::atomic<Tracker*> instance_;
|
||||
static mutex_t glob_mutex_;
|
||||
static counter_t counter_;
|
||||
|
||||
// Tracked signals list
|
||||
sig_list_t sig_list_;
|
||||
// Inter-thread synchronization
|
||||
mutex_t mutex_;
|
||||
mutex_t handler_mutex_;
|
||||
// Outstanding dispatches
|
||||
std::atomic<uint64_t> outstanding_;
|
||||
// HSA resources factory
|
||||
util::HsaRsrcFactory* hsa_rsrc_;
|
||||
// Handling ordering enabled
|
||||
static const bool ordering_enabled_ = false;
|
||||
// Enable tracing
|
||||
static const bool trace_on_ = false;
|
||||
};
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // SRC_CORE_TRACKER_H_
|
||||
#endif // SRC_PROXY_TRACKER_H_
|
||||
|
||||
@@ -0,0 +1,50 @@
|
||||
/******************************************************************************
|
||||
Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*******************************************************************************/
|
||||
|
||||
#ifndef SRC_CORE_TYPES_H_
|
||||
#define SRC_CORE_TYPES_H_
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <hsa_ven_amd_aqlprofile.h>
|
||||
|
||||
namespace rocprofiler {
|
||||
typedef hsa_ven_amd_aqlprofile_pfn_t pfn_t;
|
||||
typedef hsa_ven_amd_aqlprofile_event_t event_t;
|
||||
typedef hsa_ven_amd_aqlprofile_parameter_t parameter_t;
|
||||
typedef hsa_ven_amd_aqlprofile_profile_t profile_t;
|
||||
typedef hsa_ext_amd_aql_pm4_packet_t packet_t;
|
||||
typedef uint32_t packet_word_t;
|
||||
typedef uint64_t timestamp_t;
|
||||
|
||||
inline std::ostream& operator<< (std::ostream& out, const event_t& event) {
|
||||
out << "[block_name(" << event.block_name << "). block_index(" << event.block_index << "). counter_id(" << event.counter_id << ")]";
|
||||
return out;
|
||||
}
|
||||
inline std::ostream& operator<< (std::ostream& out, const parameter_t& parameter) {
|
||||
out << "[parameter_name(" << parameter.parameter_name << "). value(" << parameter.value << ")]";
|
||||
return out;
|
||||
}
|
||||
|
||||
} // namespace rocprofiler
|
||||
|
||||
#endif // SRC_CORE_TYPES_H_
|
||||
@@ -145,7 +145,7 @@ HsaRsrcFactory::HsaRsrcFactory(bool initialize_hsa) : initialize_hsa_(initialize
|
||||
CHECK_STATUS("loader API table query failed", status);
|
||||
|
||||
// Instantiate HSA timer
|
||||
timer_ = new HsaTimer;
|
||||
timer_ = new HsaTimer(&hsa_api_);
|
||||
CHECK_STATUS("HSA timer allocation failed",
|
||||
(timer_ == NULL) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS);
|
||||
|
||||
@@ -172,7 +172,6 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_init = table->core_->hsa_init_fn;
|
||||
hsa_api_.hsa_shut_down = table->core_->hsa_shut_down_fn;
|
||||
hsa_api_.hsa_agent_get_info = table->core_->hsa_agent_get_info_fn;
|
||||
|
||||
hsa_api_.hsa_iterate_agents = table->core_->hsa_iterate_agents_fn;
|
||||
|
||||
hsa_api_.hsa_queue_create = table->core_->hsa_queue_create_fn;
|
||||
@@ -180,14 +179,13 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed = table->core_->hsa_queue_load_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed = table->core_->hsa_queue_store_write_index_relaxed_fn;
|
||||
hsa_api_.hsa_queue_load_read_index_relaxed = table->core_->hsa_queue_load_read_index_relaxed_fn;
|
||||
|
||||
hsa_api_.hsa_signal_create = table->core_->hsa_signal_create_fn;
|
||||
hsa_api_.hsa_signal_destroy = table->core_->hsa_signal_destroy_fn;
|
||||
hsa_api_.hsa_signal_load_relaxed = table->core_->hsa_signal_load_relaxed_fn;
|
||||
hsa_api_.hsa_signal_store_relaxed = table->core_->hsa_signal_store_relaxed_fn;
|
||||
hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn;
|
||||
hsa_api_.hsa_signal_wait_scacquire = table->core_->hsa_signal_wait_scacquire_fn;
|
||||
|
||||
hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn;
|
||||
hsa_api_.hsa_signal_store_screlease = table->core_->hsa_signal_store_screlease_fn;
|
||||
|
||||
hsa_api_.hsa_code_object_reader_create_from_file = table->core_->hsa_code_object_reader_create_from_file_fn;
|
||||
hsa_api_.hsa_executable_create_alt = table->core_->hsa_executable_create_alt_fn;
|
||||
@@ -195,21 +193,23 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_executable_freeze = table->core_->hsa_executable_freeze_fn;
|
||||
hsa_api_.hsa_executable_get_symbol = table->core_->hsa_executable_get_symbol_fn;
|
||||
|
||||
hsa_api_.hsa_system_get_info = table->core_->hsa_system_get_info_fn;
|
||||
hsa_api_.hsa_system_get_major_extension_table = table->core_->hsa_system_get_major_extension_table_fn;
|
||||
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools = table->amd_ext_->hsa_amd_agent_iterate_memory_pools_fn;
|
||||
hsa_api_.hsa_amd_memory_pool_get_info = table->amd_ext_->hsa_amd_memory_pool_get_info_fn;
|
||||
hsa_api_.hsa_amd_memory_pool_allocate = table->amd_ext_->hsa_amd_memory_pool_allocate_fn;
|
||||
hsa_api_.hsa_amd_agents_allow_access = table->amd_ext_->hsa_amd_agents_allow_access_fn;
|
||||
|
||||
hsa_api_.hsa_amd_memory_async_copy = table->amd_ext_->hsa_amd_memory_async_copy_fn;
|
||||
|
||||
hsa_api_.hsa_amd_signal_async_handler = table->amd_ext_->hsa_amd_signal_async_handler_fn;
|
||||
hsa_api_.hsa_amd_profiling_set_profiler_enabled = table->amd_ext_->hsa_amd_profiling_set_profiler_enabled_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_async_copy_time = table->amd_ext_->hsa_amd_profiling_get_async_copy_time_fn;
|
||||
hsa_api_.hsa_amd_profiling_get_dispatch_time = table->amd_ext_->hsa_amd_profiling_get_dispatch_time_fn;
|
||||
} else {
|
||||
hsa_api_.hsa_init = hsa_init;
|
||||
hsa_api_.hsa_shut_down = hsa_shut_down;
|
||||
hsa_api_.hsa_agent_get_info = hsa_agent_get_info;
|
||||
|
||||
hsa_api_.hsa_iterate_agents = hsa_iterate_agents;
|
||||
|
||||
hsa_api_.hsa_queue_create = hsa_queue_create;
|
||||
@@ -217,19 +217,13 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_queue_load_write_index_relaxed = hsa_queue_load_write_index_relaxed;
|
||||
hsa_api_.hsa_queue_store_write_index_relaxed = hsa_queue_store_write_index_relaxed;
|
||||
hsa_api_.hsa_queue_load_read_index_relaxed = hsa_queue_load_read_index_relaxed;
|
||||
|
||||
hsa_api_.hsa_signal_create = hsa_signal_create;
|
||||
hsa_api_.hsa_signal_destroy = hsa_signal_destroy;
|
||||
hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed;
|
||||
hsa_api_.hsa_signal_store_relaxed = hsa_signal_store_relaxed;
|
||||
hsa_api_.hsa_signal_wait_scacquire = hsa_signal_wait_scacquire;
|
||||
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools = hsa_amd_agent_iterate_memory_pools;
|
||||
hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info;
|
||||
hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate;
|
||||
hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access;
|
||||
|
||||
hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy;
|
||||
|
||||
hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table;
|
||||
hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease;
|
||||
|
||||
hsa_api_.hsa_code_object_reader_create_from_file = hsa_code_object_reader_create_from_file;
|
||||
hsa_api_.hsa_executable_create_alt = hsa_executable_create_alt;
|
||||
@@ -237,11 +231,19 @@ void HsaRsrcFactory::InitHsaApiTable(HsaApiTable* table) {
|
||||
hsa_api_.hsa_executable_freeze = hsa_executable_freeze;
|
||||
hsa_api_.hsa_executable_get_symbol = hsa_executable_get_symbol;
|
||||
|
||||
hsa_api_.hsa_system_get_info = hsa_system_get_info;
|
||||
hsa_api_.hsa_system_get_major_extension_table = hsa_system_get_major_extension_table;
|
||||
|
||||
hsa_api_.hsa_amd_agent_iterate_memory_pools = hsa_amd_agent_iterate_memory_pools;
|
||||
hsa_api_.hsa_amd_memory_pool_get_info = hsa_amd_memory_pool_get_info;
|
||||
hsa_api_.hsa_amd_memory_pool_allocate = hsa_amd_memory_pool_allocate;
|
||||
hsa_api_.hsa_amd_agents_allow_access = hsa_amd_agents_allow_access;
|
||||
hsa_api_.hsa_amd_memory_async_copy = hsa_amd_memory_async_copy;
|
||||
|
||||
hsa_api_.hsa_amd_signal_async_handler = hsa_amd_signal_async_handler;
|
||||
hsa_api_.hsa_amd_profiling_set_profiler_enabled = hsa_amd_profiling_set_profiler_enabled;
|
||||
hsa_api_.hsa_amd_profiling_get_async_copy_time = hsa_amd_profiling_get_async_copy_time;
|
||||
hsa_api_.hsa_amd_profiling_get_dispatch_time = hsa_amd_profiling_get_dispatch_time;
|
||||
hsa_api_.hsa_signal_load_relaxed = hsa_signal_load_relaxed;
|
||||
hsa_api_.hsa_signal_store_screlease = hsa_signal_store_screlease;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -71,41 +71,43 @@ static const size_t MEM_PAGE_MASK = MEM_PAGE_BYTES - 1;
|
||||
typedef decltype(hsa_agent_t::handle) hsa_agent_handle_t;
|
||||
|
||||
struct hsa_pfn_t {
|
||||
decltype(::hsa_init)* hsa_init;
|
||||
decltype(::hsa_shut_down)* hsa_shut_down;
|
||||
decltype(::hsa_agent_get_info)* hsa_agent_get_info;
|
||||
decltype(hsa_init)* hsa_init;
|
||||
decltype(hsa_shut_down)* hsa_shut_down;
|
||||
decltype(hsa_agent_get_info)* hsa_agent_get_info;
|
||||
decltype(hsa_iterate_agents)* hsa_iterate_agents;
|
||||
|
||||
decltype(::hsa_iterate_agents)* hsa_iterate_agents;
|
||||
decltype(hsa_queue_create)* hsa_queue_create;
|
||||
decltype(hsa_queue_destroy)* hsa_queue_destroy;
|
||||
decltype(hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed;
|
||||
decltype(hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed;
|
||||
decltype(hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed;
|
||||
|
||||
decltype(::hsa_queue_create)* hsa_queue_create;
|
||||
decltype(::hsa_queue_destroy)* hsa_queue_destroy;
|
||||
decltype(::hsa_queue_load_write_index_relaxed)* hsa_queue_load_write_index_relaxed;
|
||||
decltype(::hsa_queue_store_write_index_relaxed)* hsa_queue_store_write_index_relaxed;
|
||||
decltype(::hsa_queue_load_read_index_relaxed)* hsa_queue_load_read_index_relaxed;
|
||||
decltype(::hsa_signal_create)* hsa_signal_create;
|
||||
decltype(::hsa_signal_destroy)* hsa_signal_destroy;
|
||||
decltype(::hsa_signal_store_relaxed)* hsa_signal_store_relaxed;
|
||||
decltype(::hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire;
|
||||
decltype(hsa_signal_create)* hsa_signal_create;
|
||||
decltype(hsa_signal_destroy)* hsa_signal_destroy;
|
||||
decltype(hsa_signal_load_relaxed)* hsa_signal_load_relaxed;
|
||||
decltype(hsa_signal_store_relaxed)* hsa_signal_store_relaxed;
|
||||
decltype(hsa_signal_wait_scacquire)* hsa_signal_wait_scacquire;
|
||||
decltype(hsa_signal_store_screlease)* hsa_signal_store_screlease;
|
||||
|
||||
decltype(::hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools;
|
||||
decltype(::hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info;
|
||||
decltype(::hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate;
|
||||
decltype(::hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access;
|
||||
decltype(::hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy;
|
||||
decltype(hsa_code_object_reader_create_from_file)* hsa_code_object_reader_create_from_file;
|
||||
decltype(hsa_executable_create_alt)* hsa_executable_create_alt;
|
||||
decltype(hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object;
|
||||
decltype(hsa_executable_freeze)* hsa_executable_freeze;
|
||||
decltype(hsa_executable_get_symbol)* hsa_executable_get_symbol;
|
||||
|
||||
decltype(::hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table;
|
||||
decltype(hsa_system_get_info)* hsa_system_get_info;
|
||||
decltype(hsa_system_get_major_extension_table)* hsa_system_get_major_extension_table;
|
||||
|
||||
decltype(::hsa_code_object_reader_create_from_file)* hsa_code_object_reader_create_from_file;
|
||||
decltype(::hsa_executable_create_alt)* hsa_executable_create_alt;
|
||||
decltype(::hsa_executable_load_agent_code_object)* hsa_executable_load_agent_code_object;
|
||||
decltype(::hsa_executable_freeze)* hsa_executable_freeze;
|
||||
decltype(::hsa_executable_get_symbol)* hsa_executable_get_symbol;
|
||||
decltype(hsa_amd_agent_iterate_memory_pools)* hsa_amd_agent_iterate_memory_pools;
|
||||
decltype(hsa_amd_memory_pool_get_info)* hsa_amd_memory_pool_get_info;
|
||||
decltype(hsa_amd_memory_pool_allocate)* hsa_amd_memory_pool_allocate;
|
||||
decltype(hsa_amd_agents_allow_access)* hsa_amd_agents_allow_access;
|
||||
decltype(hsa_amd_memory_async_copy)* hsa_amd_memory_async_copy;
|
||||
|
||||
decltype(::hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler;
|
||||
decltype(::hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time;
|
||||
decltype(::hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time;
|
||||
decltype(::hsa_signal_load_relaxed)* hsa_signal_load_relaxed;
|
||||
decltype(::hsa_signal_store_screlease)* hsa_signal_store_screlease;
|
||||
decltype(hsa_amd_signal_async_handler)* hsa_amd_signal_async_handler;
|
||||
decltype(hsa_amd_profiling_set_profiler_enabled)* hsa_amd_profiling_set_profiler_enabled;
|
||||
decltype(hsa_amd_profiling_get_async_copy_time)* hsa_amd_profiling_get_async_copy_time;
|
||||
decltype(hsa_amd_profiling_get_dispatch_time)* hsa_amd_profiling_get_dispatch_time;
|
||||
};
|
||||
|
||||
// Encapsulates information about a Hsa Agent such as its
|
||||
@@ -167,9 +169,9 @@ class HsaTimer {
|
||||
static const timestamp_t TIMESTAMP_MAX = UINT64_MAX;
|
||||
typedef long double freq_t;
|
||||
|
||||
HsaTimer() {
|
||||
HsaTimer(const hsa_pfn_t* hsa_api) : hsa_api_(hsa_api) {
|
||||
timestamp_t sysclock_hz = 0;
|
||||
hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sysclock_hz);
|
||||
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY)", status);
|
||||
sysclock_factor_ = (freq_t)1000000000 / (freq_t)sysclock_hz;
|
||||
}
|
||||
@@ -185,7 +187,7 @@ class HsaTimer {
|
||||
// Return timestamp in 'ns'
|
||||
timestamp_t timestamp_ns() const {
|
||||
timestamp_t sysclock;
|
||||
hsa_status_t status = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
|
||||
hsa_status_t status = hsa_api_->hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP, &sysclock);
|
||||
CHECK_STATUS("hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP)", status);
|
||||
return sysclock_to_ns(sysclock);
|
||||
}
|
||||
@@ -193,6 +195,8 @@ class HsaTimer {
|
||||
private:
|
||||
// Timestamp frequency factor
|
||||
freq_t sysclock_factor_;
|
||||
// HSA API table
|
||||
const hsa_pfn_t* const hsa_api_;
|
||||
};
|
||||
|
||||
class HsaRsrcFactory {
|
||||
|
||||
@@ -32,6 +32,7 @@ THE SOFTWARE.
|
||||
#include <inc/roctracer_hcc.h>
|
||||
#include <inc/ext/hsa_rt_utils.hpp>
|
||||
#include <src/core/loader.h>
|
||||
#include <src/core/trace_buffer.h>
|
||||
#include <util/xml.h>
|
||||
|
||||
#define PUBLIC_API __attribute__((visibility("default")))
|
||||
@@ -78,6 +79,19 @@ void fatal(const std::string msg) {
|
||||
abort();
|
||||
}
|
||||
|
||||
struct api_trace_entry_t {
|
||||
uint32_t valid;
|
||||
uint32_t type;
|
||||
uint32_t cid;
|
||||
timestamp_t begin;
|
||||
timestamp_t end;
|
||||
uint32_t pid;
|
||||
uint32_t tid;
|
||||
hsa_api_data_t data;
|
||||
};
|
||||
|
||||
roctracer::TraceBuffer<api_trace_entry_t> api_trace_buffer(0x200000);
|
||||
|
||||
// HSA API callback function
|
||||
void hsa_api_callback(
|
||||
uint32_t domain,
|
||||
@@ -87,23 +101,35 @@ void hsa_api_callback(
|
||||
{
|
||||
(void)arg;
|
||||
const hsa_api_data_t* data = reinterpret_cast<const hsa_api_data_t*>(callback_data);
|
||||
|
||||
if (data->phase == ACTIVITY_API_PHASE_ENTER) {
|
||||
hsa_begin_timestamp = timer->timestamp_fn_ns();
|
||||
} else {
|
||||
const timestamp_t end_timestamp = (cid == HSA_API_ID_hsa_shut_down) ? hsa_begin_timestamp : timer->timestamp_fn_ns();
|
||||
std::ostringstream os;
|
||||
os << hsa_begin_timestamp << ":" << end_timestamp << " " << GetPid() << ":" << GetTid() << " " << hsa_api_data_pair_t(cid, *data);
|
||||
fprintf(hsa_api_file_handle, "%s\n", os.str().c_str());
|
||||
api_trace_entry_t* entry = api_trace_buffer.GetEntry();
|
||||
entry->valid = roctracer::TRACE_ENTRY_COMPL;
|
||||
entry->cid = cid;
|
||||
entry->begin = hsa_begin_timestamp;
|
||||
entry->end = end_timestamp;
|
||||
entry->pid = GetPid();
|
||||
entry->tid = GetTid();
|
||||
entry->data = *data;
|
||||
}
|
||||
}
|
||||
|
||||
void hsa_api_flush_cb(api_trace_entry_t* entry) {
|
||||
std::ostringstream os;
|
||||
os << entry->begin << ":" << entry->end << " " << entry->pid << ":" << entry->tid << " " << hsa_api_data_pair_t(entry->cid, entry->data);
|
||||
fprintf(hsa_api_file_handle, "%s\n", os.str().c_str());
|
||||
}
|
||||
|
||||
void hsa_activity_callback(
|
||||
uint32_t op,
|
||||
activity_record_t* record,
|
||||
void* arg)
|
||||
{
|
||||
fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy%lu\n", record->begin_ns, record->end_ns, record->correlation_id);
|
||||
static uint64_t index = 0;
|
||||
fprintf(hsa_async_copy_file_handle, "%lu:%lu async-copy%lu\n", record->begin_ns, record->end_ns, index);
|
||||
index++;
|
||||
}
|
||||
|
||||
void hip_api_callback(
|
||||
@@ -164,14 +190,14 @@ void hcc_activity_callback(const char* begin, const char* end, void* arg) {
|
||||
const roctracer_record_t* end_record = reinterpret_cast<const roctracer_record_t*>(end);
|
||||
|
||||
while (record < end_record) {
|
||||
const char * name = roctracer_op_string(record->domain, record->op, record->kind);
|
||||
if (record->domain == ACTIVITY_DOMAIN_HCC_OPS) {
|
||||
const char * name = roctracer_op_string(record->domain, record->op, record->kind);
|
||||
fprintf(hcc_activity_file_handle, "%lu:%lu %d:%lu %s:%lu\n",
|
||||
record->begin_ns, record->end_ns, record->device_id, record->queue_id, name, record->correlation_id);
|
||||
} else {
|
||||
#if 0
|
||||
fprintf(stderr, "Bad domain %d\n", record->domain);
|
||||
abort();
|
||||
fprintf(hip_api_file_handle, "%lu:%lu %u:%u %s()\n",
|
||||
record->begin_ns, record->end_ns, record->process_id, record->thread_id, name);
|
||||
#endif
|
||||
}
|
||||
ROCTRACER_CALL(roctracer_next_record(record, &record));
|
||||
@@ -312,8 +338,10 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
// initialize HSA tracing
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_API, (void*)table);
|
||||
roctracer::hsa_ops_properties_t ops_properties{
|
||||
table,
|
||||
reinterpret_cast<activity_async_callback_t>(hsa_activity_callback),
|
||||
NULL};
|
||||
NULL,
|
||||
output_prefix};
|
||||
roctracer_set_properties(ACTIVITY_DOMAIN_HSA_OPS, &ops_properties);
|
||||
|
||||
fprintf(stdout, " HSA-trace("); fflush(stdout);
|
||||
@@ -340,23 +368,32 @@ extern "C" PUBLIC_API bool OnLoad(HsaApiTable* table, uint64_t runtime_version,
|
||||
fprintf(stdout, " HIP-trace()\n"); fflush(stdout);
|
||||
// Allocating tracing pool
|
||||
roctracer_properties_t properties{};
|
||||
properties.buffer_size = 0x1000;
|
||||
properties.buffer_size = 0x80000;
|
||||
properties.buffer_callback_fun = hcc_activity_callback;
|
||||
ROCTRACER_CALL(roctracer_open_pool(&properties));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
//ROCTRACER_CALL(roctracer_enable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_enable_domain_callback(ACTIVITY_DOMAIN_HIP_API, hip_api_callback, NULL));
|
||||
}
|
||||
|
||||
return true;
|
||||
return roctracer_load(table, runtime_version, failed_tool_count, failed_tool_names);
|
||||
}
|
||||
|
||||
// HSA-runtime tool on-unload method
|
||||
extern "C" PUBLIC_API void OnUnload() {
|
||||
static bool is_unloaded = false;
|
||||
if (is_unloaded) {
|
||||
return;
|
||||
}
|
||||
is_unloaded = true;
|
||||
roctracer_unload();
|
||||
|
||||
if (trace_hsa) {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HSA_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HSA_OPS));
|
||||
|
||||
api_trace_buffer.Flush(0, hsa_api_flush_cb);
|
||||
|
||||
fclose(hsa_api_file_handle);
|
||||
fclose(hsa_async_copy_file_handle);
|
||||
}
|
||||
@@ -364,9 +401,13 @@ extern "C" PUBLIC_API void OnUnload() {
|
||||
ROCTRACER_CALL(roctracer_disable_domain_callback(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HIP_API));
|
||||
ROCTRACER_CALL(roctracer_disable_domain_activity(ACTIVITY_DOMAIN_HCC_OPS));
|
||||
ROCTRACER_CALL(roctracer_flush_activity());
|
||||
ROCTRACER_CALL(roctracer_close_pool());
|
||||
|
||||
fclose(hip_api_file_handle);
|
||||
fclose(hcc_activity_file_handle);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" CONSTRUCTOR_API void constructor() {}
|
||||
extern "C" DESTRUCTOR_API void destructor() { OnUnload(); }
|
||||
|
||||
Ссылка в новой задаче
Block a user