JSON kernel name propogation and stats
Change-Id: I60cf4c7608272941e2499bd251850416ac254f32
[ROCm/rocprofiler commit: a5f52b40f5]
Bu işleme şunda yer alıyor:
@@ -172,9 +172,10 @@ usage() {
|
||||
echo " --stats - generating kernel execution stats, file <output name>.stats.csv"
|
||||
echo ""
|
||||
echo " --roctx-trace - to enable rocTX application code annotation trace, \"Markers and Ranges\" JSON trace section."
|
||||
echo " --sys-trace - to trace HIP/HSA APIs and GPU activity, generates stats and JSON trace chrome-tracing compatible"
|
||||
echo " --hip-trace - to trace HIP, generates API execution stats and JSON file chrome-tracing compatible"
|
||||
echo " --hsa-trace - to trace HSA, generates API execution stats and JSON file chrome-tracing compatible"
|
||||
echo " --sys-trace - to trace HIP/HSA APIs and GPU activity, generates stats and JSON trace chrome-tracing compatible"
|
||||
echo " '--hsa-trace' can be used in addition to select activity tracing from HSA (ROCr runtime) level"
|
||||
echo " --kfd-trace - to trace KFD, generates KFD Thunk API execution stats and JSON file chrome-tracing compatible"
|
||||
echo " Generated files: <output name>.<domain>_stats.txt <output name>.json"
|
||||
echo " Traced API list can be set by input .txt or .xml files."
|
||||
|
||||
@@ -69,6 +69,9 @@ class SQLiteDB:
|
||||
cursor.execute('ALTER TABLE %s ADD COLUMN "%s" %s' % (table_name, data_label, data_type))
|
||||
cursor.execute('UPDATE %s SET %s = (%s);' % (table_name, data_label, data_expr))
|
||||
|
||||
def change_rec_name(self, table_name, rec_id, rec_name):
|
||||
self.connection.execute('UPDATE ' + table_name + ' SET Name = ? WHERE "Index" = ?', (rec_name, rec_id))
|
||||
|
||||
# populate DB table entry
|
||||
def insert_entry(self, table, val_list):
|
||||
(cursor, stm) = table
|
||||
@@ -98,8 +101,8 @@ class SQLiteDB:
|
||||
def _get_raws_indexed(self, table_name):
|
||||
cursor = self.connection.execute('SELECT * FROM ' + table_name + ' order by "Index" asc;')
|
||||
return cursor.fetchall()
|
||||
def _get_raw_by_id(self, table_name, req_id):
|
||||
cursor = self.connection.execute('SELECT * FROM ' + table_name + ' WHERE "Index"=?', (req_id,))
|
||||
def _get_raw_by_id(self, table_name, rec_id):
|
||||
cursor = self.connection.execute('SELECT * FROM ' + table_name + ' WHERE "Index"=?', (rec_id,))
|
||||
raws = cursor.fetchall()
|
||||
if len(raws) != 1:
|
||||
raise Exception('Index is not unique, table "' + table_name + '"')
|
||||
|
||||
@@ -24,12 +24,6 @@ import os, sys, re
|
||||
from sqlitedb import SQLiteDB
|
||||
import dform
|
||||
|
||||
# Parsing results in the format:
|
||||
#dispatch[0], queue_index(0), kernel_name("SimpleConvolution"), time(1048928000311041,1048928006154674,1048928006168274,1048928006170503):
|
||||
# GRBM_GUI_ACTIVE (74332)
|
||||
# SQ_WAVES (4096)
|
||||
# SQ_INSTS_VMEM_RD (36864)
|
||||
|
||||
EXT_PID = 0
|
||||
COPY_PID = 1
|
||||
HIP_PID = 2
|
||||
@@ -255,6 +249,15 @@ api_table_descr = [
|
||||
['BeginNs', 'EndNs', 'pid', 'tid', 'Name', 'args', 'Index'],
|
||||
{'BeginNs':'INTEGER', 'EndNs':'INTEGER', 'pid':'INTEGER', 'tid':'INTEGER', 'Name':'TEXT', 'args':'TEXT', 'Index':'INTEGER'}
|
||||
]
|
||||
# Filling API records DB table
|
||||
# table_name - created DB table name
|
||||
# db - DB handle
|
||||
# indir - input directory
|
||||
# api_name - traced API name
|
||||
# api_pid - assigned JSON PID
|
||||
# dep_pid - PID of dependet domain
|
||||
# dep_list - list of dependet dospatch events
|
||||
# dep_filtr - registered dependencies by record ID
|
||||
def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep_filtr, expl_id):
|
||||
global hsa_activity_found
|
||||
copy_raws = []
|
||||
@@ -265,6 +268,8 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep
|
||||
file_name = indir + '/' + api_name + '_api_trace.txt'
|
||||
ptrn_val = re.compile(r'(\d+):(\d+) (\d+):(\d+) ([^\(]+)(\(.*)$')
|
||||
ptrn_ac = re.compile(r'hsa_amd_memory_async_copy')
|
||||
ptrn1_kernel = re.compile(r'^.*kernel\(');
|
||||
ptrn2_kernel = re.compile(r'\)\) .*$');
|
||||
|
||||
if not os.path.isfile(file_name): return 0
|
||||
|
||||
@@ -280,6 +285,7 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep
|
||||
if m: START_US = int(m.group(1)) / 1000
|
||||
START_US = 0
|
||||
|
||||
# parsing an input trace file and creating a DB table
|
||||
record_id = 0
|
||||
table_handle = db.add_table(table_name, api_table_descr)
|
||||
with open(file_name, mode='r') as fd:
|
||||
@@ -294,14 +300,17 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep
|
||||
rec_vals[2] = api_pid
|
||||
rec_vals.append(record_id)
|
||||
db.insert_entry(table_handle, rec_vals)
|
||||
|
||||
# dependencies filling
|
||||
if ptrn_ac.search(rec_vals[4]) or record_id in dep_filtr:
|
||||
beg_ns = int(rec_vals[0])
|
||||
end_ns = int(rec_vals[1])
|
||||
from_us = (beg_ns / 1000) + ((end_ns - beg_ns) / 1000)
|
||||
dep_from_us_list.append(from_us)
|
||||
dep_tid_list.append(int(rec_vals[3]))
|
||||
dep_id_list.append(record_id)
|
||||
dep_id_list.append(record_id)
|
||||
|
||||
# memcopy data
|
||||
if len(copy_raws) != 0:
|
||||
copy_data = list(copy_raws[copy_index])
|
||||
args_str = rec_vals[5]
|
||||
@@ -311,13 +320,23 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep
|
||||
copy_csv += str(copy_index) + ', ' + copy_line + '\n'
|
||||
copy_index += 1
|
||||
|
||||
# kernel name extraction
|
||||
if record_id in dep_filtr:
|
||||
kernel_name = rec_vals[rec_len - 2]
|
||||
(kernel_name, n_subs) = ptrn1_kernel.subn('', kernel_name, count=1);
|
||||
if n_subs != 0:
|
||||
(kernel_name, n_subs) = ptrn2_kernel.subn(')', kernel_name, count=1)
|
||||
if n_subs != 0: db.change_rec_name('OPS', record_id, '"' + kernel_name + '"')
|
||||
|
||||
record_id += 1
|
||||
else: fatal(api_name + " bad record: '" + record + "'")
|
||||
|
||||
# inserting of dispatch events correlated to the dependent dispatches
|
||||
for (tid, from_ns) in dep_list:
|
||||
db.insert_entry(table_handle, [from_ns, from_ns, api_pid, tid, 'hsa_dispatch', '', record_id])
|
||||
record_id += 1
|
||||
|
||||
# registering dependencies informatino
|
||||
if dep_pid != NONE_PID:
|
||||
if not dep_pid in dep_dict: dep_dict[dep_pid] = {}
|
||||
dep_dict[dep_pid]['pid'] = api_pid
|
||||
@@ -325,6 +344,7 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep
|
||||
dep_dict[dep_pid]['from'] = dep_from_us_list
|
||||
if expl_id: dep_dict[dep_pid]['id'] = dep_id_list
|
||||
|
||||
# generating memcopy CSV
|
||||
if copy_csv != '':
|
||||
file_name = os.environ['PWD'] + '/results_mcopy.csv'
|
||||
with open(file_name, mode='w') as fd:
|
||||
@@ -376,43 +396,70 @@ ops_table_descr = [
|
||||
['BeginNs', 'EndNs', 'dev-id', 'queue-id', 'Name', 'pid', 'tid', 'Index'],
|
||||
{'Index':'INTEGER', 'Name':'TEXT', 'args':'TEXT', 'BeginNs':'INTEGER', 'EndNs':'INTEGER', 'dev-id':'INTEGER', 'queue-id':'INTEGER', 'pid':'INTEGER', 'tid':'INTEGER'}
|
||||
]
|
||||
def fill_ops_db(table_name, db, indir):
|
||||
def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir):
|
||||
global max_gpu_id
|
||||
file_name = indir + '/' + 'hcc_ops_trace.txt'
|
||||
ptrn_val = re.compile(r'(\d+):(\d+) (\d+):(\d+) (.*)$')
|
||||
ptrn_id = re.compile(r'^[^:]+:(\d+)$')
|
||||
ptrn_id = re.compile(r'^([^:]+):(\d+)$')
|
||||
ptrn_mcopy = re.compile(r'(Memcpy|Copy|Fill)')
|
||||
ptrn_barrier = re.compile(r'Marker')
|
||||
is_barrier = 0
|
||||
|
||||
if not os.path.isfile(file_name): return {}
|
||||
|
||||
filtr = {}
|
||||
|
||||
record_id = 0
|
||||
table_handle = db.add_table(table_name, ops_table_descr)
|
||||
kernel_table_handle = db.add_table(kernel_table_name, ops_table_descr)
|
||||
mcopy_table_handle = db.add_table(mcopy_table_name, ops_table_descr)
|
||||
with open(file_name, mode='r') as fd:
|
||||
for line in fd.readlines():
|
||||
record = line[:-1]
|
||||
m = ptrn_val.match(record)
|
||||
if m:
|
||||
# parsing trace record
|
||||
rec_vals = []
|
||||
for ind in range(1,6): rec_vals.append(m.group(ind))
|
||||
gpu_id = int(rec_vals[2]);
|
||||
if (gpu_id > max_gpu_id): max_gpu_id = gpu_id
|
||||
gpu_pid = GPU_BASE_PID + int(gpu_id)
|
||||
rec_vals.append(gpu_pid)
|
||||
rec_vals.append(0)
|
||||
m = ptrn_id.match(rec_vals[4])
|
||||
label = rec_vals[4] # record name
|
||||
m = ptrn_id.match(label)
|
||||
if not m: fatal("bad hcc ops entry '" + record + "'")
|
||||
corr_id = int(m.group(1)) - 1
|
||||
rec_vals.append(corr_id)
|
||||
name = m.group(1)
|
||||
corr_id = int(m.group(2)) - 1
|
||||
|
||||
# checking name for memcopy pattern
|
||||
if ptrn_mcopy.search(name):
|
||||
table_handle = mcopy_table_handle
|
||||
pid = COPY_PID;
|
||||
else:
|
||||
table_handle = kernel_table_handle
|
||||
|
||||
gpu_id = int(rec_vals[2]);
|
||||
if (gpu_id > max_gpu_id): max_gpu_id = gpu_id
|
||||
pid = GPU_BASE_PID + int(gpu_id)
|
||||
|
||||
if ptrn_barrier.search(name):
|
||||
name = '"<barrier packet>"'
|
||||
is_barrier = 1
|
||||
|
||||
# insert DB record
|
||||
rec_vals[4] = name # Name
|
||||
rec_vals.append(pid) # pid
|
||||
rec_vals.append(0) # tid
|
||||
rec_vals.append(corr_id) # Index
|
||||
db.insert_entry(table_handle, rec_vals)
|
||||
|
||||
# registering a dependency filtr
|
||||
filtr[corr_id] = 1
|
||||
|
||||
if not gpu_pid in dep_dict:
|
||||
dep_dict[gpu_pid] = {}
|
||||
dep_dict[gpu_pid]['to'] = {}
|
||||
dep_dict[gpu_pid]['to'][corr_id] = int(rec_vals[0]) / 1000
|
||||
dep_dict[gpu_pid]['bsp'] = OPS_PID
|
||||
else: fatal("hcc ops bad record: '" + record + "'")
|
||||
# filling a dependency
|
||||
if is_barrier == 0:
|
||||
if not pid in dep_dict: dep_dict[pid] = {}
|
||||
if not 'to' in dep_dict[pid]: dep_dict[pid]['to'] = {}
|
||||
dep_dict[pid]['to'][corr_id] = int(rec_vals[0]) / 1000
|
||||
dep_dict[pid]['bsp'] = OPS_PID
|
||||
|
||||
else:
|
||||
fatal("hcc ops bad record: '" + record + "'")
|
||||
|
||||
return filtr
|
||||
#############################################################
|
||||
@@ -449,6 +496,8 @@ else:
|
||||
hsa_statfile = re.sub(r'\.stats\.csv$', r'.hsa_stats.csv', statfile)
|
||||
hip_statfile = re.sub(r'\.stats\.csv$', r'.hip_stats.csv', statfile)
|
||||
kfd_statfile = re.sub(r'\.stats\.csv$', r'.kfd_stats.csv', statfile)
|
||||
ops_statfile = statfile
|
||||
copy_statfile = re.sub(r'\.stats\.csv$', r'.copy_stats.csv', statfile)
|
||||
|
||||
with open(dbfile, mode='w') as fd: fd.truncate()
|
||||
db = SQLiteDB(dbfile)
|
||||
@@ -460,7 +509,7 @@ else:
|
||||
hsa_activity_found = fill_copy_db('COPY', db, indir)
|
||||
hsa_trace_found = fill_api_db('HSA', db, indir, 'hsa', HSA_PID, COPY_PID, kern_dep_list, {}, 0)
|
||||
|
||||
ops_filtr = fill_ops_db('OPS', db, indir)
|
||||
ops_filtr = fill_ops_db('OPS', 'COPY', db, indir)
|
||||
hip_trace_found = fill_api_db('HIP', db, indir, 'hip', HIP_PID, OPS_PID, [], ops_filtr, 1)
|
||||
|
||||
fill_kernel_db('A', db)
|
||||
@@ -481,8 +530,7 @@ else:
|
||||
if kfd_trace_found:
|
||||
db.label_json(KFD_PID, "CPU KFD API", jsonfile)
|
||||
|
||||
if hsa_activity_found:
|
||||
db.label_json(COPY_PID, "COPY", jsonfile)
|
||||
db.label_json(COPY_PID, "COPY", jsonfile)
|
||||
|
||||
if any_trace_found and max_gpu_id >= 0:
|
||||
for ind in range(0, int(max_gpu_id) + 1):
|
||||
@@ -502,16 +550,18 @@ else:
|
||||
dform.gen_table_bins(db, 'HSA', hsa_statfile, 'Name', 'DurationNs')
|
||||
dform.gen_api_json_trace(db, 'HSA', START_US, jsonfile)
|
||||
|
||||
if hsa_activity_found:
|
||||
dform.post_process_data(db, 'COPY')
|
||||
dform.gen_api_json_trace(db, 'COPY', START_US, jsonfile)
|
||||
dform.post_process_data(db, 'COPY')
|
||||
dform.gen_table_bins(db, 'COPY', copy_statfile, 'Name', 'DurationNs')
|
||||
dform.gen_api_json_trace(db, 'COPY', START_US, jsonfile)
|
||||
|
||||
if hip_trace_found:
|
||||
dform.post_process_data(db, 'HIP')
|
||||
dform.gen_table_bins(db, 'HIP', hip_statfile, 'Name', 'DurationNs')
|
||||
dform.gen_api_json_trace(db, 'HIP', START_US, jsonfile)
|
||||
|
||||
if ops_filtr:
|
||||
dform.post_process_data(db, 'OPS')
|
||||
dform.gen_table_bins(db, 'OPS', ops_statfile, 'Name', 'DurationNs')
|
||||
dform.gen_ops_json_trace(db, 'OPS', GPU_BASE_PID, START_US, jsonfile)
|
||||
|
||||
if kfd_trace_found:
|
||||
|
||||
@@ -295,20 +295,15 @@ class InterceptQueue {
|
||||
static const char* GetKernelName(const uint64_t kernel_symbol) {
|
||||
amd_runtime_loader_debug_info_t* dbg_info =
|
||||
reinterpret_cast<amd_runtime_loader_debug_info_t*>(kernel_symbol);
|
||||
const char* kernel_name = (dbg_info != NULL) ? dbg_info->kernel_name : NULL;
|
||||
return (dbg_info != NULL) ? dbg_info->kernel_name : NULL;
|
||||
}
|
||||
|
||||
// Kernel name is mangled name
|
||||
// apply __cxa_demangle() to demangle it
|
||||
const char* funcname = NULL;
|
||||
if (kernel_name != NULL) {
|
||||
size_t funcnamesize = 0;
|
||||
int status;
|
||||
const char* ret = abi::__cxa_demangle(kernel_name, NULL, &funcnamesize, &status);
|
||||
funcname = (ret != 0) ? ret : strdup(kernel_name);
|
||||
}
|
||||
if (funcname == NULL) funcname = strdup(kernel_none_);
|
||||
|
||||
return funcname;
|
||||
// Demangle C++ symbol name
|
||||
static const char* cpp_demangle(const char* symname) {
|
||||
size_t size = 0;
|
||||
int status;
|
||||
const char* ret = abi::__cxa_demangle(symname, NULL, &size, &status);
|
||||
return (ret != 0) ? ret : strdup(symname);
|
||||
}
|
||||
|
||||
static const char* QueryKernelName(uint64_t kernel_object, const amd_kernel_code_t* kernel_code) {
|
||||
@@ -318,11 +313,10 @@ class InterceptQueue {
|
||||
EXC_ABORT(HSA_STATUS_ERROR, "Error: V3 code object detected - code objects tracking should be enabled\n");
|
||||
}
|
||||
}
|
||||
const char* kernel_name = (util::HsaRsrcFactory::IsExecutableTracking()) ?
|
||||
util::HsaRsrcFactory::GetKernelName(kernel_object) :
|
||||
const char* kernel_symname = (util::HsaRsrcFactory::IsExecutableTracking()) ?
|
||||
util::HsaRsrcFactory::GetKernelNameRef(kernel_object) :
|
||||
GetKernelName(kernel_code->runtime_loader_kernel_symbol);
|
||||
|
||||
return kernel_name;
|
||||
return cpp_demangle(kernel_symname);
|
||||
}
|
||||
|
||||
// method to get an intercept queue object
|
||||
|
||||
@@ -695,14 +695,14 @@ uint64_t HsaRsrcFactory::Submit(hsa_queue_t* queue, const void* packet, size_t s
|
||||
return write_idx;
|
||||
}
|
||||
|
||||
const char* HsaRsrcFactory::GetKernelName(uint64_t addr) {
|
||||
const char* HsaRsrcFactory::GetKernelNameRef(uint64_t addr) {
|
||||
std::lock_guard<mutex_t> lck(mutex_);
|
||||
const auto it = symbols_map_->find(addr);
|
||||
if (it == symbols_map_->end()) {
|
||||
fprintf(stderr, "HsaRsrcFactory::kernel addr (0x%lx) is not found\n", addr);
|
||||
abort();
|
||||
}
|
||||
return strdup(it->second);
|
||||
return it->second;
|
||||
}
|
||||
|
||||
void HsaRsrcFactory::EnableExecutableTracking(HsaApiTable* table) {
|
||||
|
||||
@@ -394,7 +394,7 @@ class HsaRsrcFactory {
|
||||
// Enable executables loading tracking
|
||||
static bool IsExecutableTracking() { return executable_tracking_on_; }
|
||||
static void EnableExecutableTracking(HsaApiTable* table);
|
||||
static const char* GetKernelName(uint64_t addr);
|
||||
static const char* GetKernelNameRef(uint64_t addr);
|
||||
|
||||
// Initialize HSA API table
|
||||
void static InitHsaApiTable(HsaApiTable* table);
|
||||
|
||||
Yeni konuda referans
Bir kullanıcı engelle