Rename the /opt/rocm/rocprofiler.so link to the proper librocprofiler64.so; simplify HSA tracing so that the single --hsa-trace option is sufficient, with no need to also pass --stats and --timestamp on
Change-Id: I1dccfdd8843437bf9ed10ac259e84b34ef986be3
[ROCm/rocprofiler commit: 862f042c43]
Этот коммит содержится в:
@@ -95,7 +95,7 @@ install ( FILES
|
||||
DESTINATION bin
|
||||
PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION ../include RENAME ${ROCPROFILER_NAME} )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCPROFILER_NAME}.so )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCPROFILER_LIBRARY}.so )
|
||||
install ( FILES ${PROJECT_BINARY_DIR}/rocprof-link DESTINATION ../bin
|
||||
PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
|
||||
RENAME rocprof )
|
||||
|
||||
@@ -24,6 +24,11 @@ def gen_api_json_trace(db, table, start_us, outfile):
|
||||
db.dump_json('B', table, outfile)
|
||||
db.execute('DROP VIEW B')
|
||||
|
||||
def gen_ops_json_trace(db, table, base_pid, start_us, outfile):
|
||||
db.execute('create view B as select "Index", Name as name, ("gpu-id" + %d) as pid, tid, (BeginNs/1000 - %d) as ts, (DurationNs/1000) as dur from %s order by ts asc;' % (base_pid, start_us, table));
|
||||
db.dump_json('B', table, outfile)
|
||||
db.execute('DROP VIEW B')
|
||||
|
||||
def gen_kernel_json_trace(db, table, base_pid, start_us, outfile):
|
||||
db.execute('create view B as select "Index", KernelName as name, ("gpu-id" + %d) as pid, (0) as tid, (BeginNs/1000 - %d) as ts, (DurationNs/1000) as dur from %s order by ts asc;' % (base_pid, start_us, table));
|
||||
db.dump_json('B', table, outfile)
|
||||
|
||||
@@ -140,9 +140,8 @@ usage() {
|
||||
echo " --ctx-limit <max number> - maximum number of outstanding contexts [0 - unlimited]"
|
||||
echo " --heartbeat <rate sec> - to print progress heartbeats [0 - disabled]"
|
||||
echo ""
|
||||
echo " --stats - generating stats and json trace output"
|
||||
echo " --hsa-trace - to trace HSA"
|
||||
echo " --hip-trace - to trace HIP"
|
||||
echo " --stats - generating kernel executino stats"
|
||||
echo " --hsa-trace - to trace HSA, generates API execution stats and JSON file viewable in chrome tracing"
|
||||
echo ""
|
||||
echo "Configuration file:"
|
||||
echo " You can set your parameters defaults preferences in the configuration file 'rpl_rc.xml'. The search path sequence: .:${HOME}:<package path>"
|
||||
@@ -191,9 +190,25 @@ run() {
|
||||
mkdir -p "$ROCP_OUTPUT_DIR"
|
||||
fi
|
||||
|
||||
API_TRACE=""
|
||||
PRELOAD_LIBS=""
|
||||
if [ "$HSA_TRACE" = 1 ] ; then
|
||||
export ROCTRACER_DOMAIN="hsa"
|
||||
API_TRACE="hsa"
|
||||
fi
|
||||
if [ "$HIP_TRACE" = 1 ] ; then
|
||||
if [ -z "$API_TRACE" ] ; then
|
||||
API_TRACE="hip";
|
||||
else
|
||||
API_TRACE="all"
|
||||
fi
|
||||
if [ -z "$HCC_HOME" ] ; then error "env var HCC_HOME is not defined"; fi
|
||||
PRELOAD_LIBS="$PRELOAD_LIBS $HCC_HOME/lib/libmcwamp_hsa.so"
|
||||
fi
|
||||
if [ -n "$API_TRACE" ] ; then
|
||||
API_TRACE=$(echo $API_TRACE | sed 's/all//')
|
||||
if [ -n "$API_TRACE" ] ; then export ROCTRACER_DOMAIN=$API_TRACE; fi
|
||||
export HSA_TOOLS_LIB="libtracer_tool.so libroctracer64.so $HSA_TOOLS_LIB"
|
||||
PRELOAD_LIBS="$PRELOAD_LIBS $HSA_TOOLS_LIB"
|
||||
fi
|
||||
|
||||
redirection_cmd=""
|
||||
@@ -203,7 +218,8 @@ run() {
|
||||
fi
|
||||
|
||||
#unset ROCP_OUTPUT_DIR
|
||||
eval "LD_PRELOAD='$HSA_TOOLS_LIB' $APP_CMD $redirection_cmd"
|
||||
CMD_LINE="LD_PRELOAD='$PRELOAD_LIBS' $APP_CMD $redirection_cmd"
|
||||
eval "$CMD_LINE"
|
||||
}
|
||||
|
||||
# main
|
||||
@@ -266,18 +282,18 @@ while [ 1 ] ; do
|
||||
export ROCP_OUTSTANDING_MAX="$2"
|
||||
elif [ "$1" = "--heartbeat" ] ; then
|
||||
export ROCP_OUTSTANDING_MON="$2"
|
||||
elif [ "$1" = "--stats" ] ; then
|
||||
ARG_VAL=0
|
||||
export ROCP_TIMESTAMP_ON=1
|
||||
GEN_STATS=1
|
||||
elif [ "$1" = "--hsa-trace" ] ; then
|
||||
ARG_VAL=0
|
||||
export ROCP_TIMESTAMP_ON=1
|
||||
GEN_STATS=1
|
||||
HSA_TRACE=1
|
||||
elif [ "$1" = "--hip-trace" ] ; then
|
||||
ARG_VAL=0
|
||||
HIP_TRACE=1
|
||||
elif [ "$1" = "--verbose" ] ; then
|
||||
ARG_VAL=0
|
||||
export ROCP_VERBOSE_MODE=1
|
||||
elif [ "$1" = "--stats" ] ; then
|
||||
ARG_VAL=0
|
||||
GEN_STATS=1
|
||||
else
|
||||
break
|
||||
fi
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import csv, sqlite3, re, sys
|
||||
from functools import reduce
|
||||
|
||||
# SQLite Database class
|
||||
class SQLiteDB:
|
||||
@@ -112,15 +113,18 @@ class SQLiteDB:
|
||||
with open(file_name, mode='a') as fd:
|
||||
fd.write(',{"args":{"name":"%s"},"ph":"M","pid":%s,"name":"process_name"}\n' %(label, pid));
|
||||
|
||||
def flow_json(self, base_id, from_pid, from_tid, from_us_list, to_pid, to_us_dict, start_us, file_name):
|
||||
def flow_json(self, base_id, from_pid, from_tid, from_us_list, to_pid, to_us_dict, corr_id_list, start_us, file_name):
|
||||
if not re.search(r'\.json$', file_name):
|
||||
raise Exception('wrong output file type: "' + file_name + '"' )
|
||||
with open(file_name, mode='a') as fd:
|
||||
dep_id = base_id
|
||||
for ind in range(len(from_tid)):
|
||||
if (len(corr_id_list) != 0): corr_id = corr_id_list[ind]
|
||||
else: corr_id = ind
|
||||
from_ts = from_us_list[ind] - start_us
|
||||
to_ts = to_us_dict[ind] - start_us
|
||||
fd.write(',{"ts":%d,"ph":"s","cat":"DataFlow","id":%d,"pid":%s,"tid":%d,"name":"dep"}\n' % (from_ts, dep_id, str(from_pid), from_tid[ind]))
|
||||
to_ts = to_us_dict[corr_id] - start_us
|
||||
if from_ts > to_ts: from_ts = to_ts
|
||||
fd.write(',{"ts":%d,"ph":"s","cat":"DataFlow","id":%d,"pid":%s,"tid":%s,"name":"dep"}\n' % (from_ts, dep_id, str(from_pid), from_tid[ind]))
|
||||
fd.write(',{"ts":%d,"ph":"t","cat":"DataFlow","id":%d,"pid":%s,"tid":0,"name":"dep"}\n' % (to_ts, dep_id, str(to_pid)))
|
||||
dep_id += 1
|
||||
|
||||
|
||||
@@ -33,9 +33,11 @@ import dform
|
||||
# SQ_INSTS_VMEM_RD (36864)
|
||||
|
||||
COPY_PID = 0
|
||||
HSA_PID = 1
|
||||
GPU_BASE_PID = 2
|
||||
max_gpu_id = 0
|
||||
OPS_PID = 1
|
||||
HSA_PID = 2
|
||||
HIP_PID = 3
|
||||
GPU_BASE_PID = 4
|
||||
max_gpu_id = -1
|
||||
START_US = 0
|
||||
|
||||
# dependencies dictionary
|
||||
@@ -100,9 +102,9 @@ def parse_res(infile):
|
||||
var_table[dispatch_number][var] = val
|
||||
if not var in var_list: var_list.append(var);
|
||||
if var == 'gpu-id':
|
||||
if (val > max_gpu_id): max_gpu_id = val
|
||||
gpu_id = val
|
||||
if var == 'tid': disp_tid = int(val)
|
||||
gpu_id = int(val)
|
||||
if (gpu_id > max_gpu_id): max_gpu_id = gpu_id
|
||||
if var == 'tid': disp_tid = val
|
||||
else: fatal('wrong kernel property "' + prop + '" in "'+ kernel_properties + '"')
|
||||
m = ts_pattern.search(record)
|
||||
if m:
|
||||
@@ -123,6 +125,7 @@ def parse_res(infile):
|
||||
dep_str['to'][to_id] = to_us
|
||||
dep_str['from'].append(from_us)
|
||||
dep_str['tid'].append(disp_tid)
|
||||
dep_str['pid'] = HSA_PID
|
||||
kern_dep_list.append((disp_tid, m.group(1)))
|
||||
|
||||
inp.close()
|
||||
@@ -181,16 +184,16 @@ hsa_table_descr = [
|
||||
['BeginNs', 'EndNs', 'pid', 'tid', 'Name', 'args', 'Index'],
|
||||
{'Index':'INTEGER', 'Name':'TEXT', 'args':'TEXT', 'BeginNs':'INTEGER', 'EndNs':'INTEGER', 'pid':'INTEGER', 'tid':'INTEGER'}
|
||||
]
|
||||
def fill_hsa_db(table_name, db, indir):
|
||||
file_name = indir + '/' + 'hsa_api_trace.txt'
|
||||
def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep_filtr, expl_id):
|
||||
file_name = indir + '/' + api_name + '_api_trace.txt'
|
||||
ptrn_val = re.compile(r'(\d+):(\d+) (\d+):(\d+) ([^\(]+)(\(.*)$')
|
||||
ptrn_ac = re.compile(r'hsa_amd_memory_async_copy')
|
||||
|
||||
if not os.path.isfile(file_name): return 0
|
||||
|
||||
if not COPY_PID in dep_dict: dep_dict[COPY_PID] = {}
|
||||
dep_tid_list = []
|
||||
dep_from_us_list = []
|
||||
dep_id_list = []
|
||||
|
||||
global START_US
|
||||
with open(file_name, mode='r') as fd:
|
||||
@@ -210,24 +213,28 @@ def fill_hsa_db(table_name, db, indir):
|
||||
rec_vals = []
|
||||
for ind in range(1,7):
|
||||
rec_vals.append(m.group(ind))
|
||||
rec_vals[2] = HSA_PID
|
||||
rec_vals[2] = api_pid
|
||||
rec_vals.append(record_id)
|
||||
db.insert_entry(table_handle, rec_vals)
|
||||
if ptrn_ac.search(rec_vals[4]):
|
||||
if ptrn_ac.search(rec_vals[4]) or record_id in dep_filtr:
|
||||
beg_ns = int(rec_vals[0])
|
||||
end_ns = int(rec_vals[1])
|
||||
from_us = (beg_ns / 1000) + ((end_ns - beg_ns) / 1000)
|
||||
dep_from_us_list.append(from_us)
|
||||
dep_tid_list.append(int(rec_vals[3]))
|
||||
dep_id_list.append(record_id)
|
||||
record_id += 1
|
||||
else: fatal("hsa bad record")
|
||||
|
||||
for (tid, from_ns) in kern_dep_list:
|
||||
db.insert_entry(table_handle, [from_ns, from_ns, HSA_PID, tid, 'hsa_dispatch', '', record_id])
|
||||
for (tid, from_ns) in dep_list:
|
||||
db.insert_entry(table_handle, [from_ns, from_ns, api_pid, tid, 'hsa_dispatch', '', record_id])
|
||||
record_id += 1
|
||||
|
||||
dep_dict[COPY_PID]['tid'] = dep_tid_list
|
||||
dep_dict[COPY_PID]['from'] = dep_from_us_list
|
||||
if not dep_pid in dep_dict: dep_dict[dep_pid] = {}
|
||||
dep_dict[dep_pid]['pid'] = api_pid
|
||||
dep_dict[dep_pid]['tid'] = dep_tid_list
|
||||
dep_dict[dep_pid]['from'] = dep_from_us_list
|
||||
if expl_id: dep_dict[dep_pid]['id'] = dep_id_list
|
||||
|
||||
return 1
|
||||
#############################################################
|
||||
@@ -257,13 +264,57 @@ def fill_copy_db(table_name, db, indir):
|
||||
rec_vals.append(0)
|
||||
m = ptrn_id.match(rec_vals[2])
|
||||
if m: dep_to_us_dict[int(m.group(1))] = int(rec_vals[0]) / 1000
|
||||
else: fatal("async-copy bad name")
|
||||
else: fatal("bad async-copy entry")
|
||||
rec_vals.append(m.group(1))
|
||||
db.insert_entry(table_handle, rec_vals)
|
||||
else: fatal("async-copy bad record")
|
||||
|
||||
dep_dict[COPY_PID]['to'] = dep_to_us_dict
|
||||
#############################################################
|
||||
|
||||
# fill HCC ops DB
|
||||
ops_table_descr = [
|
||||
['BeginNs', 'EndNs', 'dev-id', 'queue-id', 'Name', 'pid', 'tid', 'Index'],
|
||||
{'Index':'INTEGER', 'Name':'TEXT', 'args':'TEXT', 'BeginNs':'INTEGER', 'EndNs':'INTEGER', 'dev-id':'INTEGER', 'queue-id':'INTEGER', 'pid':'INTEGER', 'tid':'INTEGER'}
|
||||
]
|
||||
def fill_ops_db(table_name, db, indir):
|
||||
global max_gpu_id
|
||||
file_name = indir + '/' + 'hcc_ops_trace.txt'
|
||||
ptrn_val = re.compile(r'(\d+):(\d+) (\d+):(\d+) (.*)$')
|
||||
ptrn_id = re.compile(r'^[^:]+:(\d+)$')
|
||||
|
||||
if not os.path.isfile(file_name): return {}
|
||||
|
||||
filtr = {}
|
||||
|
||||
record_id = 0
|
||||
table_handle = db.add_table(table_name, ops_table_descr)
|
||||
with open(file_name, mode='r') as fd:
|
||||
for line in fd.readlines():
|
||||
record = line[:-1]
|
||||
m = ptrn_val.match(record)
|
||||
if m:
|
||||
rec_vals = []
|
||||
for ind in range(1,6): rec_vals.append(m.group(ind))
|
||||
gpu_id = int(rec_vals[2]);
|
||||
if (gpu_id > max_gpu_id): max_gpu_id = gpu_id
|
||||
gpu_pid = GPU_BASE_PID + int(gpu_id)
|
||||
rec_vals.append(gpu_pid)
|
||||
rec_vals.append(0)
|
||||
m = ptrn_id.match(rec_vals[4])
|
||||
if not m: fatal("bad hcc ops entry '" + record + "'")
|
||||
corr_id = int(m.group(1)) - 1
|
||||
rec_vals.append(corr_id)
|
||||
db.insert_entry(table_handle, rec_vals)
|
||||
filtr[corr_id] = 1
|
||||
|
||||
if not gpu_pid in dep_dict: dep_dict[gpu_pid] = {}
|
||||
dep_dict[gpu_pid]['to'][corr_id] = int(rec_vals[0]) / 1000
|
||||
dep_dict[gpu_pid]['bsp'] = OPS_PID
|
||||
else: fatal("async-copy bad record")
|
||||
|
||||
return filtr
|
||||
#############################################################
|
||||
# main
|
||||
if (len(sys.argv) < 3): fatal("Usage: " + sys.argv[0] + " <output CSV file> <input result files list>")
|
||||
|
||||
@@ -294,45 +345,77 @@ else:
|
||||
|
||||
with open(dbfile, mode='w') as fd: fd.truncate()
|
||||
db = SQLiteDB(dbfile)
|
||||
db.open_json(jsonfile);
|
||||
|
||||
hsa_trace_found = fill_hsa_db('HSA', db, indir)
|
||||
hsa_trace_found = fill_api_db('HSA', db, indir, 'hsa', HSA_PID, COPY_PID, kern_dep_list, {}, 0)
|
||||
if hsa_trace_found:
|
||||
fill_copy_db('COPY', db, indir)
|
||||
fill_copy_db('COPY', db, indir)
|
||||
|
||||
ops_filtr = fill_ops_db('OPS', db, indir)
|
||||
hip_trace_found = fill_api_db('HIP', db, indir, 'hip', HIP_PID, OPS_PID, [], ops_filtr, 1)
|
||||
|
||||
fill_kernel_db('A', db)
|
||||
|
||||
any_trace_found = hsa_trace_found | hip_trace_found
|
||||
if any_trace_found:
|
||||
db.open_json(jsonfile)
|
||||
|
||||
if hsa_trace_found:
|
||||
db.label_json(HSA_PID, "CPU", jsonfile)
|
||||
db.label_json(HSA_PID, "CPU HSA API", jsonfile)
|
||||
db.label_json(COPY_PID, "COPY", jsonfile)
|
||||
|
||||
for ind in range(0, int(max_gpu_id) + 1):
|
||||
db.label_json(int(ind) + int(GPU_BASE_PID), "GPU" + str(ind), jsonfile)
|
||||
if hip_trace_found:
|
||||
db.label_json(HIP_PID, "CPU HIP API", jsonfile)
|
||||
|
||||
if 'BeginNs' in var_list:
|
||||
dform.post_process_data(db, 'A', csvfile)
|
||||
dform.gen_table_bins(db, 'A', statfile, 'KernelName', 'DurationNs')
|
||||
if any_trace_found and max_gpu_id >= 0:
|
||||
for ind in range(0, int(max_gpu_id) + 1):
|
||||
db.label_json(int(ind) + int(GPU_BASE_PID), "GPU" + str(ind), jsonfile)
|
||||
|
||||
dform.post_process_data(db, 'A', csvfile)
|
||||
dform.gen_table_bins(db, 'A', statfile, 'KernelName', 'DurationNs')
|
||||
if hsa_trace_found and 'BeginNs' in var_list:
|
||||
dform.gen_kernel_json_trace(db, 'A', GPU_BASE_PID, START_US, jsonfile)
|
||||
else:
|
||||
db.dump_csv('A', csvfile)
|
||||
|
||||
if hsa_trace_found:
|
||||
statfile = re.sub(r'stats', r'hsa_stats', statfile)
|
||||
dform.post_process_data(db, 'HSA')
|
||||
dform.gen_table_bins(db, 'HSA', statfile, 'Name', 'DurationNs')
|
||||
dform.gen_api_json_trace(db, 'HSA', START_US, jsonfile)
|
||||
|
||||
|
||||
dform.post_process_data(db, 'COPY')
|
||||
dform.gen_api_json_trace(db, 'COPY', START_US, jsonfile)
|
||||
|
||||
dep_id = 0
|
||||
for (to_pid, dep_str) in dep_dict.items():
|
||||
tid_list = dep_str['tid']
|
||||
from_us_list = dep_str['from']
|
||||
to_us_dict = dep_str['to']
|
||||
db.flow_json(dep_id, HSA_PID, tid_list, from_us_list, to_pid, to_us_dict, START_US, jsonfile)
|
||||
dep_id += len(tid_list)
|
||||
if hip_trace_found:
|
||||
statfile = re.sub(r'stats', r'hip_stats', statfile)
|
||||
dform.post_process_data(db, 'HIP')
|
||||
dform.gen_table_bins(db, 'HIP', statfile, 'Name', 'DurationNs')
|
||||
dform.gen_api_json_trace(db, 'HIP', START_US, jsonfile)
|
||||
|
||||
dform.post_process_data(db, 'OPS')
|
||||
dform.gen_ops_json_trace(db, 'OPS', GPU_BASE_PID, START_US, jsonfile)
|
||||
|
||||
db.close_json(jsonfile);
|
||||
if any_trace_found:
|
||||
for (to_pid, dep_str) in dep_dict.items():
|
||||
if 'bsp' in dep_str:
|
||||
bspid = dep_str['bsp']
|
||||
base_str = dep_dict[bspid]
|
||||
for v in ('pid', 'tid', 'from', 'id'):
|
||||
dep_str[v] = base_str[v]
|
||||
base_str['inv'] = 1
|
||||
|
||||
dep_id = 0
|
||||
for (to_pid, dep_str) in dep_dict.items():
|
||||
if 'inv' in dep_str: continue
|
||||
from_pid = dep_str['pid']
|
||||
tid_list = dep_str['tid']
|
||||
from_us_list = dep_str['from']
|
||||
to_us_dict = dep_str['to']
|
||||
corr_id_list = []
|
||||
if 'id' in dep_str: corr_id_list = dep_str['id']
|
||||
db.flow_json(dep_id, from_pid, tid_list, from_us_list, to_pid, to_us_dict, corr_id_list, START_US, jsonfile)
|
||||
dep_id += len(tid_list)
|
||||
|
||||
if any_trace_found:
|
||||
db.close_json(jsonfile);
|
||||
db.close()
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
Ссылка в новой задаче
Block a user