Rename /opt/rocm/rocprofiler.so link to proper librocprofiler64.so; Simplify --hsa-trace option just to one, no need with --stats and --timestamp on

Change-Id: I1dccfdd8843437bf9ed10ac259e84b34ef986be3


[ROCm/rocprofiler commit: 862f042c43]
Этот коммит содержится в:
Evgeny
2019-02-01 22:37:59 -06:00
родитель 5b415caaa4
Коммит be894504b0
5 изменённых файлов: 159 добавлений и 51 удалений
+1 -1
Просмотреть файл
@@ -95,7 +95,7 @@ install ( FILES
DESTINATION bin
PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE )
install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION ../include RENAME ${ROCPROFILER_NAME} )
install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCPROFILER_NAME}.so )
install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCPROFILER_LIBRARY}.so )
install ( FILES ${PROJECT_BINARY_DIR}/rocprof-link DESTINATION ../bin
PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
RENAME rocprof )
+5
Просмотреть файл
@@ -24,6 +24,11 @@ def gen_api_json_trace(db, table, start_us, outfile):
db.dump_json('B', table, outfile)
db.execute('DROP VIEW B')
def gen_ops_json_trace(db, table, base_pid, start_us, outfile):
db.execute('create view B as select "Index", Name as name, ("gpu-id" + %d) as pid, tid, (BeginNs/1000 - %d) as ts, (DurationNs/1000) as dur from %s order by ts asc;' % (base_pid, start_us, table));
db.dump_json('B', table, outfile)
db.execute('DROP VIEW B')
def gen_kernel_json_trace(db, table, base_pid, start_us, outfile):
db.execute('create view B as select "Index", KernelName as name, ("gpu-id" + %d) as pid, (0) as tid, (BeginNs/1000 - %d) as ts, (DurationNs/1000) as dur from %s order by ts asc;' % (base_pid, start_us, table));
db.dump_json('B', table, outfile)
+27 -11
Просмотреть файл
@@ -140,9 +140,8 @@ usage() {
echo " --ctx-limit <max number> - maximum number of outstanding contexts [0 - unlimited]"
echo " --heartbeat <rate sec> - to print progress heartbeats [0 - disabled]"
echo ""
echo " --stats - generating stats and json trace output"
echo " --hsa-trace - to trace HSA"
echo " --hip-trace - to trace HIP"
echo " --stats - generating kernel executino stats"
echo " --hsa-trace - to trace HSA, generates API execution stats and JSON file viewable in chrome tracing"
echo ""
echo "Configuration file:"
echo " You can set your parameters defaults preferences in the configuration file 'rpl_rc.xml'. The search path sequence: .:${HOME}:<package path>"
@@ -191,9 +190,25 @@ run() {
mkdir -p "$ROCP_OUTPUT_DIR"
fi
API_TRACE=""
PRELOAD_LIBS=""
if [ "$HSA_TRACE" = 1 ] ; then
export ROCTRACER_DOMAIN="hsa"
API_TRACE="hsa"
fi
if [ "$HIP_TRACE" = 1 ] ; then
if [ -z "$API_TRACE" ] ; then
API_TRACE="hip";
else
API_TRACE="all"
fi
if [ -z "$HCC_HOME" ] ; then error "env var HCC_HOME is not defined"; fi
PRELOAD_LIBS="$PRELOAD_LIBS $HCC_HOME/lib/libmcwamp_hsa.so"
fi
if [ -n "$API_TRACE" ] ; then
API_TRACE=$(echo $API_TRACE | sed 's/all//')
if [ -n "$API_TRACE" ] ; then export ROCTRACER_DOMAIN=$API_TRACE; fi
export HSA_TOOLS_LIB="libtracer_tool.so libroctracer64.so $HSA_TOOLS_LIB"
PRELOAD_LIBS="$PRELOAD_LIBS $HSA_TOOLS_LIB"
fi
redirection_cmd=""
@@ -203,7 +218,8 @@ run() {
fi
#unset ROCP_OUTPUT_DIR
eval "LD_PRELOAD='$HSA_TOOLS_LIB' $APP_CMD $redirection_cmd"
CMD_LINE="LD_PRELOAD='$PRELOAD_LIBS' $APP_CMD $redirection_cmd"
eval "$CMD_LINE"
}
# main
@@ -266,18 +282,18 @@ while [ 1 ] ; do
export ROCP_OUTSTANDING_MAX="$2"
elif [ "$1" = "--heartbeat" ] ; then
export ROCP_OUTSTANDING_MON="$2"
elif [ "$1" = "--stats" ] ; then
ARG_VAL=0
export ROCP_TIMESTAMP_ON=1
GEN_STATS=1
elif [ "$1" = "--hsa-trace" ] ; then
ARG_VAL=0
export ROCP_TIMESTAMP_ON=1
GEN_STATS=1
HSA_TRACE=1
elif [ "$1" = "--hip-trace" ] ; then
ARG_VAL=0
HIP_TRACE=1
elif [ "$1" = "--verbose" ] ; then
ARG_VAL=0
export ROCP_VERBOSE_MODE=1
elif [ "$1" = "--stats" ] ; then
ARG_VAL=0
GEN_STATS=1
else
break
fi
+7 -3
Просмотреть файл
@@ -1,4 +1,5 @@
import csv, sqlite3, re, sys
from functools import reduce
# SQLite Database class
class SQLiteDB:
@@ -112,15 +113,18 @@ class SQLiteDB:
with open(file_name, mode='a') as fd:
fd.write(',{"args":{"name":"%s"},"ph":"M","pid":%s,"name":"process_name"}\n' %(label, pid));
def flow_json(self, base_id, from_pid, from_tid, from_us_list, to_pid, to_us_dict, start_us, file_name):
def flow_json(self, base_id, from_pid, from_tid, from_us_list, to_pid, to_us_dict, corr_id_list, start_us, file_name):
if not re.search(r'\.json$', file_name):
raise Exception('wrong output file type: "' + file_name + '"' )
with open(file_name, mode='a') as fd:
dep_id = base_id
for ind in range(len(from_tid)):
if (len(corr_id_list) != 0): corr_id = corr_id_list[ind]
else: corr_id = ind
from_ts = from_us_list[ind] - start_us
to_ts = to_us_dict[ind] - start_us
fd.write(',{"ts":%d,"ph":"s","cat":"DataFlow","id":%d,"pid":%s,"tid":%d,"name":"dep"}\n' % (from_ts, dep_id, str(from_pid), from_tid[ind]))
to_ts = to_us_dict[corr_id] - start_us
if from_ts > to_ts: from_ts = to_ts
fd.write(',{"ts":%d,"ph":"s","cat":"DataFlow","id":%d,"pid":%s,"tid":%s,"name":"dep"}\n' % (from_ts, dep_id, str(from_pid), from_tid[ind]))
fd.write(',{"ts":%d,"ph":"t","cat":"DataFlow","id":%d,"pid":%s,"tid":0,"name":"dep"}\n' % (to_ts, dep_id, str(to_pid)))
dep_id += 1
+119 -36
Просмотреть файл
@@ -33,9 +33,11 @@ import dform
# SQ_INSTS_VMEM_RD (36864)
COPY_PID = 0
HSA_PID = 1
GPU_BASE_PID = 2
max_gpu_id = 0
OPS_PID = 1
HSA_PID = 2
HIP_PID = 3
GPU_BASE_PID = 4
max_gpu_id = -1
START_US = 0
# dependencies dictionary
@@ -100,9 +102,9 @@ def parse_res(infile):
var_table[dispatch_number][var] = val
if not var in var_list: var_list.append(var);
if var == 'gpu-id':
if (val > max_gpu_id): max_gpu_id = val
gpu_id = val
if var == 'tid': disp_tid = int(val)
gpu_id = int(val)
if (gpu_id > max_gpu_id): max_gpu_id = gpu_id
if var == 'tid': disp_tid = val
else: fatal('wrong kernel property "' + prop + '" in "'+ kernel_properties + '"')
m = ts_pattern.search(record)
if m:
@@ -123,6 +125,7 @@ def parse_res(infile):
dep_str['to'][to_id] = to_us
dep_str['from'].append(from_us)
dep_str['tid'].append(disp_tid)
dep_str['pid'] = HSA_PID
kern_dep_list.append((disp_tid, m.group(1)))
inp.close()
@@ -181,16 +184,16 @@ hsa_table_descr = [
['BeginNs', 'EndNs', 'pid', 'tid', 'Name', 'args', 'Index'],
{'Index':'INTEGER', 'Name':'TEXT', 'args':'TEXT', 'BeginNs':'INTEGER', 'EndNs':'INTEGER', 'pid':'INTEGER', 'tid':'INTEGER'}
]
def fill_hsa_db(table_name, db, indir):
file_name = indir + '/' + 'hsa_api_trace.txt'
def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep_filtr, expl_id):
file_name = indir + '/' + api_name + '_api_trace.txt'
ptrn_val = re.compile(r'(\d+):(\d+) (\d+):(\d+) ([^\(]+)(\(.*)$')
ptrn_ac = re.compile(r'hsa_amd_memory_async_copy')
if not os.path.isfile(file_name): return 0
if not COPY_PID in dep_dict: dep_dict[COPY_PID] = {}
dep_tid_list = []
dep_from_us_list = []
dep_id_list = []
global START_US
with open(file_name, mode='r') as fd:
@@ -210,24 +213,28 @@ def fill_hsa_db(table_name, db, indir):
rec_vals = []
for ind in range(1,7):
rec_vals.append(m.group(ind))
rec_vals[2] = HSA_PID
rec_vals[2] = api_pid
rec_vals.append(record_id)
db.insert_entry(table_handle, rec_vals)
if ptrn_ac.search(rec_vals[4]):
if ptrn_ac.search(rec_vals[4]) or record_id in dep_filtr:
beg_ns = int(rec_vals[0])
end_ns = int(rec_vals[1])
from_us = (beg_ns / 1000) + ((end_ns - beg_ns) / 1000)
dep_from_us_list.append(from_us)
dep_tid_list.append(int(rec_vals[3]))
dep_id_list.append(record_id)
record_id += 1
else: fatal("hsa bad record")
for (tid, from_ns) in kern_dep_list:
db.insert_entry(table_handle, [from_ns, from_ns, HSA_PID, tid, 'hsa_dispatch', '', record_id])
for (tid, from_ns) in dep_list:
db.insert_entry(table_handle, [from_ns, from_ns, api_pid, tid, 'hsa_dispatch', '', record_id])
record_id += 1
dep_dict[COPY_PID]['tid'] = dep_tid_list
dep_dict[COPY_PID]['from'] = dep_from_us_list
if not dep_pid in dep_dict: dep_dict[dep_pid] = {}
dep_dict[dep_pid]['pid'] = api_pid
dep_dict[dep_pid]['tid'] = dep_tid_list
dep_dict[dep_pid]['from'] = dep_from_us_list
if expl_id: dep_dict[dep_pid]['id'] = dep_id_list
return 1
#############################################################
@@ -257,13 +264,57 @@ def fill_copy_db(table_name, db, indir):
rec_vals.append(0)
m = ptrn_id.match(rec_vals[2])
if m: dep_to_us_dict[int(m.group(1))] = int(rec_vals[0]) / 1000
else: fatal("async-copy bad name")
else: fatal("bad async-copy entry")
rec_vals.append(m.group(1))
db.insert_entry(table_handle, rec_vals)
else: fatal("async-copy bad record")
dep_dict[COPY_PID]['to'] = dep_to_us_dict
#############################################################
# fill HCC ops DB
ops_table_descr = [
['BeginNs', 'EndNs', 'dev-id', 'queue-id', 'Name', 'pid', 'tid', 'Index'],
{'Index':'INTEGER', 'Name':'TEXT', 'args':'TEXT', 'BeginNs':'INTEGER', 'EndNs':'INTEGER', 'dev-id':'INTEGER', 'queue-id':'INTEGER', 'pid':'INTEGER', 'tid':'INTEGER'}
]
def fill_ops_db(table_name, db, indir):
global max_gpu_id
file_name = indir + '/' + 'hcc_ops_trace.txt'
ptrn_val = re.compile(r'(\d+):(\d+) (\d+):(\d+) (.*)$')
ptrn_id = re.compile(r'^[^:]+:(\d+)$')
if not os.path.isfile(file_name): return {}
filtr = {}
record_id = 0
table_handle = db.add_table(table_name, ops_table_descr)
with open(file_name, mode='r') as fd:
for line in fd.readlines():
record = line[:-1]
m = ptrn_val.match(record)
if m:
rec_vals = []
for ind in range(1,6): rec_vals.append(m.group(ind))
gpu_id = int(rec_vals[2]);
if (gpu_id > max_gpu_id): max_gpu_id = gpu_id
gpu_pid = GPU_BASE_PID + int(gpu_id)
rec_vals.append(gpu_pid)
rec_vals.append(0)
m = ptrn_id.match(rec_vals[4])
if not m: fatal("bad hcc ops entry '" + record + "'")
corr_id = int(m.group(1)) - 1
rec_vals.append(corr_id)
db.insert_entry(table_handle, rec_vals)
filtr[corr_id] = 1
if not gpu_pid in dep_dict: dep_dict[gpu_pid] = {}
dep_dict[gpu_pid]['to'][corr_id] = int(rec_vals[0]) / 1000
dep_dict[gpu_pid]['bsp'] = OPS_PID
else: fatal("async-copy bad record")
return filtr
#############################################################
# main
if (len(sys.argv) < 3): fatal("Usage: " + sys.argv[0] + " <output CSV file> <input result files list>")
@@ -294,45 +345,77 @@ else:
with open(dbfile, mode='w') as fd: fd.truncate()
db = SQLiteDB(dbfile)
db.open_json(jsonfile);
hsa_trace_found = fill_hsa_db('HSA', db, indir)
hsa_trace_found = fill_api_db('HSA', db, indir, 'hsa', HSA_PID, COPY_PID, kern_dep_list, {}, 0)
if hsa_trace_found:
fill_copy_db('COPY', db, indir)
fill_copy_db('COPY', db, indir)
ops_filtr = fill_ops_db('OPS', db, indir)
hip_trace_found = fill_api_db('HIP', db, indir, 'hip', HIP_PID, OPS_PID, [], ops_filtr, 1)
fill_kernel_db('A', db)
any_trace_found = hsa_trace_found | hip_trace_found
if any_trace_found:
db.open_json(jsonfile)
if hsa_trace_found:
db.label_json(HSA_PID, "CPU", jsonfile)
db.label_json(HSA_PID, "CPU HSA API", jsonfile)
db.label_json(COPY_PID, "COPY", jsonfile)
for ind in range(0, int(max_gpu_id) + 1):
db.label_json(int(ind) + int(GPU_BASE_PID), "GPU" + str(ind), jsonfile)
if hip_trace_found:
db.label_json(HIP_PID, "CPU HIP API", jsonfile)
if 'BeginNs' in var_list:
dform.post_process_data(db, 'A', csvfile)
dform.gen_table_bins(db, 'A', statfile, 'KernelName', 'DurationNs')
if any_trace_found and max_gpu_id >= 0:
for ind in range(0, int(max_gpu_id) + 1):
db.label_json(int(ind) + int(GPU_BASE_PID), "GPU" + str(ind), jsonfile)
dform.post_process_data(db, 'A', csvfile)
dform.gen_table_bins(db, 'A', statfile, 'KernelName', 'DurationNs')
if hsa_trace_found and 'BeginNs' in var_list:
dform.gen_kernel_json_trace(db, 'A', GPU_BASE_PID, START_US, jsonfile)
else:
db.dump_csv('A', csvfile)
if hsa_trace_found:
statfile = re.sub(r'stats', r'hsa_stats', statfile)
dform.post_process_data(db, 'HSA')
dform.gen_table_bins(db, 'HSA', statfile, 'Name', 'DurationNs')
dform.gen_api_json_trace(db, 'HSA', START_US, jsonfile)
dform.post_process_data(db, 'COPY')
dform.gen_api_json_trace(db, 'COPY', START_US, jsonfile)
dep_id = 0
for (to_pid, dep_str) in dep_dict.items():
tid_list = dep_str['tid']
from_us_list = dep_str['from']
to_us_dict = dep_str['to']
db.flow_json(dep_id, HSA_PID, tid_list, from_us_list, to_pid, to_us_dict, START_US, jsonfile)
dep_id += len(tid_list)
if hip_trace_found:
statfile = re.sub(r'stats', r'hip_stats', statfile)
dform.post_process_data(db, 'HIP')
dform.gen_table_bins(db, 'HIP', statfile, 'Name', 'DurationNs')
dform.gen_api_json_trace(db, 'HIP', START_US, jsonfile)
dform.post_process_data(db, 'OPS')
dform.gen_ops_json_trace(db, 'OPS', GPU_BASE_PID, START_US, jsonfile)
db.close_json(jsonfile);
if any_trace_found:
for (to_pid, dep_str) in dep_dict.items():
if 'bsp' in dep_str:
bspid = dep_str['bsp']
base_str = dep_dict[bspid]
for v in ('pid', 'tid', 'from', 'id'):
dep_str[v] = base_str[v]
base_str['inv'] = 1
dep_id = 0
for (to_pid, dep_str) in dep_dict.items():
if 'inv' in dep_str: continue
from_pid = dep_str['pid']
tid_list = dep_str['tid']
from_us_list = dep_str['from']
to_us_dict = dep_str['to']
corr_id_list = []
if 'id' in dep_str: corr_id_list = dep_str['id']
db.flow_json(dep_id, from_pid, tid_list, from_us_list, to_pid, to_us_dict, corr_id_list, START_US, jsonfile)
dep_id += len(tid_list)
if any_trace_found:
db.close_json(jsonfile);
db.close()
sys.exit(0)