From be894504b07f416eaaa392365ea4b78bea22f316 Mon Sep 17 00:00:00 2001 From: Evgeny Date: Fri, 1 Feb 2019 22:37:59 -0600 Subject: [PATCH] Rename the /opt/rocm/rocprofiler.so link to the proper librocprofiler64.so; simplify tracing to the single --hsa-trace option, which no longer needs --stats and --timestamp on Change-Id: I1dccfdd8843437bf9ed10ac259e84b34ef986be3 [ROCm/rocprofiler commit: 862f042c4340687a200d54f97d6ca83311bf2af8] --- projects/rocprofiler/CMakeLists.txt | 2 +- projects/rocprofiler/bin/dform.py | 5 + projects/rocprofiler/bin/rpl_run.sh | 38 +++++-- projects/rocprofiler/bin/sqlitedb.py | 10 +- projects/rocprofiler/bin/tblextr.py | 155 ++++++++++++++++++++------- 5 files changed, 159 insertions(+), 51 deletions(-) diff --git a/projects/rocprofiler/CMakeLists.txt b/projects/rocprofiler/CMakeLists.txt index 8205a8437e..18bbee130d 100644 --- a/projects/rocprofiler/CMakeLists.txt +++ b/projects/rocprofiler/CMakeLists.txt @@ -95,7 +95,7 @@ install ( FILES DESTINATION bin PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE ) install ( FILES ${PROJECT_BINARY_DIR}/inc-link DESTINATION ../include RENAME ${ROCPROFILER_NAME} ) -install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCPROFILER_NAME}.so ) +install ( FILES ${PROJECT_BINARY_DIR}/so-link DESTINATION ../lib RENAME ${ROCPROFILER_LIBRARY}.so ) install ( FILES ${PROJECT_BINARY_DIR}/rocprof-link DESTINATION ../bin PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE RENAME rocprof ) diff --git a/projects/rocprofiler/bin/dform.py b/projects/rocprofiler/bin/dform.py index 107188fa0a..5fc8d6fc76 100644 --- a/projects/rocprofiler/bin/dform.py +++ b/projects/rocprofiler/bin/dform.py @@ -24,6 +24,11 @@ def gen_api_json_trace(db, table, start_us, outfile): db.dump_json('B', table, outfile) db.execute('DROP VIEW B') +def gen_ops_json_trace(db, table, base_pid, start_us, outfile): + db.execute('create view B as select "Index", Name as name, ("gpu-id" + 
%d) as pid, tid, (BeginNs/1000 - %d) as ts, (DurationNs/1000) as dur from %s order by ts asc;' % (base_pid, start_us, table)); + db.dump_json('B', table, outfile) + db.execute('DROP VIEW B') + def gen_kernel_json_trace(db, table, base_pid, start_us, outfile): db.execute('create view B as select "Index", KernelName as name, ("gpu-id" + %d) as pid, (0) as tid, (BeginNs/1000 - %d) as ts, (DurationNs/1000) as dur from %s order by ts asc;' % (base_pid, start_us, table)); db.dump_json('B', table, outfile) diff --git a/projects/rocprofiler/bin/rpl_run.sh b/projects/rocprofiler/bin/rpl_run.sh index 7d3dffac7d..f7437dac37 100755 --- a/projects/rocprofiler/bin/rpl_run.sh +++ b/projects/rocprofiler/bin/rpl_run.sh @@ -140,9 +140,8 @@ usage() { echo " --ctx-limit - maximum number of outstanding contexts [0 - unlimited]" echo " --heartbeat - to print progress heartbeats [0 - disabled]" echo "" - echo " --stats - generating stats and json trace output" - echo " --hsa-trace - to trace HSA" - echo " --hip-trace - to trace HIP" + echo " --stats - generating kernel executino stats" + echo " --hsa-trace - to trace HSA, generates API execution stats and JSON file viewable in chrome tracing" echo "" echo "Configuration file:" echo " You can set your parameters defaults preferences in the configuration file 'rpl_rc.xml'. 
The search path sequence: .:${HOME}:" @@ -191,9 +190,25 @@ run() { mkdir -p "$ROCP_OUTPUT_DIR" fi + API_TRACE="" + PRELOAD_LIBS="" if [ "$HSA_TRACE" = 1 ] ; then - export ROCTRACER_DOMAIN="hsa" + API_TRACE="hsa" + fi + if [ "$HIP_TRACE" = 1 ] ; then + if [ -z "$API_TRACE" ] ; then + API_TRACE="hip"; + else + API_TRACE="all" + fi + if [ -z "$HCC_HOME" ] ; then error "env var HCC_HOME is not defined"; fi + PRELOAD_LIBS="$PRELOAD_LIBS $HCC_HOME/lib/libmcwamp_hsa.so" + fi + if [ -n "$API_TRACE" ] ; then + API_TRACE=$(echo $API_TRACE | sed 's/all//') + if [ -n "$API_TRACE" ] ; then export ROCTRACER_DOMAIN=$API_TRACE; fi export HSA_TOOLS_LIB="libtracer_tool.so libroctracer64.so $HSA_TOOLS_LIB" + PRELOAD_LIBS="$PRELOAD_LIBS $HSA_TOOLS_LIB" fi redirection_cmd="" @@ -203,7 +218,8 @@ run() { fi #unset ROCP_OUTPUT_DIR - eval "LD_PRELOAD='$HSA_TOOLS_LIB' $APP_CMD $redirection_cmd" + CMD_LINE="LD_PRELOAD='$PRELOAD_LIBS' $APP_CMD $redirection_cmd" + eval "$CMD_LINE" } # main @@ -266,18 +282,18 @@ while [ 1 ] ; do export ROCP_OUTSTANDING_MAX="$2" elif [ "$1" = "--heartbeat" ] ; then export ROCP_OUTSTANDING_MON="$2" + elif [ "$1" = "--stats" ] ; then + ARG_VAL=0 + export ROCP_TIMESTAMP_ON=1 + GEN_STATS=1 elif [ "$1" = "--hsa-trace" ] ; then ARG_VAL=0 + export ROCP_TIMESTAMP_ON=1 + GEN_STATS=1 HSA_TRACE=1 - elif [ "$1" = "--hip-trace" ] ; then - ARG_VAL=0 - HIP_TRACE=1 elif [ "$1" = "--verbose" ] ; then ARG_VAL=0 export ROCP_VERBOSE_MODE=1 - elif [ "$1" = "--stats" ] ; then - ARG_VAL=0 - GEN_STATS=1 else break fi diff --git a/projects/rocprofiler/bin/sqlitedb.py b/projects/rocprofiler/bin/sqlitedb.py index bfcf12fae9..295fe7a7a4 100644 --- a/projects/rocprofiler/bin/sqlitedb.py +++ b/projects/rocprofiler/bin/sqlitedb.py @@ -1,4 +1,5 @@ import csv, sqlite3, re, sys +from functools import reduce # SQLite Database class class SQLiteDB: @@ -112,15 +113,18 @@ class SQLiteDB: with open(file_name, mode='a') as fd: 
fd.write(',{"args":{"name":"%s"},"ph":"M","pid":%s,"name":"process_name"}\n' %(label, pid)); - def flow_json(self, base_id, from_pid, from_tid, from_us_list, to_pid, to_us_dict, start_us, file_name): + def flow_json(self, base_id, from_pid, from_tid, from_us_list, to_pid, to_us_dict, corr_id_list, start_us, file_name): if not re.search(r'\.json$', file_name): raise Exception('wrong output file type: "' + file_name + '"' ) with open(file_name, mode='a') as fd: dep_id = base_id for ind in range(len(from_tid)): + if (len(corr_id_list) != 0): corr_id = corr_id_list[ind] + else: corr_id = ind from_ts = from_us_list[ind] - start_us - to_ts = to_us_dict[ind] - start_us - fd.write(',{"ts":%d,"ph":"s","cat":"DataFlow","id":%d,"pid":%s,"tid":%d,"name":"dep"}\n' % (from_ts, dep_id, str(from_pid), from_tid[ind])) + to_ts = to_us_dict[corr_id] - start_us + if from_ts > to_ts: from_ts = to_ts + fd.write(',{"ts":%d,"ph":"s","cat":"DataFlow","id":%d,"pid":%s,"tid":%s,"name":"dep"}\n' % (from_ts, dep_id, str(from_pid), from_tid[ind])) fd.write(',{"ts":%d,"ph":"t","cat":"DataFlow","id":%d,"pid":%s,"tid":0,"name":"dep"}\n' % (to_ts, dep_id, str(to_pid))) dep_id += 1 diff --git a/projects/rocprofiler/bin/tblextr.py b/projects/rocprofiler/bin/tblextr.py index aff6d6a880..4c4cc7820d 100755 --- a/projects/rocprofiler/bin/tblextr.py +++ b/projects/rocprofiler/bin/tblextr.py @@ -33,9 +33,11 @@ import dform # SQ_INSTS_VMEM_RD (36864) COPY_PID = 0 -HSA_PID = 1 -GPU_BASE_PID = 2 -max_gpu_id = 0 +OPS_PID = 1 +HSA_PID = 2 +HIP_PID = 3 +GPU_BASE_PID = 4 +max_gpu_id = -1 START_US = 0 # dependencies dictionary @@ -100,9 +102,9 @@ def parse_res(infile): var_table[dispatch_number][var] = val if not var in var_list: var_list.append(var); if var == 'gpu-id': - if (val > max_gpu_id): max_gpu_id = val - gpu_id = val - if var == 'tid': disp_tid = int(val) + gpu_id = int(val) + if (gpu_id > max_gpu_id): max_gpu_id = gpu_id + if var == 'tid': disp_tid = val else: fatal('wrong kernel property "' + prop + '" 
in "'+ kernel_properties + '"') m = ts_pattern.search(record) if m: @@ -123,6 +125,7 @@ def parse_res(infile): dep_str['to'][to_id] = to_us dep_str['from'].append(from_us) dep_str['tid'].append(disp_tid) + dep_str['pid'] = HSA_PID kern_dep_list.append((disp_tid, m.group(1))) inp.close() @@ -181,16 +184,16 @@ hsa_table_descr = [ ['BeginNs', 'EndNs', 'pid', 'tid', 'Name', 'args', 'Index'], {'Index':'INTEGER', 'Name':'TEXT', 'args':'TEXT', 'BeginNs':'INTEGER', 'EndNs':'INTEGER', 'pid':'INTEGER', 'tid':'INTEGER'} ] -def fill_hsa_db(table_name, db, indir): - file_name = indir + '/' + 'hsa_api_trace.txt' +def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep_filtr, expl_id): + file_name = indir + '/' + api_name + '_api_trace.txt' ptrn_val = re.compile(r'(\d+):(\d+) (\d+):(\d+) ([^\(]+)(\(.*)$') ptrn_ac = re.compile(r'hsa_amd_memory_async_copy') if not os.path.isfile(file_name): return 0 - if not COPY_PID in dep_dict: dep_dict[COPY_PID] = {} dep_tid_list = [] dep_from_us_list = [] + dep_id_list = [] global START_US with open(file_name, mode='r') as fd: @@ -210,24 +213,28 @@ def fill_hsa_db(table_name, db, indir): rec_vals = [] for ind in range(1,7): rec_vals.append(m.group(ind)) - rec_vals[2] = HSA_PID + rec_vals[2] = api_pid rec_vals.append(record_id) db.insert_entry(table_handle, rec_vals) - if ptrn_ac.search(rec_vals[4]): + if ptrn_ac.search(rec_vals[4]) or record_id in dep_filtr: beg_ns = int(rec_vals[0]) end_ns = int(rec_vals[1]) from_us = (beg_ns / 1000) + ((end_ns - beg_ns) / 1000) dep_from_us_list.append(from_us) dep_tid_list.append(int(rec_vals[3])) + dep_id_list.append(record_id) record_id += 1 else: fatal("hsa bad record") - for (tid, from_ns) in kern_dep_list: - db.insert_entry(table_handle, [from_ns, from_ns, HSA_PID, tid, 'hsa_dispatch', '', record_id]) + for (tid, from_ns) in dep_list: + db.insert_entry(table_handle, [from_ns, from_ns, api_pid, tid, 'hsa_dispatch', '', record_id]) record_id += 1 - dep_dict[COPY_PID]['tid'] = 
dep_tid_list - dep_dict[COPY_PID]['from'] = dep_from_us_list + if not dep_pid in dep_dict: dep_dict[dep_pid] = {} + dep_dict[dep_pid]['pid'] = api_pid + dep_dict[dep_pid]['tid'] = dep_tid_list + dep_dict[dep_pid]['from'] = dep_from_us_list + if expl_id: dep_dict[dep_pid]['id'] = dep_id_list return 1 ############################################################# @@ -257,13 +264,57 @@ def fill_copy_db(table_name, db, indir): rec_vals.append(0) m = ptrn_id.match(rec_vals[2]) if m: dep_to_us_dict[int(m.group(1))] = int(rec_vals[0]) / 1000 - else: fatal("async-copy bad name") + else: fatal("bad async-copy entry") rec_vals.append(m.group(1)) db.insert_entry(table_handle, rec_vals) else: fatal("async-copy bad record") dep_dict[COPY_PID]['to'] = dep_to_us_dict ############################################################# + +# fill HCC ops DB +ops_table_descr = [ + ['BeginNs', 'EndNs', 'dev-id', 'queue-id', 'Name', 'pid', 'tid', 'Index'], + {'Index':'INTEGER', 'Name':'TEXT', 'args':'TEXT', 'BeginNs':'INTEGER', 'EndNs':'INTEGER', 'dev-id':'INTEGER', 'queue-id':'INTEGER', 'pid':'INTEGER', 'tid':'INTEGER'} +] +def fill_ops_db(table_name, db, indir): + global max_gpu_id + file_name = indir + '/' + 'hcc_ops_trace.txt' + ptrn_val = re.compile(r'(\d+):(\d+) (\d+):(\d+) (.*)$') + ptrn_id = re.compile(r'^[^:]+:(\d+)$') + + if not os.path.isfile(file_name): return {} + + filtr = {} + + record_id = 0 + table_handle = db.add_table(table_name, ops_table_descr) + with open(file_name, mode='r') as fd: + for line in fd.readlines(): + record = line[:-1] + m = ptrn_val.match(record) + if m: + rec_vals = [] + for ind in range(1,6): rec_vals.append(m.group(ind)) + gpu_id = int(rec_vals[2]); + if (gpu_id > max_gpu_id): max_gpu_id = gpu_id + gpu_pid = GPU_BASE_PID + int(gpu_id) + rec_vals.append(gpu_pid) + rec_vals.append(0) + m = ptrn_id.match(rec_vals[4]) + if not m: fatal("bad hcc ops entry '" + record + "'") + corr_id = int(m.group(1)) - 1 + rec_vals.append(corr_id) + 
db.insert_entry(table_handle, rec_vals) + filtr[corr_id] = 1 + + if not gpu_pid in dep_dict: dep_dict[gpu_pid] = {} + dep_dict[gpu_pid]['to'][corr_id] = int(rec_vals[0]) / 1000 + dep_dict[gpu_pid]['bsp'] = OPS_PID + else: fatal("async-copy bad record") + + return filtr +############################################################# # main if (len(sys.argv) < 3): fatal("Usage: " + sys.argv[0] + " ") @@ -294,45 +345,77 @@ else: with open(dbfile, mode='w') as fd: fd.truncate() db = SQLiteDB(dbfile) - db.open_json(jsonfile); - hsa_trace_found = fill_hsa_db('HSA', db, indir) + hsa_trace_found = fill_api_db('HSA', db, indir, 'hsa', HSA_PID, COPY_PID, kern_dep_list, {}, 0) if hsa_trace_found: - fill_copy_db('COPY', db, indir) + fill_copy_db('COPY', db, indir) + + ops_filtr = fill_ops_db('OPS', db, indir) + hip_trace_found = fill_api_db('HIP', db, indir, 'hip', HIP_PID, OPS_PID, [], ops_filtr, 1) + fill_kernel_db('A', db) + any_trace_found = hsa_trace_found | hip_trace_found + if any_trace_found: + db.open_json(jsonfile) + if hsa_trace_found: - db.label_json(HSA_PID, "CPU", jsonfile) + db.label_json(HSA_PID, "CPU HSA API", jsonfile) db.label_json(COPY_PID, "COPY", jsonfile) - for ind in range(0, int(max_gpu_id) + 1): - db.label_json(int(ind) + int(GPU_BASE_PID), "GPU" + str(ind), jsonfile) + if hip_trace_found: + db.label_json(HIP_PID, "CPU HIP API", jsonfile) - if 'BeginNs' in var_list: - dform.post_process_data(db, 'A', csvfile) - dform.gen_table_bins(db, 'A', statfile, 'KernelName', 'DurationNs') + if any_trace_found and max_gpu_id >= 0: + for ind in range(0, int(max_gpu_id) + 1): + db.label_json(int(ind) + int(GPU_BASE_PID), "GPU" + str(ind), jsonfile) + + dform.post_process_data(db, 'A', csvfile) + dform.gen_table_bins(db, 'A', statfile, 'KernelName', 'DurationNs') + if hsa_trace_found and 'BeginNs' in var_list: dform.gen_kernel_json_trace(db, 'A', GPU_BASE_PID, START_US, jsonfile) - else: - db.dump_csv('A', csvfile) if hsa_trace_found: statfile = re.sub(r'stats', 
r'hsa_stats', statfile) dform.post_process_data(db, 'HSA') dform.gen_table_bins(db, 'HSA', statfile, 'Name', 'DurationNs') dform.gen_api_json_trace(db, 'HSA', START_US, jsonfile) - + dform.post_process_data(db, 'COPY') dform.gen_api_json_trace(db, 'COPY', START_US, jsonfile) - dep_id = 0 - for (to_pid, dep_str) in dep_dict.items(): - tid_list = dep_str['tid'] - from_us_list = dep_str['from'] - to_us_dict = dep_str['to'] - db.flow_json(dep_id, HSA_PID, tid_list, from_us_list, to_pid, to_us_dict, START_US, jsonfile) - dep_id += len(tid_list) + if hip_trace_found: + statfile = re.sub(r'stats', r'hip_stats', statfile) + dform.post_process_data(db, 'HIP') + dform.gen_table_bins(db, 'HIP', statfile, 'Name', 'DurationNs') + dform.gen_api_json_trace(db, 'HIP', START_US, jsonfile) + + dform.post_process_data(db, 'OPS') + dform.gen_ops_json_trace(db, 'OPS', GPU_BASE_PID, START_US, jsonfile) - db.close_json(jsonfile); + if any_trace_found: + for (to_pid, dep_str) in dep_dict.items(): + if 'bsp' in dep_str: + bspid = dep_str['bsp'] + base_str = dep_dict[bspid] + for v in ('pid', 'tid', 'from', 'id'): + dep_str[v] = base_str[v] + base_str['inv'] = 1 + + dep_id = 0 + for (to_pid, dep_str) in dep_dict.items(): + if 'inv' in dep_str: continue + from_pid = dep_str['pid'] + tid_list = dep_str['tid'] + from_us_list = dep_str['from'] + to_us_dict = dep_str['to'] + corr_id_list = [] + if 'id' in dep_str: corr_id_list = dep_str['id'] + db.flow_json(dep_id, from_pid, tid_list, from_us_list, to_pid, to_us_dict, corr_id_list, START_US, jsonfile) + dep_id += len(tid_list) + + if any_trace_found: + db.close_json(jsonfile); db.close() sys.exit(0)