################################################################################ # Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
################################################################################

import os
import sys
import re
import subprocess
import bisect

from sqlitedb import SQLiteDB
from mem_manager import MemManager
import dform

# When non-zero, memcopy records are additionally registered with the
# MemManager instance.
mcopy_data_enabled = 0

# Section ("pid") identifiers used for the generated JSON trace.
EXT_PID = 0
COPY_PID = 1
HIP_PID = 2
HSA_PID = 3
OPS_PID = 5
GPU_BASE_PID = 6
NONE_PID = -1

# Highest GPU index seen so far; -1 until a GPU record has been parsed.
max_gpu_id = -1
# Timestamp (ns) subtracted from record timestamps before converting to us.
START_NS = 0

hsa_activity_found = 0

# dependencies dictionary
dep_dict = {}
kern_dep_list = []
last_hip_api_map = {}
hip_streams = []
from_ids = {}

# stream ID map
stream_counter = 0
stream_id_map = {}


def get_stream_index(stream_id):
    """Return the small integer index assigned to *stream_id*.

    The string "nil" (case-insensitive) always maps to 0. Any other stream
    id gets the next value of the module-level counter the first time it is
    seen, and the same value on every later call.
    """
    global stream_counter
    if stream_id.lower() == "nil":
        return 0
    if stream_id not in stream_id_map:
        stream_counter += 1
        stream_id_map[stream_id] = stream_counter
    return stream_id_map[stream_id]


# patching activity records
def activity_record_patching(
    db, ops_table_name, kernel_found, kernel_name, stream_found, stream_ind, select_expr
):
    """Update the Name and/or tid fields of the records matching *select_expr*.

    The Name field is changed only when *kernel_found* is non-zero, the tid
    field only when *stream_found* is non-zero.
    """
    if kernel_found != 0:
        db.change_rec_fld(ops_table_name, 'Name = "' + kernel_name + '"', select_expr)
    if stream_found != 0:
        db.change_rec_fld(ops_table_name, "tid = " + str(stream_ind), select_expr)


# global vars
# table_descr[0] is the ordered column list, table_descr[1] maps column -> type.
table_descr = [["Index", "KernelName"], {"Index": "INTEGER", "KernelName": "TEXT"}]
var_list = table_descr[0]
# Maps (pid, dispatch number) -> {column name: value} for every parsed dispatch.
var_table = {}


#############################################################
def fatal(msg):
    """Write *msg* to stderr, prefixed with the script name, and exit with 1."""
    sys.stderr.write(sys.argv[0] + ": " + msg + "\n")
    sys.exit(1)


dbglog_count = 0


def dbglog(msg):
    """Write *msg* to stderr and then terminate through fatal("error")."""
    global dbglog_count
    dbglog_count += 1
    sys.stderr.write(sys.argv[0] + ": " + msg + "\n")
    fatal("error")


#############################################################

# Dumping sysinfo
sysinfo_begin = 1


def metadata_gen(sysinfo_file, sysinfo_cmd):
    """Append the output of the shell command *sysinfo_cmd* to *sysinfo_file*.

    The file is truncated on the first call only; every call then appends a
    "CMD: <command>" header line followed by the command's stdout.

    Raises:
        Exception: if *sysinfo_file* does not end in ".txt" or the command
            exits with a non-zero status.
    """
    global sysinfo_begin
    import shlex

    if not re.search(r"\.txt$", sysinfo_file):
        raise Exception('wrong output file type: "' + sysinfo_file + '"')
    if sysinfo_begin == 1:
        sysinfo_begin = 0
        with open(sysinfo_file, mode="w") as fd:
            fd.write("")
    with open(sysinfo_file, mode="a") as fd:
        fd.write("CMD: " + sysinfo_cmd + "\n")
    # The output path is quoted so that spaces or shell metacharacters in it
    # cannot break (or be interpreted by) the shell command line.
    status = subprocess.call(
        sysinfo_cmd + " >> " + shlex.quote(sysinfo_file),
        stderr=subprocess.STDOUT,
        shell=True,
    )
    if status != 0:
        raise Exception('Could not run command: "' + sysinfo_cmd + '"')


# parse results method
def parse_res(infile):
    """Parse a results text file into var_table, var_list and dep_dict.

    Does nothing when *infile* is not an existing file. Three kinds of lines
    are recognised:

    * lines containing "pid(<n>)" select the pid used as the first half of
      the var_table key, unless the ROCP_MERGE_PIDS environment variable is
      set to a non-empty value;
    * 'dispatch[<n>], <props> kernel-name("<name>")' lines open a dispatch
      entry and record its properties and, if present, its timestamps;
    * "<NAME> (<value>)" lines add a value to the current dispatch entry.

    Terminates through fatal() on a value line that precedes any dispatch
    line and on a malformed kernel property.
    """
    global max_gpu_id
    if not os.path.isfile(infile):
        return

    beg_pattern = re.compile(r'^dispatch\[(\d*)\], (.*) kernel-name\("([^"]*)"\)')
    prop_pattern = re.compile(r"([\w-]+)\((\w+)\)")
    ts_pattern = re.compile(r", time\((\d*),(\d*),(\d*),(\d*)\)")
    # var pattern below matches a variable name and a variable value from a one
    # line text in the format of for example "WRITE_SIZE (0.2500000000)" or
    # "GRBM_GUI_ACTIVE (27867)" or "TA_TA_BUSY[0]"
    var_pattern = re.compile(r"^\s*([a-zA-Z0-9_]+(?:\[\d+\])?)\s+\((\d+(?:\.\d+)?)\)")
    pid_pattern = re.compile(r"pid\((\d*)\)")

    merge_pids = os.getenv("ROCP_MERGE_PIDS")
    dispatch_number = 0
    var_table_pid = 0
    with open(infile, "r", errors="replace") as inp:
        for line in inp:
            # Strip only the newline so that a final line without one does
            # not lose its last character.
            record = line.rstrip("\n")

            pid_match = pid_pattern.search(record)
            if pid_match and not merge_pids:
                var_table_pid = int(pid_match.group(1))

            var_match = var_pattern.match(record)
            if var_match:
                key = (var_table_pid, dispatch_number)
                if key not in var_table:
                    fatal(
                        "Error: dispatch number not found '"
                        + str(dispatch_number)
                        + "'"
                    )
                var = var_match.group(1)
                val = var_match.group(2)
                var_table[key][var] = val
                if var not in var_list:
                    var_list.append(var)

            beg_match = beg_pattern.match(record)
            if not beg_match:
                continue

            dispatch_number = beg_match.group(1)
            key = (var_table_pid, dispatch_number)
            if key in var_table:
                # This dispatch has already been registered.
                continue

            entry = {
                "Index": dispatch_number,
                "KernelName": '"' + beg_match.group(3) + '"',
            }
            var_table[key] = entry

            gpu_id = 0
            queue_id = 0
            disp_pid = 0
            disp_tid = 0

            kernel_properties = beg_match.group(2)
            for prop in kernel_properties.split(", "):
                prop_match = prop_pattern.match(prop)
                if not prop_match:
                    fatal(
                        'wrong kernel property "'
                        + prop
                        + '" in "'
                        + kernel_properties
                        + '"'
                    )
                var = prop_match.group(1)
                val = prop_match.group(2)
                entry[var] = val
                if var not in var_list:
                    var_list.append(var)
                if var == "gpu-id":
                    gpu_id = int(val)
                    if gpu_id > max_gpu_id:
                        max_gpu_id = gpu_id
                if var == "queue-id":
                    queue_id = int(val)
                if var == "pid":
                    disp_pid = int(val)
                if var == "tid":
                    disp_tid = int(val)

            ts_match = ts_pattern.search(record)
            if not ts_match:
                continue

            entry["DispatchNs"] = ts_match.group(1)
            entry["BeginNs"] = ts_match.group(2)
            entry["EndNs"] = ts_match.group(3)
            entry["CompleteNs"] = ts_match.group(4)

            # filling dependencies: dispatch time -> kernel begin time
            from_ns = int(ts_match.group(1))
            to_ns = int(ts_match.group(2))
            from_us = int((from_ns - START_NS) / 1000)
            to_us = int((to_ns - START_NS) / 1000)

            kern_dep_list.append((from_ns, disp_pid, disp_tid))

            gpu_pid = GPU_BASE_PID + int(gpu_id)
            if disp_pid not in dep_dict:
                dep_dict[disp_pid] = {}
            dep_proc = dep_dict[disp_pid]
            if gpu_pid not in dep_proc:
                dep_proc[gpu_pid] = {
                    "pid": HSA_PID,
                    "from": [],
                    "to": {},
                    "id": [],
                }
            dep_str = dep_proc[gpu_pid]
            # The "to" entry is keyed by the position of its "from" entry.
            to_id = len(dep_str["from"])
            dep_str["from"].append((from_us, disp_tid, disp_tid))
            dep_str["to"][to_id] = to_us


#############################################################

# Sort key for var_table keys, which are (pid, dispatch number) tuples whose
# second element is a string. The key converts the second element to an int
# so that sorting orders by the first element and then numerically by the
# second one.
def tuple_comparator(tupleElem):
    """Return (tupleElem[0], int(tupleElem[1])) for use as a sort key."""
    return tupleElem[0], int(tupleElem[1])


# merge results table
def merge_table():
    """Restrict var_list to the columns present in the first dispatch entry.

    "First" means the smallest var_table key according to tuple_comparator.
    When that entry carries timestamps, the four timestamp columns are
    appended to var_list first. Does nothing when var_table is empty.
    """
    global var_list
    if not var_table:
        return
    first_key = min(var_table, key=tuple_comparator)
    fields = set(var_table[first_key])
    if "DispatchNs" in fields:
        var_list.append("DispatchNs")
        var_list.append("BeginNs")
        var_list.append("EndNs")
        var_list.append("CompleteNs")
    var_list = [x for x in var_list if x in fields]


#############################################################


# dump CSV results
def dump_csv(file_name):
    """Write var_table to *file_name* as CSV, one row per dispatch.

    The header is var_list; a column missing from an entry is written as the
    text "None". Terminates through fatal() when an entry's "Index" value
    differs from the dispatch number in its key.
    """
    global var_list
    keys = sorted(var_table.keys(), key=tuple_comparator)

    with open(file_name, mode="w") as fd:
        fd.write(",".join(var_list) + "\n")
        for pid, ind in keys:
            entry = var_table[(pid, ind)]
            dispatch_number = entry["Index"]
            if ind != dispatch_number:
                fatal("Dispatch #" + ind + " index mismatch (" + dispatch_number + ")\n")
            val_list = [entry[var] if (var in entry) else 'None' for var in var_list]
            fd.write(",".join(val_list) + "\n")

    print("File '" + file_name + "' is generating")


#############################################################


# fill kernels DB
def fill_kernel_db(table_name, db):
    """Create table *table_name* in *db* and insert every var_table entry.

    Columns of var_list that have no declared type in table_descr are
    declared as INTEGER. Terminates through fatal() on an index mismatch.
    """
    global var_list
    keys = sorted(var_table.keys(), key=tuple_comparator)

    for var in set(var_list).difference(set(table_descr[1])):
        table_descr[1][var] = "INTEGER"
    table_descr[0] = var_list

    table_handle = db.add_table(table_name, table_descr)

    for pid, ind in keys:
        entry = var_table[(pid, ind)]
        dispatch_number = entry["Index"]
        if ind != dispatch_number:
            fatal("Dispatch #" + ind + " index mismatch (" + dispatch_number + ")\n")
        val_list = [entry[var] for var in var_list]
        db.insert_entry(table_handle, val_list)


#############################################################

# Fill Ext DB
ext_table_descr = [
    ["BeginNs", "EndNs", "pid", "tid", "Name", "Index", "__section", "__lane"],
    {
        "BeginNs": "INTEGER",
        "EndNs": "INTEGER",
        "pid": "INTEGER",
        "tid": "INTEGER",
        "Name": "TEXT",
        "Index": "INTEGER",
        "__section": "INTEGER",
        "__lane": "INTEGER",
    },
]


def fill_ext_db(table_name, db, indir, trace_name, api_pid):
    """Load "<indir>/<trace_name>_trace.txt" into table *table_name*.

    Lines have the form '<tms> <pid>:<tid> <cid>:<rid>:"<msg>"'; lines that
    do not match are ignored. The cid value selects the handling:

    * 1 - push a record on the per pid/tid stack, nothing is inserted yet;
    * 2 - pop the matching record, set its end timestamp, insert it and
      remember the range in the global range_data[tid];
    * 3 - remember (tms, msg) under the range id, nothing is inserted;
    * 4 - insert a record starting at the time remembered for the range id;
    * any other value - insert a record lasting from tms to tms + 1.

    The global range_data is reset on every call, even when the file is
    missing.

    Returns:
        1 when the trace file exists, 0 otherwise.
    """
    global range_data
    file_name = indir + "/" + trace_name + "_trace.txt"
    # tms pid:tid cid:rid:'.....'
    ptrn_val = re.compile(r'(\d+) (\d+):(\d+) (\d+):(\d+):"(.*)"$')

    range_data = {}
    range_stack = {}
    range_map = {}

    if not os.path.isfile(file_name):
        return 0

    record_id = 0
    table_handle = db.add_table(table_name, ext_table_descr)
    with open(file_name, mode="r", errors="replace") as fd:
        for line in fd:
            # Strip only the newline so that a final line without one still
            # ends in the closing quote the pattern requires.
            record = line.rstrip("\n")
            m = ptrn_val.match(record)
            if not m:
                continue

            tms = int(m.group(1))
            pid = m.group(2)
            tid = int(m.group(3))
            cid = int(m.group(4))
            rid = int(m.group(5))
            msg = m.group(6)

            rec_vals = []

            if tid not in range_data:
                range_data[tid] = {}

            if cid != 2:
                rec_vals.append(tms)
                rec_vals.append(tms + 1)
                rec_vals.append(pid)
                rec_vals.append(tid)
                rec_vals.append(msg)
                rec_vals.append(record_id)
                rec_vals.append(api_pid)  # __section
                rec_vals.append(tid)  # __lane

            if cid == 1:
                if pid not in range_stack:
                    range_stack[pid] = {}
                pid_stack = range_stack[pid]
                if tid not in pid_stack:
                    pid_stack[tid] = []
                pid_stack[tid].append(rec_vals)
                continue

            if cid == 2:
                if pid not in range_stack:
                    fatal("ROCTX range begin not found, pid(" + pid + ")")
                pid_stack = range_stack[pid]
                # An exhausted stack means an end without a matching begin.
                if tid not in pid_stack or not pid_stack[tid]:
                    fatal("ROCTX range begin not found, tid(" + str(tid) + ")")
                rec_stack = pid_stack[tid]
                rec_vals = rec_stack.pop()
                rec_vals[1] = tms

                # record the range's start/stop timestamps, its parent
                # (ranges can be nested), and its message.
                range_start = rec_vals[0]
                range_stop = tms
                range_parent = rec_stack[-1][0] if len(rec_stack) != 0 else 0
                range_msg = rec_vals[4]
                range_data[tid][range_start] = (range_stop, range_parent, range_msg)

            # range start
            if cid == 3:
                range_map[rid] = (tms, msg)
                continue

            # range stop
            if cid == 4:
                if rid in range_map:
                    # querying start timestamp if rid exists
                    (tms, msg) = range_map[rid]
                    del range_map[rid]
                else:
                    fatal("range id(" + str(rid) + ") is not found")
                rec_vals[0] = tms  # begin timestamp
                rec_vals[4] = msg  # range message
                rec_vals[7] = 0  # 0 lane for ranges

            db.insert_entry(table_handle, rec_vals)
            record_id += 1

    return 1


#############################################################


# arguments manipulation routines
def get_field(args, field):
    """Extract the value of "<field>(<value>)" from the argument string.

    Everything up to and including the last " <field>(" is removed, then
    everything from the first ") " onward, or failing that a trailing "))".

    Returns:
        (text, n). n is non-zero when both the opening and a closing
        delimiter were found, and text is then the value. When " <field>("
        is absent the result is (args, 0). (None, 0) is returned for
        args=None.
    """
    if args is None:
        return (None, 0)
    ptrn1_field = re.compile(r"^.* " + field + r"\(")
    ptrn2_field = re.compile(r"\) .*$")
    ptrn3_field = re.compile(r"\)\)$")
    (field_name, n) = ptrn1_field.subn("", args, count=1)
    if n != 0:
        (field_name, n) = ptrn2_field.subn("", field_name, count=1)
        if n == 0:
            (field_name, n) = ptrn3_field.subn("", field_name, count=1)
    return (field_name, n)


def set_field(args, field, val):
    """Replace the first "<field>(<word>)" in *args* with "<field>(<val>)".

    Returns:
        (new_args, n) as produced by re.subn; n is 0 when nothing matched.
    """
    return re.subn(
        field + r"\(\w+\)([ \)])", field + "(" + str(val) + ")\\1", args, count=1
    )


# (corr_id, proc_id) -> thread id of the HSA async copy API call
hsa_patch_data = {}
# (corr_id, proc_id) -> (thread id, stream id, kernel name, roctx message)
ops_patch_data = {}

# Fill API DB
api_table_descr = [
    [
        "BeginNs",
        "EndNs",
        "pid",
        "tid",
        "Name",
        "args",
        "Index",
        "Data",
        "__section",
        "__lane",
    ],
    {
        "BeginNs": "INTEGER",
        "EndNs": "INTEGER",
        "pid": "INTEGER",
        "tid": "INTEGER",
        "Name": "TEXT",
        "args": "TEXT",
        "Index": "INTEGER",
        "Data": "TEXT",
        "__section": "INTEGER",
        "__lane": "INTEGER",
    },
]


# Filling API records DB table
# table_name - created DB table name
# db - DB handle
# indir - input directory
# api_name - traced API name
# api_pid - assigned JSON PID
# dep_pid - PID of dependent domain
# dep_list - list of dependent dispatch events
# dep_filtr - registered dependencies by record ID
def fill_api_db(
    table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep_filtr, expl_id
):
    """Load "<indir>/<api_name>_api_trace.txt" into table *table_name*.

    For every record the correlation id and stream id are resolved, the
    record is inserted, and for asynchronous operations (kernel launches,
    memory copies, graph launches, waits/synchronisations, stream value
    operations) patch data for the ops table and a dependency "from" entry
    are registered. Afterwards one "hsa_dispatch" record is inserted for
    every (from_ns, proc_id, thread_id) tuple of *dep_list*.

    *dep_filtr* is accepted but not used. When *expl_id* is true the
    correlation id of each operation is appended to the dependency "id"
    list.

    Terminates through fatal() on a record that matches neither record
    pattern.

    Returns:
        1 when the trace file exists, 0 otherwise.
    """
    global hsa_activity_found
    global memory_manager

    range_start_times = {}

    # Matches normal API records.
    ptrn_api_record = re.compile(r"(\d+):(\d+) (\d+):(\d+) ([^\(]+)(\(.*)$")
    # Matches records with a function name of "unknown" and no parameters.
    # Capture groups 1-4 should match the same information as in ptrn_api_record.
    # Used to avoid modifying ptrn_api_record regex.
    ptrn_api_record_unknown = re.compile(r"(\d+):(\d+) (\d+):(\d+) (unknown).*$")
    hip_mcopy_ptrn = re.compile(r"hipMemcpy|hipMemset")
    hip_sync_event_ptrn = re.compile(r"hipStreamSynchronize")
    hip_graph_ptrn = re.compile(r"hipGraphLaunch")
    wait_event_ptrn = re.compile(r"WaitEvent|hipStreamSynchronize|hipDeviceSynchronize")
    hip_stream_wait_write_ptrn = re.compile(
        r"hipStreamWaitValue64|hipStreamWriteValue64|hipStreamWaitValue32|hipStreamWriteValue32"
    )
    prop_pattern = re.compile(r"([\w-]+)\((\w+)\)")
    beg_pattern = re.compile(r'^dispatch\[(\d*)\], (.*) kernel-name\("([^"]*)"\)')
    hip_strm_cr_event_ptrn = re.compile(r"hipStreamCreate")
    hsa_mcopy_ptrn = re.compile(r"hsa_amd_memory_async_copy")
    ptrn_fixformat = re.compile(r"(\d+:\d+ \d+:\d+ \w+)\(\s*(.*)\)$")
    ptrn_fixkernel = re.compile(r"\s+kernel=(.*)$")
    ptrn_corr_id = re.compile(r"\ :(\d*)$")

    file_name = indir + "/" + api_name + "_api_trace.txt"
    if not os.path.isfile(file_name):
        return 0

    # HSA copy dependencies are only meaningful when the async copy trace,
    # which supplies their "to" side, is actually present.
    hsa_copy_file_name = indir + "/" + "async_copy_trace.txt"
    hsa_copy_file_name_present = 1 if os.path.isfile(hsa_copy_file_name) else 0
    hsa_copy_deps = 1 if (api_pid == HSA_PID and hsa_copy_file_name_present == 1) else 0
    print("hsa_copy_deps: " + str(hsa_copy_deps))

    # parsing an input trace file and creating a DB table
    record_id_dict = {}
    table_handle = db.add_table(table_name, api_table_descr)
    with open(file_name, mode="r", errors="replace") as fd:
        file_lines = fd.readlines()

    total_lines = len(file_lines)
    # Number of leading columns taken from the regex groups
    # (BeginNs, EndNs, pid, tid, Name, args).
    rec_len = len(api_table_descr[0]) - 3
    for line_index, line in enumerate(file_lines):
        if (line_index == total_lines - 1) or (line_index % 100 == 0):
            sys.stdout.write(
                "\rscan "
                + api_name
                + " API data "
                + str(line_index)
                + ":"
                + str(total_lines)
                + " " * 100
            )

        # Strip only the newline so that a final line without one does not
        # lose its last character.
        record = line.rstrip("\n")

        corr_id = 0
        m = ptrn_corr_id.search(record)
        if m:
            corr_id = int(m.group(1))
            record = ptrn_corr_id.sub("", record)

        kernel_arg = ""
        m = ptrn_fixkernel.search(record)
        if m:
            kernel_arg = "kernel(" + m.group(1) + ") "
            record = ptrn_fixkernel.sub("", record)

        mfixformat = ptrn_fixformat.match(record)
        if mfixformat:
            # replace '=' in args with parentheses
            reformated_args = (
                kernel_arg
                + mfixformat.group(2)
                .replace("=", "(")
                .replace(",", ")")
                .replace("\\", "\\\\")
                .replace('"', '\\"')
                + ")"
            )
            record = mfixformat.group(1) + "( " + reformated_args + ")"

        m = ptrn_api_record.match(record)
        if not m:
            m = ptrn_api_record_unknown.match(record)
            if not m:
                fatal(api_name + " bad record: '" + record + "'")

        rec_vals = []
        for ind in range(1, rec_len):
            try:
                rec_vals.append(m.group(ind))
            except IndexError:
                # The "unknown" pattern has no args group.
                rec_vals.append(None)
        proc_id = int(rec_vals[2])
        thread_id = int(rec_vals[3])
        record_name = rec_vals[4]
        # record_args is optional, and may be None if an unknown record is found.
        record_args = rec_vals[5]

        # incrementing per-process record id/correlation id
        if proc_id not in record_id_dict:
            record_id_dict[proc_id] = 0
        record_id_dict[proc_id] += 1
        record_id = record_id_dict[proc_id]

        # setting correlationid to record id if correlation id is not defined
        if corr_id == 0:
            corr_id = record_id

        rec_vals.append(corr_id)

        # extracting/converting stream id
        (stream_id, stream_found) = get_field(record_args, "stream")
        if stream_found:
            stream_id = get_stream_index(stream_id)
            (rec_vals[5], found) = set_field(record_args, "stream", stream_id)
            if found == 0:
                fatal('set_field() failed for "stream", args: "' + record_args + '"')
        else:
            (stream_id, stream_found) = get_field(record_args, "hStream")
            if stream_found:
                stream_id = get_stream_index(stream_id)
                (rec_vals[5], found) = set_field(record_args, "hStream", stream_id)
                if found == 0:
                    fatal(
                        'set_field() failed for "stream", args: "' + record_args + '"'
                    )
            else:
                stream_id = 0

        if hip_strm_cr_event_ptrn.match(record_name):
            hip_streams.append(stream_id)

        # A stream synchronisation depends on the last API call recorded for
        # the same process and stream.
        if hip_sync_event_ptrn.match(record_name):
            if (proc_id, stream_id) in last_hip_api_map:
                (last_hip_api_corr_id, last_hip_api_from_pid) = last_hip_api_map[
                    (proc_id, stream_id)
                ][-1]
                sync_api_beg_us = int((int(rec_vals[0]) - START_NS) / 1000)
                if proc_id not in dep_dict:
                    dep_dict[proc_id] = {}
                if HIP_PID not in dep_dict[proc_id]:
                    dep_dict[proc_id][HIP_PID] = {
                        "pid": last_hip_api_from_pid,
                        "from": [],
                        "to": {},
                        "id": [],
                    }
                hip_dep = dep_dict[proc_id][HIP_PID]
                # The -1 "from" timestamp is a placeholder that fill_ops_db
                # replaces via from_ids.
                hip_dep["from"].append((-1, stream_id, thread_id))
                hip_dep["id"].append(last_hip_api_corr_id)
                hip_dep["to"][last_hip_api_corr_id] = sync_api_beg_us
                from_ids[(last_hip_api_corr_id, proc_id)] = len(hip_dep["from"]) - 1

        gpu_id = 0
        m = beg_pattern.match(record)
        if m:
            kernel_properties = m.group(2)
            for prop in kernel_properties.split(", "):
                prop_match = prop_pattern.match(prop)
                if prop_match and prop_match.group(1) == "gpu-id":
                    gpu_id = int(prop_match.group(2))

        if hsa_mcopy_ptrn.match(record_name) or hip_mcopy_ptrn.match(record_name):
            ops_section_id = COPY_PID
        else:
            ops_section_id = GPU_BASE_PID + int(gpu_id)

        if (proc_id, stream_id) not in last_hip_api_map:
            last_hip_api_map[(proc_id, stream_id)] = []
        last_hip_api_map[(proc_id, stream_id)].append((corr_id, ops_section_id))

        # asynchronous operation API found
        op_found = 0

        # extract kernel name string
        (kernel_str, kernel_found) = get_field(record_args, "kernel")
        if kernel_found == 0:
            kernel_str = ""
        else:
            op_found = 1

        if hip_mcopy_ptrn.match(record_name):
            op_found = 1

        # HIP Graph API
        if hip_graph_ptrn.search(record_name):
            op_found = 1

        # HIP WaitEvent API
        if wait_event_ptrn.search(record_name):
            op_found = 1

        if hip_stream_wait_write_ptrn.search(record_name):
            op_found = 1

        # HSA memcopy API
        if hsa_mcopy_ptrn.match(record_name):
            op_found = 1

            stream_id = thread_id
            hsa_patch_data[(corr_id, proc_id)] = thread_id

        if op_found:
            roctx_msg = ""
            if thread_id not in range_start_times:
                range_start_times[thread_id] = (
                    sorted(range_data[thread_id].keys())
                    if thread_id in range_data
                    else []
                )
            start_times = range_start_times[thread_id]
            index = bisect.bisect_right(start_times, int(rec_vals[0]))
            if index > 0:
                # We found the range that is closest to this operation. Iterate the
                # range stack this range is part of until we find a range that entirely
                # contains the operation.
                range_start = start_times[index - 1]
                while range_start != 0:
                    (range_end, range_start, msg) = range_data[thread_id][range_start]
                    if int(rec_vals[1]) < range_end:
                        # This range contains the operation.
                        roctx_msg = msg
                        break

            ops_patch_data[(corr_id, proc_id)] = (
                thread_id,
                stream_id,
                kernel_str,
                roctx_msg,
            )

            beg_ns = int(rec_vals[0])
            end_ns = int(rec_vals[1])
            dur_us = int((end_ns - beg_ns) / 1000)
            # The dependency starts at the middle of the API call.
            from_us = int((beg_ns - START_NS) / 1000) + dur_us / 2

            if api_pid == HIP_PID or hsa_copy_deps == 1:
                if proc_id not in dep_dict:
                    dep_dict[proc_id] = {}
                dep_proc = dep_dict[proc_id]
                if dep_pid not in dep_proc:
                    # NOTE(review): this used to be preceded by a branch
                    # guarded by `api_pid == "HIP_PID"` (int compared with a
                    # string, so never taken) that omitted the "to" key.
                    # The behavior that was actually in effect is kept;
                    # confirm whether HIP entries should omit "to".
                    dep_proc[dep_pid] = {
                        "pid": api_pid,
                        "from": [],
                        "id": [],
                        "to": {},
                    }
                dep_str = dep_proc[dep_pid]
                dep_str["from"].append((from_us, stream_id, thread_id))
                if expl_id:
                    dep_str["id"].append(corr_id)

        # memcopy registering
        api_data = memory_manager.register_api(rec_vals) if mcopy_data_enabled else ""
        rec_vals.append(api_data)

        # setting section and lane
        rec_vals.append(api_pid)  # __section
        rec_vals.append(thread_id)  # __lane

        # inserting an API record to DB
        db.insert_entry(table_handle, rec_vals)

    # inserting of dispatch events correlated to the dependent dispatches
    for from_ns, proc_id, thread_id in dep_list:
        if proc_id not in record_id_dict:
            record_id_dict[proc_id] = 0
        record_id_dict[proc_id] += 1
        corr_id = record_id_dict[proc_id]
        db.insert_entry(
            table_handle,
            [
                from_ns,
                from_ns,
                proc_id,
                thread_id,
                "hsa_dispatch",
                "",
                corr_id,
                "",
                api_pid,
                thread_id,
            ],
        )

    return 1


#############################################################

# fill COPY DB
copy_table_descr = [
    ["BeginNs", "EndNs", "Name", "pid", "tid", "Index", "Data", "__section", "__lane"],
    {
        "Index": "INTEGER",
        "Name": "TEXT",
        "args": "TEXT",
        "BeginNs": "INTEGER",
        "EndNs": "INTEGER",
        "pid": "INTEGER",
        "tid": "INTEGER",
        "Data": "TEXT",
        "__section": "INTEGER",
        "__lane": "INTEGER",
    },
]


def fill_copy_db(table_name, db, indir):
    """Load "<indir>/async_copy_trace.txt" into table *table_name*.

    Lines have the form "<begin>:<end> async-copy:<corr_id>:<pid>". The
    thread id is looked up in hsa_patch_data and is -1 when unknown; only
    records with a known thread id contribute a dependency "to" entry.

    Terminates through fatal() on a line that does not match.

    Returns:
        1 when the trace file exists, 0 otherwise.
    """
    sect_id = COPY_PID

    file_name = indir + "/" + "async_copy_trace.txt"
    ptrn_val = re.compile(r"^(\d+):(\d+) (async-copy):(\d+):(\d+)$")

    if not os.path.isfile(file_name):
        return 0

    table_handle = db.add_table(table_name, copy_table_descr)
    with open(file_name, mode="r", errors="replace") as fd:
        for line in fd:
            # Strip only the newline; slicing off the last character would
            # truncate the pid of a final line that has no newline.
            record = line.rstrip("\n")
            m = ptrn_val.match(record)
            if not m:
                fatal("bad async-copy entry '" + record + "'")

            # BeginNs, EndNs, Name
            rec_vals = [m.group(ind) for ind in range(1, 4)]
            corr_id = int(m.group(4))
            proc_id = int(m.group(5))

            # querying tid value
            thread_id = hsa_patch_data.get((corr_id, proc_id), -1)

            # completing record
            rec_vals.append(proc_id)  # pid
            rec_vals.append(thread_id)  # tid
            rec_vals.append(corr_id)  # Index

            # registering memcopy information
            activity_data = (
                memory_manager.register_copy(rec_vals) if mcopy_data_enabled else ""
            )
            rec_vals.append(activity_data)

            # appending stream ID and section ID
            rec_vals.append(COPY_PID)  # __section
            rec_vals.append(thread_id)  # __lane

            # inserting DB activity entry
            db.insert_entry(table_handle, rec_vals)

            # filling dependencies
            to_ns = int(rec_vals[0])
            to_us = int((to_ns - START_NS) / 1000)

            if thread_id != -1:
                # The dependency entry is expected to exist already: it is
                # created by fill_api_db when the matching API call is seen.
                dep_proc = dep_dict[proc_id]
                dep_str = dep_proc[sect_id]
                dep_str["to"][corr_id] = to_us
                dep_str["id"].append(corr_id)

    return 1


#############################################################

# fill HCC ops DB
ops_table_descr = [
    [
        "BeginNs",
        "EndNs",
        "dev-id",
        "queue-id",
        "Name",
        "pid",
        "tid",
        "roctx-range",
        "stream-id",
        "Index",
        "Data",
        "__section",
        "__lane",
    ],
    {
        "Index": "INTEGER",
        "Name": "TEXT",
        "args": "TEXT",
        "BeginNs": "INTEGER",
        "EndNs": "INTEGER",
        "dev-id": "INTEGER",
        "queue-id": "INTEGER",
        "pid": "INTEGER",
        "tid": "INTEGER",
        "roctx-range": "TEXT",
        "Data": "TEXT",
        "stream-id": "INTEGER",
        "__section": "INTEGER",
        "__lane": "INTEGER",
    },
]


def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir):
    """Load "<indir>/hcc_ops_trace.txt" into the kernel and memcopy tables.

    Lines have the form
    "<begin>:<end> <dev-id>:<queue-id> <name>:<corr_id>:<pid>". Records whose
    name contains "Memcpy", "Copy" or "Fill" go to *mcopy_table_name*, all
    others to *kernel_table_name*. Thread id, stream id, kernel name and
    roctx range are taken from ops_patch_data. A record without patch data
    is skipped when it is a barrier ("Marker") or when ROCP_CTRL_RATE is set
    in the environment; otherwise the script terminates through fatal().

    Returns:
        A dict mapping (corr_id, proc_id) to the table name the record was
        stored in; empty when the trace file does not exist.
    """
    global max_gpu_id

    file_name = indir + "/" + "hcc_ops_trace.txt"
    ptrn_val = re.compile(r"(\d+):(\d+) (\d+):(\d+) (.*)$")
    ptrn_id = re.compile(r"^([^:]+):(\d+):(\d+)$")
    ptrn_mcopy = re.compile(r"(Memcpy|Copy|Fill)")
    ptrn_barrier = re.compile(r"Marker")

    if not os.path.isfile(file_name):
        return {}

    filtr = {}

    kernel_table_handle = db.add_table(kernel_table_name, ops_table_descr)
    mcopy_table_handle = db.add_table(mcopy_table_name, ops_table_descr)
    with open(file_name, mode="r", errors="replace") as fd:
        file_lines = fd.readlines()

    total_lines = len(file_lines)
    for line_index, line in enumerate(file_lines):
        if (line_index == total_lines - 1) or (line_index % 100 == 0):
            sys.stdout.write(
                "\rscan ops data " + str(line_index) + ":" + str(total_lines) + " " * 100
            )

        # Strip only the newline so that a final line without one does not
        # lose the last digit of its pid.
        record = line.rstrip("\n")
        m = ptrn_val.match(record)
        if not m:
            fatal("hcc ops bad record: '" + record + "'")

        # parsing trace record: BeginNs, EndNs, dev-id, queue-id, label
        rec_vals = [m.group(ind) for ind in range(1, 6)]
        label = rec_vals[4]  # record name
        m = ptrn_id.match(label)
        if not m:
            fatal("bad hcc ops entry '" + record + "'")
        name = m.group(1)
        corr_id = int(m.group(2))
        proc_id = int(m.group(3))

        # checking name for memcopy pattern
        is_barrier = 0
        if ptrn_mcopy.search(name):
            rec_table_name = mcopy_table_name
            table_handle = mcopy_table_handle
            sect_id = COPY_PID
        else:
            rec_table_name = kernel_table_name
            table_handle = kernel_table_handle

            gpu_id = int(rec_vals[2])
            if gpu_id > max_gpu_id:
                max_gpu_id = gpu_id
            if ptrn_barrier.search(name):
                name = '""'
                is_barrier = 1
                # Barriers get their own section, offset by 512 per GPU.
                sect_id = GPU_BASE_PID + int(gpu_id) + 512
            else:
                sect_id = GPU_BASE_PID + int(gpu_id)

        thread_id = 0
        stream_id = 0
        roctx_range = ""
        if (corr_id, proc_id) in ops_patch_data:
            (thread_id, stream_id, name_patch, roctx_range) = ops_patch_data[
                (corr_id, proc_id)
            ]
            if name_patch != "":
                name = name_patch
            if roctx_range == "":
                roctx_range = name
        elif is_barrier or "ROCP_CTRL_RATE" in os.environ:
            continue
        else:
            fatal(
                "hcc ops data not found: '"
                + record
                + "', "
                + str(corr_id)
                + ", "
                + str(proc_id)
            )

        # activity record
        rec_vals[4] = name  # Name
        rec_vals.append(proc_id)  # pid
        rec_vals.append(thread_id)  # tid
        rec_vals.append(roctx_range)  # roctx-range
        rec_vals.append(stream_id)  # StreamId
        rec_vals.append(corr_id)  # Index

        # registering memcopy information
        activity_data = (
            memory_manager.register_activity(rec_vals) if mcopy_data_enabled else ""
        )
        rec_vals.append(activity_data)

        # activity record data for stream ID and section ID
        rec_vals.append(sect_id)  # __section
        rec_vals.append(stream_id)  # __lane

        # inserting DB activity entry
        db.insert_entry(table_handle, rec_vals)

        # registering a dependency filtr
        filtr[(corr_id, proc_id)] = rec_table_name

        # filling a dependencies
        to_ns = int(rec_vals[0])
        to_us = int((to_ns - START_NS) / 1000)
        end_ns = int(rec_vals[1])
        dur_us = int((end_ns - to_ns) / 1000)

        if (corr_id, proc_id) in from_ids:
            # Replace the placeholder "from" timestamp registered by
            # fill_api_db with the end time of this operation.
            depid = from_ids[(corr_id, proc_id)]
            from_val = dep_dict[proc_id][HIP_PID]["from"][depid]
            print("from_val" + str(from_val))
            from_val_new = (to_us + dur_us, from_val[1], from_val[2])
            dep_dict[proc_id][HIP_PID]["from"][depid] = from_val_new

        if proc_id not in dep_dict:
            dep_dict[proc_id] = {}
        dep_proc = dep_dict[proc_id]
        if sect_id not in dep_proc:
            # "bsp" names the section whose pid/from/id lists are borrowed
            # when the flows are generated.
            dep_proc[sect_id] = {"bsp": OPS_PID, "to": {}}
        dep_str = dep_proc[sect_id]
        dep_str["to"][corr_id] = to_us

    return filtr


#############################################################

# main
# Both the output file and at least one input file are required: the input
# directory and the input extension are derived from the first input file.
if len(sys.argv) < 3:
    fatal("Usage: " + sys.argv[0] + " <output .csv|.db file> <input files>")

outfile = sys.argv[1]
infiles = sys.argv[2:]
indir = re.sub(r"\/[^\/]*$", r"", infiles[0])
inext = re.sub(r"\s+$", r"", infiles[0])
inext = re.sub(r"^.*(\.[^\.]+)$", r"\1", inext)

dbfile = ""
csvfile = ""

# NOTE(review): os.environ values are strings, so the comparison with the
# integer 0 is never true and START_NS always stays 0. Left unchanged because
# the intended meaning of ROCP_JSON_REBASE cannot be determined from this
# file; confirm before replacing 0 with "0".
if "ROCP_JSON_REBASE" in os.environ and os.environ["ROCP_JSON_REBASE"] == 0:
    begin_ts_file = indir + "/begin_ts_file.txt"
    if os.path.isfile(begin_ts_file):
        with open(begin_ts_file, mode="r", errors="replace") as fd:
            ind = 0
            for line in fd.readlines():
                val = int(line)
                if ind == 0 or val < START_NS:
                    START_NS = val
                ind += 1
        print("START timestamp found (" + str(START_NS) + "ns)")

if re.search(r"\.csv$", outfile):
    csvfile = outfile
elif re.search(r"\.db$", outfile):
    dbfile = outfile
    csvfile = re.sub(r"\.db$", ".csv", outfile)
else:
    fatal("Bad output file '" + outfile + "'")

if inext == ".txt":
    for f in infiles:
        parse_res(f)
    if len(var_table) != 0:
        merge_table()

if dbfile == "":
    dump_csv(csvfile)
else:
    statfile = re.sub(r"\.csv$", ".stats.csv", csvfile)
    jsonfile = re.sub(r"\.csv$", ".json", csvfile)

    hsa_statfile = re.sub(r"\.stats\.csv$", r".hsa_stats.csv", statfile)
    hip_statfile = re.sub(r"\.stats\.csv$", r".hip_stats.csv", statfile)
    ops_statfile = statfile
    copy_statfile = re.sub(r"\.stats\.csv$", r".copy_stats.csv", statfile)
    memcopy_info_file = re.sub(r"\.stats\.csv$", r".memcopy_info.csv", statfile)
    sysinfo_file = re.sub(r"\.stats\.csv$", r".sysinfo.txt", statfile)
    metadata_gen(sysinfo_file, "rocminfo")

    # Start from an empty database file.
    with open(dbfile, mode="w") as fd:
        fd.truncate()
    db = SQLiteDB(dbfile)
    memory_manager = MemManager(db, indir)

    # The order matters: the rocTX ranges and the API records produce the
    # patch data consumed by the copy and ops tables.
    ext_trace_found = fill_ext_db("rocTX", db, indir, "roctx", EXT_PID)

    hsa_trace_found = fill_api_db(
        "HSA", db, indir, "hsa", HSA_PID, COPY_PID, kern_dep_list, {}, 0
    )
    hsa_activity_found = fill_copy_db("COPY", db, indir)

    hip_trace_found = fill_api_db("HIP", db, indir, "hip", HIP_PID, OPS_PID, [], {}, 1)
    ops_filtr = fill_ops_db("OPS", "COPY", db, indir)

    fill_kernel_db("KERN", db)

    any_trace_found = ext_trace_found | hsa_trace_found | hip_trace_found
    copy_trace_found = 0
    if hsa_activity_found or len(ops_filtr):
        copy_trace_found = 1

    db.open_json(jsonfile)

    if ext_trace_found:
        db.label_json(EXT_PID, "Markers and Ranges", jsonfile)

    if hip_trace_found:
        db.label_json(HIP_PID, "CPU HIP API", jsonfile)

    if hsa_trace_found:
        db.label_json(HSA_PID, "CPU HSA API", jsonfile)

    db.label_json(COPY_PID, "COPY", jsonfile)

    if max_gpu_id >= 0:
        for ind in range(0, int(max_gpu_id) + 1):
            db.label_json(int(ind) + int(GPU_BASE_PID), "GPU" + str(ind), jsonfile)
            db.label_json(int(ind) + int(GPU_BASE_PID) + 512 , "GPU Barriers" + str(ind), jsonfile)

    if ext_trace_found:
        dform.gen_ext_json_trace(db, "rocTX", START_NS, jsonfile)

    if len(var_table) != 0:
        dform.post_process_data(db, "KERN", csvfile)
        dform.gen_table_bins(db, "KERN", statfile, "KernelName", "DurationNs")
        if "BeginNs" in var_list:
            dform.gen_kernel_json_trace(db, "KERN", GPU_BASE_PID, START_NS, jsonfile)

    if hsa_trace_found:
        dform.post_process_data(db, "HSA")
        dform.gen_table_bins(db, "HSA", hsa_statfile, "Name", "DurationNs")
        dform.gen_api_json_trace(db, "HSA", START_NS, jsonfile)

    if copy_trace_found:
        dform.post_process_data(db, "COPY")
        dform.gen_table_bins(db, "COPY", copy_statfile, "Name", "DurationNs")
        dform.gen_api_json_trace(db, "COPY", START_NS, jsonfile)

    if hip_trace_found:
        dform.post_process_data(db, "HIP")
        dform.gen_table_bins(db, "HIP", hip_statfile, "Name", "DurationNs")
        dform.gen_api_json_trace(db, "HIP", START_NS, jsonfile)

    if ops_filtr:
        dform.post_process_data(db, "OPS")
        dform.gen_table_bins(db, "OPS", ops_statfile, "Name", "DurationNs")
        dform.gen_ops_json_trace(db, "OPS", GPU_BASE_PID, START_NS, jsonfile)

    if any_trace_found:
        dep_id = 0
        for proc_id, dep_proc in dep_dict.items():
            # Entries carrying a "bsp" reference borrow pid/from/id from the
            # referenced base entry, which is then marked as consumed.
            for to_pid, dep_str in dep_proc.items():
                if "bsp" in dep_str:
                    bspid = dep_str["bsp"]
                    base_str = dep_proc[bspid]
                    for v in ("pid", "from", "id"):
                        dep_str[v] = base_str[v]
                    base_str["inv"] = 1

            for to_pid, dep_str in dep_proc.items():
                if "inv" in dep_str:
                    continue
                if "to" not in dep_str:
                    continue

                from_pid = dep_str["pid"]
                from_us_list = dep_str["from"]
                to_us_dict = dep_str["to"]
                corr_id_list = dep_str["id"]

                db.flow_json(
                    dep_id,
                    from_pid,
                    from_us_list,
                    to_pid,
                    to_us_dict,
                    corr_id_list,
                    jsonfile,
                )
                dep_id += len(from_us_list)

    # The JSON file is opened unconditionally above, so it is always
    # finalized here.
    db.metadata_json(jsonfile, sysinfo_file)
    db.close_json(jsonfile)

    if mcopy_data_enabled:
        memory_manager.dump_data("MM", memcopy_info_file)

    db.close()

sys.exit(0)
#############################################################