Files
Jason Tang 2c8312626e SWDEV-500435 - Use raw strings for regular expression.
Starting from Python version 3.12, a backslash-character pair that
is not a valid escape sequence generates a SyntaxWarning, instead
of DeprecationWarning. Using raw strings fixes the warnings.

Change-Id: I6a9f35e2e2e5d9d09410dfb5964fdc19cf0bfa4a


[ROCm/rocprofiler commit: 01f01adedf]
2024-12-04 16:48:42 -05:00

1243 line
42 KiB
Python
Executable File

################################################################################
# Copyright (c) 2018 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
import os
import sys
import re
import subprocess
import bisect
from sqlitedb import SQLiteDB
from mem_manager import MemManager
import dform
mcopy_data_enabled = 0
EXT_PID = 0
COPY_PID = 1
HIP_PID = 2
HSA_PID = 3
OPS_PID = 5
GPU_BASE_PID = 6
NONE_PID = -1
max_gpu_id = -1
START_NS = 0
hsa_activity_found = 0
# dependencies dictionary
dep_dict = {}
kern_dep_list = []
last_hip_api_map = {}
hip_streams = []
from_ids = {}
# stream ID map
stream_counter = 0
stream_id_map = {}
def get_stream_index(stream_id):
global stream_counter
stream_ind = 0
if stream_id.lower() != "nil":
if not stream_id in stream_id_map:
stream_counter += 1
stream_ind = stream_counter
stream_id_map[stream_id] = stream_ind
else:
stream_ind = stream_id_map[stream_id]
return stream_ind
# patching activity records
def activity_record_patching(
db, ops_table_name, kernel_found, kernel_name, stream_found, stream_ind, select_expr
):
if kernel_found != 0:
db.change_rec_fld(ops_table_name, 'Name = "' + kernel_name + '"', select_expr)
if stream_found != 0:
db.change_rec_fld(ops_table_name, "tid = " + str(stream_ind), select_expr)
# global vars
table_descr = [["Index", "KernelName"], {"Index": "INTEGER", "KernelName": "TEXT"}]
var_list = table_descr[0]
var_table = {}
#############################################################
def fatal(msg):
sys.stderr.write(sys.argv[0] + ": " + msg + "\n")
sys.exit(1)
dbglog_count = 0
def dbglog(msg):
global dbglog_count
dbglog_count += 1
sys.stderr.write(sys.argv[0] + ": " + msg + "\n")
fatal("error")
#############################################################
# Dumping sysinfo
sysinfo_begin = 1
def metadata_gen(sysinfo_file, sysinfo_cmd):
global sysinfo_begin
if not re.search(r"\.txt$", sysinfo_file):
raise Exception('wrong output file type: "' + sysinfo_file + '"')
if sysinfo_begin == 1:
sysinfo_begin = 0
with open(sysinfo_file, mode="w") as fd:
fd.write("")
with open(sysinfo_file, mode="a") as fd:
fd.write("CMD: " + sysinfo_cmd + "\n")
status = subprocess.call(
sysinfo_cmd + " >> " + sysinfo_file, stderr=subprocess.STDOUT, shell=True
)
if status != 0:
raise Exception('Could not run command: "' + sysinfo_cmd + '"')
# parse results method
def parse_res(infile):
global max_gpu_id
if not os.path.isfile(infile):
return
inp = open(infile, "r", errors="replace")
beg_pattern = re.compile(r'^dispatch\[(\d*)\], (.*) kernel-name\("([^"]*)"\)')
prop_pattern = re.compile(r"([\w-]+)\((\w+)\)")
ts_pattern = re.compile(r", time\((\d*),(\d*),(\d*),(\d*)\)")
# var pattern below matches a variable name and a variable value from a one
# line text in the format of for example "WRITE_SIZE (0.2500000000)" or
# "GRBM_GUI_ACTIVE (27867)" or "TA_TA_BUSY[0]"
var_pattern = re.compile(r"^\s*([a-zA-Z0-9_]+(?:\[\d+\])?)\s+\((\d+(?:\.\d+)?)\)")
pid_pattern = re.compile(r"pid\((\d*)\)")
dispatch_number = 0
var_table_pid = 0
for line in inp.readlines():
record = line[:-1]
m = pid_pattern.search(record)
if m and not os.getenv("ROCP_MERGE_PIDS"):
var_table_pid = int(m.group(1))
m = var_pattern.match(record)
if m:
if not (var_table_pid, dispatch_number) in var_table:
fatal("Error: dispatch number not found '" + str(dispatch_number) + "'")
var = m.group(1)
val = m.group(2)
var_table[(var_table_pid, dispatch_number)][var] = val
if not var in var_list:
var_list.append(var)
m = beg_pattern.match(record)
if m:
dispatch_number = m.group(1)
if not (var_table_pid, dispatch_number) in var_table:
var_table[(var_table_pid, dispatch_number)] = {
"Index": dispatch_number,
"KernelName": '"' + m.group(3) + '"',
}
gpu_id = 0
queue_id = 0
disp_pid = 0
disp_tid = 0
kernel_properties = m.group(2)
for prop in kernel_properties.split(", "):
m = prop_pattern.match(prop)
if m:
var = m.group(1)
val = m.group(2)
var_table[(var_table_pid, dispatch_number)][var] = val
if not var in var_list:
var_list.append(var)
if var == "gpu-id":
gpu_id = int(val)
if gpu_id > max_gpu_id:
max_gpu_id = gpu_id
if var == "queue-id":
queue_id = int(val)
if var == "pid":
disp_pid = int(val)
if var == "tid":
disp_tid = int(val)
else:
fatal(
'wrong kernel property "'
+ prop
+ '" in "'
+ kernel_properties
+ '"'
)
m = ts_pattern.search(record)
if m:
var_table[(var_table_pid, dispatch_number)]["DispatchNs"] = m.group(1)
var_table[(var_table_pid, dispatch_number)]["BeginNs"] = m.group(2)
var_table[(var_table_pid, dispatch_number)]["EndNs"] = m.group(3)
var_table[(var_table_pid, dispatch_number)]["CompleteNs"] = m.group(4)
# filling dependenciws
from_ns = int(m.group(1))
to_ns = int(m.group(2))
from_us = int((from_ns - START_NS) / 1000)
to_us = int((to_ns - START_NS) / 1000)
kern_dep_list.append((from_ns, disp_pid, disp_tid))
gpu_pid = GPU_BASE_PID + int(gpu_id)
if not disp_pid in dep_dict:
dep_dict[disp_pid] = {}
dep_proc = dep_dict[disp_pid]
if not gpu_pid in dep_proc:
dep_proc[gpu_pid] = {
"pid": HSA_PID,
"from": [],
"to": {},
"id": [],
}
dep_str = dep_proc[gpu_pid]
to_id = len(dep_str["from"])
dep_str["from"].append((from_us, disp_tid, disp_tid))
dep_str["to"][to_id] = to_us
##
inp.close()
#############################################################
# Comparator to sort a dictionary of tuples. This comparator will convert
# the second element of tuple to an int and return the new tuple. Then
# the dictionary can use the default comparison i.e sort by first element,
# then sort by second element.
def tuple_comparator(tupleElem):
return tupleElem[0], int(tupleElem[1])
# merge results table
def merge_table():
global var_list
keys = sorted(var_table.keys(), key=tuple_comparator)
fields = set(var_table[keys[0]])
if "DispatchNs" in fields:
var_list.append("DispatchNs")
var_list.append("BeginNs")
var_list.append("EndNs")
var_list.append("CompleteNs")
var_list = [x for x in var_list if x in fields]
#############################################################
# dump CSV results
def dump_csv(file_name):
global var_list
keys = sorted(var_table.keys(), key=tuple_comparator)
with open(file_name, mode="w") as fd:
fd.write(",".join(var_list) + "\n")
for pid, ind in keys:
entry = var_table[(pid, ind)]
dispatch_number = entry["Index"]
if ind != dispatch_number:
fatal("Dispatch #" + ind + " index mismatch (" + dispatch_number + ")\n")
val_list = [entry[var] if (var in entry) else 'None' for var in var_list]
fd.write(",".join(val_list) + "\n")
print("File '" + file_name + "' is generating")
#############################################################
# fill kernels DB
def fill_kernel_db(table_name, db):
global var_list
keys = sorted(var_table.keys(), key=tuple_comparator)
for var in set(var_list).difference(set(table_descr[1])):
table_descr[1][var] = "INTEGER"
table_descr[0] = var_list
table_handle = db.add_table(table_name, table_descr)
for pid, ind in keys:
entry = var_table[(pid, ind)]
dispatch_number = entry["Index"]
if ind != dispatch_number:
fatal("Dispatch #" + ind + " index mismatch (" + dispatch_number + ")\n")
val_list = [entry[var] for var in var_list]
db.insert_entry(table_handle, val_list)
#############################################################
# Fill Ext DB
ext_table_descr = [
["BeginNs", "EndNs", "pid", "tid", "Name", "Index", "__section", "__lane"],
{
"BeginNs": "INTEGER",
"EndNs": "INTEGER",
"pid": "INTEGER",
"tid": "INTEGER",
"Name": "TEXT",
"Index": "INTEGER",
"__section": "INTEGER",
"__lane": "INTEGER",
},
]
def fill_ext_db(table_name, db, indir, trace_name, api_pid):
global range_data
file_name = indir + "/" + trace_name + "_trace.txt"
# tms pid:tid cid:rid:'.....'
ptrn_val = re.compile(r'(\d+) (\d+):(\d+) (\d+):(\d+):"(.*)"$')
range_data = {}
range_stack = {}
range_map = {}
if not os.path.isfile(file_name):
return 0
record_id = 0
table_handle = db.add_table(table_name, ext_table_descr)
with open(file_name, mode="r", errors="replace") as fd:
for line in fd.readlines():
record = line[:-1]
m = ptrn_val.match(record)
if m:
tms = int(m.group(1))
pid = m.group(2)
tid = int(m.group(3))
cid = int(m.group(4))
rid = int(m.group(5))
msg = m.group(6)
rec_vals = []
if not tid in range_data:
range_data[tid] = {}
if cid != 2:
rec_vals.append(tms)
rec_vals.append(tms + 1)
rec_vals.append(pid)
rec_vals.append(tid)
rec_vals.append(msg)
rec_vals.append(record_id)
rec_vals.append(api_pid) # __section
rec_vals.append(tid) # __lane
if cid == 1:
if not pid in range_stack:
range_stack[pid] = {}
pid_stack = range_stack[pid]
if not tid in pid_stack:
pid_stack[tid] = []
rec_stack = pid_stack[tid]
rec_stack.append(rec_vals)
continue
if cid == 2:
if not pid in range_stack:
fatal("ROCTX range begin not found, pid(" + pid + ")")
pid_stack = range_stack[pid]
if not tid in pid_stack:
fatal("ROCTX range begin not found, tid(" + tid + ")")
rec_stack = pid_stack[tid]
rec_vals = rec_stack.pop()
rec_vals[1] = tms
# record the range's start/stop timestamps, its parent (ranges can be nested), and its message.
range_start = rec_vals[0]
range_stop = tms
range_parent = rec_stack[-1][0] if len(rec_stack) != 0 else 0
range_msg = rec_vals[4]
range_data[tid][range_start] = (range_stop, range_parent, range_msg)
# range start
if cid == 3:
range_map[rid] = (tms, msg)
continue
# range stop
if cid == 4:
if rid in range_map:
# querying start timestamp if rid exists
(tms, msg) = range_map[rid]
del range_map[rid]
else:
fatal("range id(" + str(rid) + ") is not found")
rec_vals[0] = tms # begin timestamp
rec_vals[4] = msg # range message
rec_vals[7] = 0 # 0 lane for ranges
db.insert_entry(table_handle, rec_vals)
record_id += 1
return 1
#############################################################
# arguments manipulation routines
def get_field(args, field):
if args == None:
return (None, 0)
ptrn1_field = re.compile(r"^.* " + field + r"\(")
ptrn2_field = re.compile(r"\) .*$")
ptrn3_field = re.compile(r"\)\)$")
(field_name, n) = ptrn1_field.subn("", args, count=1)
if n != 0:
(field_name, n) = ptrn2_field.subn("", field_name, count=1)
if n == 0:
(field_name, n) = ptrn3_field.subn("", field_name, count=1)
return (field_name, n)
def set_field(args, field, val):
return re.subn(
field + r"\(\w+\)([ \)])", field + "(" + str(val) + ")\\1", args, count=1
)
hsa_patch_data = {}
ops_patch_data = {}
# Fill API DB
api_table_descr = [
[
"BeginNs",
"EndNs",
"pid",
"tid",
"Name",
"args",
"Index",
"Data",
"__section",
"__lane",
],
{
"BeginNs": "INTEGER",
"EndNs": "INTEGER",
"pid": "INTEGER",
"tid": "INTEGER",
"Name": "TEXT",
"args": "TEXT",
"Index": "INTEGER",
"Data": "TEXT",
"__section": "INTEGER",
"__lane": "INTEGER",
},
]
# Filling API records DB table
# table_name - created DB table name
# db - DB handle
# indir - input directory
# api_name - traced API name
# api_pid - assigned JSON PID
# dep_pid - PID of dependet domain
# dep_list - list of dependet dospatch events
# dep_filtr - registered dependencies by record ID
def fill_api_db(
table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep_filtr, expl_id
):
global hsa_activity_found
global memory_manager
range_start_times = {}
copy_csv = ""
# Matches normal API records.
ptrn_api_record = re.compile(r"(\d+):(\d+) (\d+):(\d+) ([^\(]+)(\(.*)$")
# Matches records with a function name of "unknown" and no parameters.
# Capture groups 1-4 should match the same information as in ptrn_api_record.
# Used to avoid modifying ptrn_api_record regex.
ptrn_api_record_unknown = re.compile(r"(\d+):(\d+) (\d+):(\d+) (unknown).*$")
hip_mcopy_ptrn = re.compile(r"hipMemcpy|hipMemset")
hip_wait_event_ptrn = re.compile(r"WaitEvent")
hip_sync_event_ptrn = re.compile(r"hipStreamSynchronize")
hip_sync_dev_event_ptrn = re.compile(r"hipDeviceSynchronize")
hip_graph_ptrn = re.compile(r"hipGraphLaunch")
wait_event_ptrn = re.compile(r"WaitEvent|hipStreamSynchronize|hipDeviceSynchronize")
hip_stream_wait_write_ptrn = re.compile(
r"hipStreamWaitValue64|hipStreamWriteValue64|hipStreamWaitValue32|hipStreamWriteValue32"
)
prop_pattern = re.compile(r"([\w-]+)\((\w+)\)")
beg_pattern = re.compile(r'^dispatch\[(\d*)\], (.*) kernel-name\("([^"]*)"\)')
hip_strm_cr_event_ptrn = re.compile(r"hipStreamCreate")
hsa_mcopy_ptrn = re.compile(r"hsa_amd_memory_async_copy")
ptrn_fixformat = re.compile(r"(\d+:\d+ \d+:\d+ \w+)\(\s*(.*)\)$")
ptrn_fixkernel = re.compile(r"\s+kernel=(.*)$")
ptrn_multi_kernel = re.compile(r"(.*):(\d+)$")
ptrn_corr_id = re.compile(r"\ :(\d*)$")
file_name = indir + "/" + api_name + "_api_trace.txt"
if not os.path.isfile(file_name):
return 0
hsa_copy_file_name = indir + "/" + "async_copy_trace.txt"
hsa_copy_file_name_present = 1 if os.path.isfile(file_name) else 0
hsa_copy_deps = 1 if (api_pid == HSA_PID and hsa_copy_file_name_present == 1) else 0
print("hsa_copy_deps: " + str(hsa_copy_deps))
# parsing an input trace file and creating a DB table
record_id_dict = {}
table_handle = db.add_table(table_name, api_table_descr)
with open(file_name, mode="r", errors="replace") as fd:
file_lines = fd.readlines()
total_lines = len(file_lines)
line_index = 0
for line in file_lines:
if (line_index == total_lines - 1) or (line_index % 100 == 0):
sys.stdout.write(
"\rscan "
+ api_name
+ " API data "
+ str(line_index)
+ ":"
+ str(total_lines)
+ " " * 100
)
line_index += 1
record = line[:-1]
corr_id = 0
m = ptrn_corr_id.search(record)
if m:
corr_id = int(m.group(1))
record = ptrn_corr_id.sub("", record)
kernel_arg = ""
m = ptrn_fixkernel.search(record)
if m:
kernel_arg = "kernel(" + m.group(1) + ") "
record = ptrn_fixkernel.sub("", record)
mfixformat = ptrn_fixformat.match(record)
if mfixformat: # replace '=' in args with parentheses
reformated_args = (
kernel_arg
+ mfixformat.group(2)
.replace("=", "(")
.replace(",", ")")
.replace("\\", "\\\\")
.replace('"', '\\"')
+ ")"
)
record = mfixformat.group(1) + "( " + reformated_args + ")"
m = ptrn_api_record.match(record)
if not m:
m = ptrn_api_record_unknown.match(record)
if not m:
fatal(api_name + " bad record: '" + record + "'")
rec_vals = []
rec_len = len(api_table_descr[0]) - 3
for ind in range(1, rec_len):
try:
rec_vals.append(m.group(ind))
except IndexError:
rec_vals.append(None)
proc_id = int(rec_vals[2])
thread_id = int(rec_vals[3])
record_name = rec_vals[4]
# record_args is optional, and may be None if an unknown record is found.
record_args = rec_vals[5]
# incrementing per-process record id/correlation id
if not proc_id in record_id_dict:
record_id_dict[proc_id] = 0
record_id_dict[proc_id] += 1
record_id = record_id_dict[proc_id]
# setting correlationid to record id if correlation id is not defined
if corr_id == 0:
corr_id = record_id
rec_vals.append(corr_id)
# extracting/converting stream id
(stream_id, stream_found) = get_field(record_args, "stream")
if stream_found:
stream_id = get_stream_index(stream_id)
(rec_vals[5], found) = set_field(record_args, "stream", stream_id)
if found == 0:
fatal(
'set_field() failed for "stream", args: "' + record_args + '"'
)
else:
(stream_id, stream_found) = get_field(record_args, "hStream")
if stream_found:
stream_id = get_stream_index(stream_id)
(rec_vals[5], found) = set_field(
record_args, "hStream", stream_id
)
if found == 0:
fatal(
'set_field() failed for "stream", args: "'
+ record_args
+ '"'
)
else:
stream_id = 0
if hip_strm_cr_event_ptrn.match(record_name):
hip_streams.append(stream_id)
if hip_sync_event_ptrn.match(record_name):
if (proc_id, stream_id) in last_hip_api_map:
(last_hip_api_corr_id, last_hip_api_from_pid) = last_hip_api_map[
(proc_id, stream_id)
][-1]
sync_api_beg_us = int((int(rec_vals[0]) - START_NS) / 1000)
if not proc_id in dep_dict:
dep_dict[proc_id] = {}
if HIP_PID not in dep_dict[proc_id]:
dep_dict[proc_id][HIP_PID] = {
"pid": last_hip_api_from_pid,
"from": [],
"to": {},
"id": [],
}
dep_dict[proc_id][HIP_PID]["from"].append(
(-1, stream_id, thread_id)
)
dep_dict[proc_id][HIP_PID]["id"].append(last_hip_api_corr_id)
dep_dict[proc_id][HIP_PID]["to"][
last_hip_api_corr_id
] = sync_api_beg_us
from_ids[(last_hip_api_corr_id, proc_id)] = (
len(dep_dict[proc_id][HIP_PID]["from"]) - 1
)
m = beg_pattern.match(record)
gpu_id = 0
if m:
kernel_properties = m.group(2)
for prop in kernel_properties.split(", "):
m = prop_pattern.match(prop)
if m:
val = m.group(2)
var = m.group(1)
if var == "gpu-id":
gpu_id = int(val)
if hsa_mcopy_ptrn.match(record_name) or hip_mcopy_ptrn.match(record_name):
ops_section_id = COPY_PID
else:
ops_section_id = GPU_BASE_PID + int(gpu_id)
if (proc_id, stream_id) not in last_hip_api_map:
last_hip_api_map[(proc_id, stream_id)] = []
last_hip_api_map[(proc_id, stream_id)].append((corr_id, ops_section_id))
# asyncronous opeartion API found
op_found = 0
mcopy_found = 0
# extract kernel name string
(kernel_str, kernel_found) = get_field(record_args, "kernel")
if kernel_found == 0:
kernel_str = ""
else:
op_found = 1
if hip_mcopy_ptrn.match(record_name):
mcopy_found = 1
op_found = 1
# HIP Graph API
if hip_graph_ptrn.search(record_name):
op_found = 1
# HIP WaitEvent API
if wait_event_ptrn.search(record_name):
op_found = 1
if hip_stream_wait_write_ptrn.search(record_name):
op_found = 1
# HSA memcopy API
if hsa_mcopy_ptrn.match(record_name):
mcopy_found = 1
op_found = 1
stream_id = thread_id
hsa_patch_data[(corr_id, proc_id)] = thread_id
if op_found:
roctx_msg = ""
if not thread_id in range_start_times:
range_start_times[thread_id] = (
sorted(range_data[thread_id].keys())
if thread_id in range_data
else []
)
start_times = range_start_times[thread_id]
index = bisect.bisect_right(start_times, int(rec_vals[0]))
if index > 0:
# We found the range that is closest to this operation. Iterate the
# range stack this range is part of until we find a range that entirely
# contains the operation.
range_start = start_times[index - 1]
while range_start != 0:
(range_end, range_start, msg) = range_data[thread_id][
range_start
]
if int(rec_vals[1]) < range_end:
# This range contains the operation.
roctx_msg = msg
break
ops_patch_data[(corr_id, proc_id)] = (
thread_id,
stream_id,
kernel_str,
roctx_msg,
)
if op_found:
op_found = 0
beg_ns = int(rec_vals[0])
end_ns = int(rec_vals[1])
dur_us = int((end_ns - beg_ns) / 1000)
from_us = int((beg_ns - START_NS) / 1000) + dur_us / 2
if api_pid == HIP_PID or hsa_copy_deps == 1:
if not proc_id in dep_dict:
dep_dict[proc_id] = {}
dep_proc = dep_dict[proc_id]
if not dep_pid in dep_proc:
if api_pid == "HIP_PID":
dep_proc[dep_pid] = {"pid": api_pid, "from": [], "id": []}
else:
dep_proc[dep_pid] = {
"pid": api_pid,
"from": [],
"id": [],
"to": {},
}
dep_str = dep_proc[dep_pid]
dep_str["from"].append((from_us, stream_id, thread_id))
if expl_id:
dep_str["id"].append(corr_id)
# memcopy registering
api_data = (
memory_manager.register_api(rec_vals) if mcopy_data_enabled else ""
)
rec_vals.append(api_data)
# setting section and lane
rec_vals.append(api_pid) # __section
rec_vals.append(thread_id) # __lane
# inserting an API record to DB
db.insert_entry(table_handle, rec_vals)
# inserting of dispatch events correlated to the dependent dispatches
for from_ns, proc_id, thread_id in dep_list:
if not proc_id in record_id_dict:
record_id_dict[proc_id] = 0
record_id_dict[proc_id] += 1
corr_id = record_id_dict[proc_id]
db.insert_entry(
table_handle,
[
from_ns,
from_ns,
proc_id,
thread_id,
"hsa_dispatch",
"",
corr_id,
"",
api_pid,
thread_id,
],
)
# generating memcopy CSV
if copy_csv != "":
file_name = os.environ["PWD"] + "/results_mcopy.csv"
with open(file_name, mode="w") as fd:
print("File '" + file_name + "' is generating")
fd.write(copy_csv)
return 1
#############################################################
# fill COPY DB
copy_table_descr = [
["BeginNs", "EndNs", "Name", "pid", "tid", "Index", "Data", "__section", "__lane"],
{
"Index": "INTEGER",
"Name": "TEXT",
"args": "TEXT",
"BeginNs": "INTEGER",
"EndNs": "INTEGER",
"pid": "INTEGER",
"tid": "INTEGER",
"Data": "TEXT",
"__section": "INTEGER",
"__lane": "INTEGER",
},
]
def fill_copy_db(table_name, db, indir):
sect_id = COPY_PID
file_name = indir + "/" + "async_copy_trace.txt"
ptrn_val = re.compile(r"^(\d+):(\d+) (async-copy):(\d+):(\d+)$")
if not os.path.isfile(file_name):
return 0
table_handle = db.add_table(table_name, copy_table_descr)
with open(file_name, mode="r", errors="replace") as fd:
for line in fd.readlines():
record = line[:-1]
m = ptrn_val.match(record)
if not m:
fatal("bad async-copy entry '" + record + "'")
else:
rec_vals = []
for ind in range(1, 4):
rec_vals.append(m.group(ind))
corr_id = int(m.group(4))
proc_id = int(m.group(5))
# querying tid value
if (corr_id, proc_id) in hsa_patch_data:
thread_id = hsa_patch_data[(corr_id, proc_id)]
else:
thread_id = -1
# completing record
rec_vals.append(proc_id) # tid
rec_vals.append(thread_id) # tid
rec_vals.append(corr_id) # Index
# registering memcopy information
activity_data = (
memory_manager.register_copy(rec_vals) if mcopy_data_enabled else ""
)
rec_vals.append(activity_data)
# appending straem ID and section ID
rec_vals.append(COPY_PID) # __section
rec_vals.append(thread_id) # __lane
# inserting DB activity entry
db.insert_entry(table_handle, rec_vals)
# filling dependencies
to_ns = int(rec_vals[0])
to_us = int((to_ns - START_NS) / 1000)
if thread_id != -1:
# if not proc_id in dep_dict: dep_dict[proc_id] = {}
dep_proc = dep_dict[proc_id]
# if not pid in dep_proc: dep_proc[pid] = { 'pid': HSA_PID, 'from': [], 'to': {}, 'id': [] }
dep_str = dep_proc[sect_id]
dep_str["to"][corr_id] = to_us
dep_str["id"].append(corr_id)
return 1
#############################################################
# fill HCC ops DB
ops_table_descr = [
[
"BeginNs",
"EndNs",
"dev-id",
"queue-id",
"Name",
"pid",
"tid",
"roctx-range",
"stream-id",
"Index",
"Data",
"__section",
"__lane",
],
{
"Index": "INTEGER",
"Name": "TEXT",
"args": "TEXT",
"BeginNs": "INTEGER",
"EndNs": "INTEGER",
"dev-id": "INTEGER",
"queue-id": "INTEGER",
"pid": "INTEGER",
"tid": "INTEGER",
"roctx-range": "TEXT",
"Data": "TEXT",
"stream-id": "INTEGER",
"__section": "INTEGER",
"__lane": "INTEGER",
},
]
def fill_ops_db(kernel_table_name, mcopy_table_name, db, indir):
global max_gpu_id
file_name = indir + "/" + "hcc_ops_trace.txt"
ptrn_val = re.compile(r"(\d+):(\d+) (\d+):(\d+) (.*)$")
ptrn_id = re.compile(r"^([^:]+):(\d+):(\d+)$")
ptrn_mcopy = re.compile(r"(Memcpy|Copy|Fill)")
ptrn_barrier = re.compile(r"Marker")
if not os.path.isfile(file_name):
return {}
filtr = {}
kernel_table_handle = db.add_table(kernel_table_name, ops_table_descr)
mcopy_table_handle = db.add_table(mcopy_table_name, ops_table_descr)
with open(file_name, mode="r", errors="replace") as fd:
file_lines = fd.readlines()
total_lines = len(file_lines)
line_index = 0
for line in file_lines:
if (line_index == total_lines - 1) or (line_index % 100 == 0):
sys.stdout.write(
"\rscan ops data "
+ str(line_index)
+ ":"
+ str(total_lines)
+ " " * 100
)
line_index += 1
record = line[:-1]
m = ptrn_val.match(record)
if m:
# parsing trace record
rec_vals = []
for ind in range(1, 6):
rec_vals.append(m.group(ind))
label = rec_vals[4] # record name
m = ptrn_id.match(label)
if not m:
fatal("bad hcc ops entry '" + record + "'")
name = m.group(1)
corr_id = int(m.group(2))
proc_id = int(m.group(3))
# checking name for memcopy pattern
is_barrier = 0
if ptrn_mcopy.search(name):
rec_table_name = mcopy_table_name
table_handle = mcopy_table_handle
sect_id = COPY_PID
else:
rec_table_name = kernel_table_name
table_handle = kernel_table_handle
gpu_id = int(rec_vals[2])
if gpu_id > max_gpu_id:
max_gpu_id = gpu_id
if ptrn_barrier.search(name):
name = '"<barrier packet>"'
is_barrier = 1
sect_id = GPU_BASE_PID + int(gpu_id) + 512
else:
sect_id = GPU_BASE_PID + int(gpu_id)
thread_id = 0
stream_id = 0
roctx_range = ""
if (corr_id, proc_id) in ops_patch_data:
(thread_id, stream_id, name_patch, roctx_range) = ops_patch_data[
(corr_id, proc_id)
]
if name_patch != "":
name = name_patch
if roctx_range == "":
roctx_range = name
else:
if is_barrier:
continue
else:
if "ROCP_CTRL_RATE" in os.environ:
continue
else:
fatal(
"hcc ops data not found: '"
+ record
+ "', "
+ str(corr_id)
+ ", "
+ str(proc_id)
)
# activity record
rec_vals[4] = name # Name
rec_vals.append(proc_id) # pid
rec_vals.append(thread_id) # tid
rec_vals.append(roctx_range) # roctx-range
rec_vals.append(stream_id) # StreamId
rec_vals.append(corr_id) # Index
# registering memcopy information
activity_data = (
memory_manager.register_activity(rec_vals)
if mcopy_data_enabled
else ""
)
rec_vals.append(activity_data)
# activity record data for stream ID and sction ID
rec_vals.append(sect_id) # __section
rec_vals.append(stream_id) # __lane
# inserting DB activity entry
db.insert_entry(table_handle, rec_vals)
# registering a dependency filtr
filtr[(corr_id, proc_id)] = rec_table_name
# filling a dependencies
to_ns = int(rec_vals[0])
to_us = int((to_ns - START_NS) / 1000)
end_ns = int(rec_vals[1])
dur_us = int((end_ns - to_ns) / 1000)
if (corr_id, proc_id) in from_ids:
depid = from_ids[(corr_id, proc_id)]
from_val = dep_dict[proc_id][HIP_PID]["from"][depid]
print("from_val" + str(from_val))
from_val_new = (to_us + dur_us, from_val[1], from_val[2])
dep_dict[proc_id][HIP_PID]["from"][depid] = from_val_new
if not proc_id in dep_dict:
dep_dict[proc_id] = {}
dep_proc = dep_dict[proc_id]
if not sect_id in dep_proc:
dep_proc[sect_id] = {"bsp": OPS_PID, "to": {}}
dep_str = dep_proc[sect_id]
dep_str["to"][corr_id] = to_us
else:
fatal("hcc ops bad record: '" + record + "'")
return filtr
#############################################################
# main
if len(sys.argv) < 2:
fatal("Usage: " + sys.argv[0] + " <output CSV file> <input result files list>")
outfile = sys.argv[1]
infiles = sys.argv[2:]
indir = re.sub(r"\/[^\/]*$", r"", infiles[0])
inext = re.sub(r"\s+$", r"", infiles[0])
inext = re.sub(r"^.*(\.[^\.]+)$", r"\1", inext)
dbfile = ""
csvfile = ""
if "ROCP_JSON_REBASE" in os.environ and os.environ["ROCP_JSON_REBASE"] == 0:
begin_ts_file = indir + "/begin_ts_file.txt"
if os.path.isfile(begin_ts_file):
with open(begin_ts_file, mode="r", errors="replace") as fd:
ind = 0
for line in fd.readlines():
val = int(line)
if ind == 0 or val < START_NS:
START_NS = val
ind += 1
print("START timestamp found (" + str(START_NS) + "ns)")
if re.search(r"\.csv$", outfile):
csvfile = outfile
elif re.search(r"\.db$", outfile):
dbfile = outfile
csvfile = re.sub(r"\.db$", ".csv", outfile)
else:
fatal("Bad output file '" + outfile + "'")
if inext == ".txt":
for f in infiles:
parse_res(f)
if len(var_table) != 0:
merge_table()
if dbfile == "":
dump_csv(csvfile)
else:
statfile = re.sub(r"\.csv$", ".stats.csv", csvfile)
jsonfile = re.sub(r"\.csv$", ".json", csvfile)
hsa_statfile = re.sub(r"\.stats\.csv$", r".hsa_stats.csv", statfile)
hip_statfile = re.sub(r"\.stats\.csv$", r".hip_stats.csv", statfile)
ops_statfile = statfile
copy_statfile = re.sub(r"\.stats\.csv$", r".copy_stats.csv", statfile)
memcopy_info_file = re.sub(r"\.stats\.csv$", r".memcopy_info.csv", statfile)
sysinfo_file = re.sub(r"\.stats\.csv$", r".sysinfo.txt", statfile)
metadata_gen(sysinfo_file, "rocminfo")
with open(dbfile, mode="w") as fd:
fd.truncate()
db = SQLiteDB(dbfile)
memory_manager = MemManager(db, indir)
ext_trace_found = fill_ext_db("rocTX", db, indir, "roctx", EXT_PID)
hsa_trace_found = fill_api_db(
"HSA", db, indir, "hsa", HSA_PID, COPY_PID, kern_dep_list, {}, 0
)
hsa_activity_found = fill_copy_db("COPY", db, indir)
hip_trace_found = fill_api_db("HIP", db, indir, "hip", HIP_PID, OPS_PID, [], {}, 1)
ops_filtr = fill_ops_db("OPS", "COPY", db, indir)
fill_kernel_db("KERN", db)
any_trace_found = ext_trace_found | hsa_trace_found | hip_trace_found
copy_trace_found = 0
if hsa_activity_found or len(ops_filtr):
copy_trace_found = 1
db.open_json(jsonfile)
if ext_trace_found:
db.label_json(EXT_PID, "Markers and Ranges", jsonfile)
if hip_trace_found:
db.label_json(HIP_PID, "CPU HIP API", jsonfile)
if hsa_trace_found:
db.label_json(HSA_PID, "CPU HSA API", jsonfile)
db.label_json(COPY_PID, "COPY", jsonfile)
if max_gpu_id >= 0:
for ind in range(0, int(max_gpu_id) + 1):
db.label_json(int(ind) + int(GPU_BASE_PID), "GPU" + str(ind), jsonfile)
db.label_json(int(ind) + int(GPU_BASE_PID) + 512 , "GPU Barriers" + str(ind), jsonfile)
if ext_trace_found:
dform.gen_ext_json_trace(db, "rocTX", START_NS, jsonfile)
if len(var_table) != 0:
dform.post_process_data(db, "KERN", csvfile)
dform.gen_table_bins(db, "KERN", statfile, "KernelName", "DurationNs")
if "BeginNs" in var_list:
dform.gen_kernel_json_trace(db, "KERN", GPU_BASE_PID, START_NS, jsonfile)
if hsa_trace_found:
dform.post_process_data(db, "HSA")
dform.gen_table_bins(db, "HSA", hsa_statfile, "Name", "DurationNs")
dform.gen_api_json_trace(db, "HSA", START_NS, jsonfile)
if copy_trace_found:
dform.post_process_data(db, "COPY")
dform.gen_table_bins(db, "COPY", copy_statfile, "Name", "DurationNs")
dform.gen_api_json_trace(db, "COPY", START_NS, jsonfile)
if hip_trace_found:
dform.post_process_data(db, "HIP")
dform.gen_table_bins(db, "HIP", hip_statfile, "Name", "DurationNs")
dform.gen_api_json_trace(db, "HIP", START_NS, jsonfile)
if ops_filtr:
dform.post_process_data(db, "OPS")
dform.gen_table_bins(db, "OPS", ops_statfile, "Name", "DurationNs")
dform.gen_ops_json_trace(db, "OPS", GPU_BASE_PID, START_NS, jsonfile)
if any_trace_found:
dep_id = 0
for proc_id, dep_proc in dep_dict.items():
for to_pid, dep_str in dep_proc.items():
if "bsp" in dep_str:
bspid = dep_str["bsp"]
base_str = dep_proc[bspid]
for v in ("pid", "from", "id"):
dep_str[v] = base_str[v]
base_str["inv"] = 1
for to_pid, dep_str in dep_proc.items():
if "inv" in dep_str:
continue
if not "to" in dep_str:
continue
from_pid = dep_str["pid"]
from_us_list = dep_str["from"]
to_us_dict = dep_str["to"]
corr_id_list = dep_str["id"]
db.flow_json(
dep_id,
from_pid,
from_us_list,
to_pid,
to_us_dict,
corr_id_list,
jsonfile,
)
dep_id += len(from_us_list)
db.metadata_json(jsonfile, sysinfo_file)
db.close_json(jsonfile)
if mcopy_data_enabled:
memory_manager.dump_data("MM", memcopy_info_file)
db.close()
sys.exit(0)
#############################################################