SWDEV-470692: Fix parsing of unknown API records
- A valid trace is now given even if an individual entry in an API trace
does not contain a parameter list
- Previously an "unknown" record in an API trace would cause the entire
output to be discarded
Change-Id: I29bf6df2cf58a231d836aafb37fbde9c5d85512f
[ROCm/rocprofiler commit: 4467e50453]
Этот коммит содержится в:
коммит произвёл
Mark Meserve
родитель
76e25cff42
Коммит
f3aaad1687
@@ -416,6 +416,8 @@ def fill_ext_db(table_name, db, indir, trace_name, api_pid):
|
||||
#############################################################
|
||||
# arguments manipulation routines
|
||||
def get_field(args, field):
|
||||
if args == None:
|
||||
return (None, 0)
|
||||
ptrn1_field = re.compile(r"^.* " + field + "\(")
|
||||
ptrn2_field = re.compile(r"\) .*$")
|
||||
ptrn3_field = re.compile(r"\)\)$")
|
||||
@@ -483,7 +485,13 @@ def fill_api_db(
|
||||
range_start_times = {}
|
||||
copy_csv = ""
|
||||
|
||||
ptrn_val = re.compile(r"(\d+):(\d+) (\d+):(\d+) ([^\(]+)(\(.*)$")
|
||||
# Matches normal API records.
|
||||
ptrn_api_record = re.compile(r"(\d+):(\d+) (\d+):(\d+) ([^\(]+)(\(.*)$")
|
||||
# Matches records with a function name of "unknown" and no parameters.
|
||||
# Capture groups 1-4 should match the same information as in ptrn_api_record.
|
||||
# Used to avoid modifying ptrn_api_record regex.
|
||||
ptrn_api_record_unknown = re.compile(r"(\d+):(\d+) (\d+):(\d+) (unknown).*$")
|
||||
|
||||
hip_mcopy_ptrn = re.compile(r"hipMemcpy|hipMemset")
|
||||
hip_wait_event_ptrn = re.compile(r"WaitEvent")
|
||||
hip_sync_event_ptrn = re.compile(r"hipStreamSynchronize")
|
||||
@@ -558,209 +566,215 @@ def fill_api_db(
|
||||
)
|
||||
record = mfixformat.group(1) + "( " + reformated_args + ")"
|
||||
|
||||
m = ptrn_val.match(record)
|
||||
m = ptrn_api_record.match(record)
|
||||
if not m:
|
||||
fatal(api_name + " bad record: '" + record + "'")
|
||||
else:
|
||||
rec_vals = []
|
||||
rec_len = len(api_table_descr[0]) - 3
|
||||
for ind in range(1, rec_len):
|
||||
m = ptrn_api_record_unknown.match(record)
|
||||
if not m:
|
||||
fatal(api_name + " bad record: '" + record + "'")
|
||||
|
||||
rec_vals = []
|
||||
rec_len = len(api_table_descr[0]) - 3
|
||||
for ind in range(1, rec_len):
|
||||
try:
|
||||
rec_vals.append(m.group(ind))
|
||||
proc_id = int(rec_vals[2])
|
||||
thread_id = int(rec_vals[3])
|
||||
record_name = rec_vals[4]
|
||||
record_args = rec_vals[5]
|
||||
except IndexError:
|
||||
rec_vals.append(None)
|
||||
proc_id = int(rec_vals[2])
|
||||
thread_id = int(rec_vals[3])
|
||||
record_name = rec_vals[4]
|
||||
# record_args is optional, and may be None if an unknown record is found.
|
||||
record_args = rec_vals[5]
|
||||
|
||||
# incrementing per-process record id/correlation id
|
||||
if not proc_id in record_id_dict:
|
||||
record_id_dict[proc_id] = 0
|
||||
record_id_dict[proc_id] += 1
|
||||
record_id = record_id_dict[proc_id]
|
||||
# incrementing per-process record id/correlation id
|
||||
if not proc_id in record_id_dict:
|
||||
record_id_dict[proc_id] = 0
|
||||
record_id_dict[proc_id] += 1
|
||||
record_id = record_id_dict[proc_id]
|
||||
|
||||
# setting correlationid to record id if correlation id is not defined
|
||||
if corr_id == 0:
|
||||
corr_id = record_id
|
||||
# setting correlationid to record id if correlation id is not defined
|
||||
if corr_id == 0:
|
||||
corr_id = record_id
|
||||
|
||||
rec_vals.append(corr_id)
|
||||
# extracting/converting stream id
|
||||
(stream_id, stream_found) = get_field(record_args, "stream")
|
||||
rec_vals.append(corr_id)
|
||||
# extracting/converting stream id
|
||||
(stream_id, stream_found) = get_field(record_args, "stream")
|
||||
if stream_found:
|
||||
stream_id = get_stream_index(stream_id)
|
||||
(rec_vals[5], found) = set_field(record_args, "stream", stream_id)
|
||||
if found == 0:
|
||||
fatal(
|
||||
'set_field() failed for "stream", args: "' + record_args + '"'
|
||||
)
|
||||
else:
|
||||
(stream_id, stream_found) = get_field(record_args, "hStream")
|
||||
if stream_found:
|
||||
stream_id = get_stream_index(stream_id)
|
||||
(rec_vals[5], found) = set_field(record_args, "stream", stream_id)
|
||||
(rec_vals[5], found) = set_field(
|
||||
record_args, "hStream", stream_id
|
||||
)
|
||||
if found == 0:
|
||||
fatal(
|
||||
'set_field() failed for "stream", args: "' + record_args + '"'
|
||||
'set_field() failed for "stream", args: "'
|
||||
+ record_args
|
||||
+ '"'
|
||||
)
|
||||
else:
|
||||
(stream_id, stream_found) = get_field(record_args, "hStream")
|
||||
if stream_found:
|
||||
stream_id = get_stream_index(stream_id)
|
||||
(rec_vals[5], found) = set_field(
|
||||
record_args, "hStream", stream_id
|
||||
)
|
||||
if found == 0:
|
||||
fatal(
|
||||
'set_field() failed for "stream", args: "'
|
||||
+ record_args
|
||||
+ '"'
|
||||
)
|
||||
else:
|
||||
stream_id = 0
|
||||
stream_id = 0
|
||||
|
||||
if hip_strm_cr_event_ptrn.match(record_name):
|
||||
hip_streams.append(stream_id)
|
||||
if hip_strm_cr_event_ptrn.match(record_name):
|
||||
hip_streams.append(stream_id)
|
||||
|
||||
if hip_sync_event_ptrn.match(record_name):
|
||||
if (proc_id, stream_id) in last_hip_api_map:
|
||||
(last_hip_api_corr_id, last_hip_api_from_pid) = last_hip_api_map[
|
||||
(proc_id, stream_id)
|
||||
][-1]
|
||||
sync_api_beg_us = int((int(rec_vals[0]) - START_NS) / 1000)
|
||||
if not proc_id in dep_dict:
|
||||
dep_dict[proc_id] = {}
|
||||
if HIP_PID not in dep_dict[proc_id]:
|
||||
dep_dict[proc_id][HIP_PID] = {
|
||||
"pid": last_hip_api_from_pid,
|
||||
"from": [],
|
||||
"to": {},
|
||||
"id": [],
|
||||
}
|
||||
dep_dict[proc_id][HIP_PID]["from"].append(
|
||||
(-1, stream_id, thread_id)
|
||||
)
|
||||
dep_dict[proc_id][HIP_PID]["id"].append(last_hip_api_corr_id)
|
||||
dep_dict[proc_id][HIP_PID]["to"][
|
||||
last_hip_api_corr_id
|
||||
] = sync_api_beg_us
|
||||
from_ids[(last_hip_api_corr_id, proc_id)] = (
|
||||
len(dep_dict[proc_id][HIP_PID]["from"]) - 1
|
||||
)
|
||||
|
||||
m = beg_pattern.match(record)
|
||||
gpu_id = 0
|
||||
if m:
|
||||
kernel_properties = m.group(2)
|
||||
for prop in kernel_properties.split(", "):
|
||||
m = prop_pattern.match(prop)
|
||||
if m:
|
||||
val = m.group(2)
|
||||
var = m.group(1)
|
||||
if var == "gpu-id":
|
||||
gpu_id = int(val)
|
||||
|
||||
if hsa_mcopy_ptrn.match(record_name) or hip_mcopy_ptrn.match(record_name):
|
||||
ops_section_id = COPY_PID
|
||||
else:
|
||||
ops_section_id = GPU_BASE_PID + int(gpu_id)
|
||||
|
||||
if (proc_id, stream_id) not in last_hip_api_map:
|
||||
last_hip_api_map[(proc_id, stream_id)] = []
|
||||
last_hip_api_map[(proc_id, stream_id)].append((corr_id, ops_section_id))
|
||||
|
||||
# asyncronous opeartion API found
|
||||
op_found = 0
|
||||
mcopy_found = 0
|
||||
|
||||
# extract kernel name string
|
||||
(kernel_str, kernel_found) = get_field(record_args, "kernel")
|
||||
if kernel_found == 0:
|
||||
kernel_str = ""
|
||||
else:
|
||||
op_found = 1
|
||||
|
||||
if hip_mcopy_ptrn.match(record_name):
|
||||
mcopy_found = 1
|
||||
op_found = 1
|
||||
|
||||
# HIP Graph API
|
||||
if hip_graph_ptrn.search(record_name):
|
||||
op_found = 1
|
||||
|
||||
# HIP WaitEvent API
|
||||
if wait_event_ptrn.search(record_name):
|
||||
op_found = 1
|
||||
|
||||
if hip_stream_wait_write_ptrn.search(record_name):
|
||||
op_found = 1
|
||||
|
||||
# HSA memcopy API
|
||||
if hsa_mcopy_ptrn.match(record_name):
|
||||
mcopy_found = 1
|
||||
op_found = 1
|
||||
|
||||
stream_id = thread_id
|
||||
hsa_patch_data[(corr_id, proc_id)] = thread_id
|
||||
|
||||
if op_found:
|
||||
roctx_msg = ""
|
||||
|
||||
if not thread_id in range_start_times:
|
||||
range_start_times[thread_id] = (
|
||||
sorted(range_data[thread_id].keys())
|
||||
if thread_id in range_data
|
||||
else []
|
||||
)
|
||||
start_times = range_start_times[thread_id]
|
||||
|
||||
index = bisect.bisect_right(start_times, int(rec_vals[0]))
|
||||
if index > 0:
|
||||
# We found the range that is closest to this operation. Iterate the
|
||||
# range stack this range is part of until we find a range that entirely
|
||||
# contains the operation.
|
||||
range_start = start_times[index - 1]
|
||||
while range_start != 0:
|
||||
(range_end, range_start, msg) = range_data[thread_id][
|
||||
range_start
|
||||
]
|
||||
if int(rec_vals[1]) < range_end:
|
||||
# This range contains the operation.
|
||||
roctx_msg = msg
|
||||
break
|
||||
|
||||
ops_patch_data[(corr_id, proc_id)] = (
|
||||
thread_id,
|
||||
stream_id,
|
||||
kernel_str,
|
||||
roctx_msg,
|
||||
if hip_sync_event_ptrn.match(record_name):
|
||||
if (proc_id, stream_id) in last_hip_api_map:
|
||||
(last_hip_api_corr_id, last_hip_api_from_pid) = last_hip_api_map[
|
||||
(proc_id, stream_id)
|
||||
][-1]
|
||||
sync_api_beg_us = int((int(rec_vals[0]) - START_NS) / 1000)
|
||||
if not proc_id in dep_dict:
|
||||
dep_dict[proc_id] = {}
|
||||
if HIP_PID not in dep_dict[proc_id]:
|
||||
dep_dict[proc_id][HIP_PID] = {
|
||||
"pid": last_hip_api_from_pid,
|
||||
"from": [],
|
||||
"to": {},
|
||||
"id": [],
|
||||
}
|
||||
dep_dict[proc_id][HIP_PID]["from"].append(
|
||||
(-1, stream_id, thread_id)
|
||||
)
|
||||
dep_dict[proc_id][HIP_PID]["id"].append(last_hip_api_corr_id)
|
||||
dep_dict[proc_id][HIP_PID]["to"][
|
||||
last_hip_api_corr_id
|
||||
] = sync_api_beg_us
|
||||
from_ids[(last_hip_api_corr_id, proc_id)] = (
|
||||
len(dep_dict[proc_id][HIP_PID]["from"]) - 1
|
||||
)
|
||||
|
||||
if op_found:
|
||||
op_found = 0
|
||||
beg_ns = int(rec_vals[0])
|
||||
end_ns = int(rec_vals[1])
|
||||
dur_us = int((end_ns - beg_ns) / 1000)
|
||||
from_us = int((beg_ns - START_NS) / 1000) + dur_us / 2
|
||||
if api_pid == HIP_PID or hsa_copy_deps == 1:
|
||||
if not proc_id in dep_dict:
|
||||
dep_dict[proc_id] = {}
|
||||
dep_proc = dep_dict[proc_id]
|
||||
if not dep_pid in dep_proc:
|
||||
if api_pid == "HIP_PID":
|
||||
dep_proc[dep_pid] = {"pid": api_pid, "from": [], "id": []}
|
||||
else:
|
||||
dep_proc[dep_pid] = {
|
||||
"pid": api_pid,
|
||||
"from": [],
|
||||
"id": [],
|
||||
"to": {},
|
||||
}
|
||||
dep_str = dep_proc[dep_pid]
|
||||
dep_str["from"].append((from_us, stream_id, thread_id))
|
||||
if expl_id:
|
||||
dep_str["id"].append(corr_id)
|
||||
m = beg_pattern.match(record)
|
||||
gpu_id = 0
|
||||
if m:
|
||||
kernel_properties = m.group(2)
|
||||
for prop in kernel_properties.split(", "):
|
||||
m = prop_pattern.match(prop)
|
||||
if m:
|
||||
val = m.group(2)
|
||||
var = m.group(1)
|
||||
if var == "gpu-id":
|
||||
gpu_id = int(val)
|
||||
|
||||
# memcopy registering
|
||||
api_data = (
|
||||
memory_manager.register_api(rec_vals) if mcopy_data_enabled else ""
|
||||
if hsa_mcopy_ptrn.match(record_name) or hip_mcopy_ptrn.match(record_name):
|
||||
ops_section_id = COPY_PID
|
||||
else:
|
||||
ops_section_id = GPU_BASE_PID + int(gpu_id)
|
||||
|
||||
if (proc_id, stream_id) not in last_hip_api_map:
|
||||
last_hip_api_map[(proc_id, stream_id)] = []
|
||||
last_hip_api_map[(proc_id, stream_id)].append((corr_id, ops_section_id))
|
||||
|
||||
# asyncronous opeartion API found
|
||||
op_found = 0
|
||||
mcopy_found = 0
|
||||
|
||||
# extract kernel name string
|
||||
(kernel_str, kernel_found) = get_field(record_args, "kernel")
|
||||
if kernel_found == 0:
|
||||
kernel_str = ""
|
||||
else:
|
||||
op_found = 1
|
||||
|
||||
if hip_mcopy_ptrn.match(record_name):
|
||||
mcopy_found = 1
|
||||
op_found = 1
|
||||
|
||||
# HIP Graph API
|
||||
if hip_graph_ptrn.search(record_name):
|
||||
op_found = 1
|
||||
|
||||
# HIP WaitEvent API
|
||||
if wait_event_ptrn.search(record_name):
|
||||
op_found = 1
|
||||
|
||||
if hip_stream_wait_write_ptrn.search(record_name):
|
||||
op_found = 1
|
||||
|
||||
# HSA memcopy API
|
||||
if hsa_mcopy_ptrn.match(record_name):
|
||||
mcopy_found = 1
|
||||
op_found = 1
|
||||
|
||||
stream_id = thread_id
|
||||
hsa_patch_data[(corr_id, proc_id)] = thread_id
|
||||
|
||||
if op_found:
|
||||
roctx_msg = ""
|
||||
|
||||
if not thread_id in range_start_times:
|
||||
range_start_times[thread_id] = (
|
||||
sorted(range_data[thread_id].keys())
|
||||
if thread_id in range_data
|
||||
else []
|
||||
)
|
||||
start_times = range_start_times[thread_id]
|
||||
|
||||
index = bisect.bisect_right(start_times, int(rec_vals[0]))
|
||||
if index > 0:
|
||||
# We found the range that is closest to this operation. Iterate the
|
||||
# range stack this range is part of until we find a range that entirely
|
||||
# contains the operation.
|
||||
range_start = start_times[index - 1]
|
||||
while range_start != 0:
|
||||
(range_end, range_start, msg) = range_data[thread_id][
|
||||
range_start
|
||||
]
|
||||
if int(rec_vals[1]) < range_end:
|
||||
# This range contains the operation.
|
||||
roctx_msg = msg
|
||||
break
|
||||
|
||||
ops_patch_data[(corr_id, proc_id)] = (
|
||||
thread_id,
|
||||
stream_id,
|
||||
kernel_str,
|
||||
roctx_msg,
|
||||
)
|
||||
rec_vals.append(api_data)
|
||||
|
||||
# setting section and lane
|
||||
rec_vals.append(api_pid) # __section
|
||||
rec_vals.append(thread_id) # __lane
|
||||
if op_found:
|
||||
op_found = 0
|
||||
beg_ns = int(rec_vals[0])
|
||||
end_ns = int(rec_vals[1])
|
||||
dur_us = int((end_ns - beg_ns) / 1000)
|
||||
from_us = int((beg_ns - START_NS) / 1000) + dur_us / 2
|
||||
if api_pid == HIP_PID or hsa_copy_deps == 1:
|
||||
if not proc_id in dep_dict:
|
||||
dep_dict[proc_id] = {}
|
||||
dep_proc = dep_dict[proc_id]
|
||||
if not dep_pid in dep_proc:
|
||||
if api_pid == "HIP_PID":
|
||||
dep_proc[dep_pid] = {"pid": api_pid, "from": [], "id": []}
|
||||
else:
|
||||
dep_proc[dep_pid] = {
|
||||
"pid": api_pid,
|
||||
"from": [],
|
||||
"id": [],
|
||||
"to": {},
|
||||
}
|
||||
dep_str = dep_proc[dep_pid]
|
||||
dep_str["from"].append((from_us, stream_id, thread_id))
|
||||
if expl_id:
|
||||
dep_str["id"].append(corr_id)
|
||||
|
||||
# inserting an API record to DB
|
||||
db.insert_entry(table_handle, rec_vals)
|
||||
# memcopy registering
|
||||
api_data = (
|
||||
memory_manager.register_api(rec_vals) if mcopy_data_enabled else ""
|
||||
)
|
||||
rec_vals.append(api_data)
|
||||
|
||||
# setting section and lane
|
||||
rec_vals.append(api_pid) # __section
|
||||
rec_vals.append(thread_id) # __lane
|
||||
|
||||
# inserting an API record to DB
|
||||
db.insert_entry(table_handle, rec_vals)
|
||||
|
||||
# inserting of dispatch events correlated to the dependent dispatches
|
||||
for from_ns, proc_id, thread_id in dep_list:
|
||||
|
||||
Ссылка в новой задаче
Block a user