[PC Sampling] Separate counts for issued and stalled samples (#756)

Bu işleme şunda yer alıyor:
Vladimir Indic
2025-06-16 23:09:21 +02:00
işlemeyi yapan: GitHub
ebeveyn 7c0ffd14a9
işleme 451be8a600
2 değiştirilmiş dosya ile 48 ekleme ve 10 silme
+46 -8
Dosyayı Görüntüle
@@ -1050,6 +1050,8 @@ def search_pc_sampling_record(records):
lambda: defaultdict(
lambda: {
"count": 0,
"count_issued": 0,
"count_stalled": 0,
"inst_index": None,
"stall_reason": {
"NONE": 0,
@@ -1068,6 +1070,10 @@ def search_pc_sampling_record(records):
)
)
rocp_inst_not_issued_prefix_len = len(
"ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_"
)
# Populate grouped_data
for i, item in enumerate(records):
pc_info = item["record"].get("pc", {})
@@ -1075,6 +1081,7 @@ def search_pc_sampling_record(records):
code_object_offset = pc_info.get("code_object_offset")
snapshot = item["record"].get("snapshot", {})
inst_index = item.get("inst_index")
issued = item["record"].get("wave_issued")
# Todo: opt me
if (
@@ -1087,10 +1094,13 @@ def search_pc_sampling_record(records):
grouped_data[code_object_id][code_object_offset]["inst_index"] = inst_index
if len(snapshot):
# NB: 54 is the length of prefix "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_"
grouped_data[code_object_id][code_object_offset]["stall_reason"][
snapshot.get("stall_reason")[54:]
] += 1
if issued:
grouped_data[code_object_id][code_object_offset]["count_issued"] += 1
else:
grouped_data[code_object_id][code_object_offset]["count_stalled"] += 1
grouped_data[code_object_id][code_object_offset]["stall_reason"][
snapshot.get("stall_reason")[rocp_inst_not_issued_prefix_len:]
] += 1
# print(
# inst_index,
# grouped_data[code_object_id][code_object_offset]["stall_reason"],
@@ -1110,6 +1120,8 @@ def search_pc_sampling_record(records):
info["inst_index"],
offset,
info["count"],
info["count_issued"],
info["count_stalled"],
# For info["stall_reason"], remove the zero entries, sorting the remaining items by their values in descending order
sorted(
((k, v) for k, v in info["stall_reason"].items() if v > 0),
@@ -1205,14 +1217,22 @@ def load_pc_sampling_data_per_kernel(
df = pd.DataFrame(
search_pc_sampling_record(pc_sample_key_loc),
columns=["code_object_id", "inst_index", "offset", "count", "stall_reason"],
columns=[
"code_object_id",
"inst_index",
"offset",
"count",
"count_issued",
"count_stalled",
"stall_reason",
],
)
df = df[
(df["code_object_id"] == kernel_info["code_object_id"])
& (df["offset"] > kernel_info["entry_byte_offset"])
& (df["offset"] < kernel_info["potential_end_offset"])
][["inst_index", "offset", "count", "stall_reason"]]
][["inst_index", "offset", "count", "count_issued", "count_stalled", "stall_reason"]]
df["offset"] = df["offset"].apply(lambda x: hex(x))
@@ -1239,7 +1259,17 @@ def load_pc_sampling_data_per_kernel(
return (
df[["source_line", "instruction", "offset", "count"]]
if method == "host_trap"
else df[["source_line", "instruction", "offset", "count", "stall_reason"]]
else df[
[
"source_line",
"instruction",
"offset",
"count",
"count_issued",
"count_stalled",
"stall_reason",
]
]
)
else: # sort by "count"
return (
@@ -1248,7 +1278,15 @@ def load_pc_sampling_data_per_kernel(
)
if method == "host_trap"
else df[
["source_line", "instruction", "offset", "count", "stall_reason"]
[
"source_line",
"instruction",
"offset",
"count",
"count_issued",
"count_stalled",
"stall_reason",
]
].sort_values(by="count", ascending=False)
)
# might support sort by stall reason in the future
+2 -2
Dosyayı Görüntüle
@@ -993,7 +993,7 @@ def pc_sampling_prof(
"ROCPROFILER_PC_SAMPLING_BETA_ENABLED": "1",
"ROCPROF_PC_SAMPLING_UNIT": unit,
"ROCPROF_PC_SAMPLING_INTERVAL": str(interval),
"ROCPROF_PC_SAMPLING_METHOD": "host_trap",
"ROCPROF_PC_SAMPLING_METHOD": method,
}
new_env = os.environ.copy()
for key, value in options.items():
@@ -1007,7 +1007,7 @@ def pc_sampling_prof(
options = [
"--pc-sampling-beta-enabled",
"--pc-sampling-method",
"host_trap",
method,
"--pc-sampling-unit",
unit,
"--output-format",