From 451be8a6004a78513b83eb63c86c20d10fbb2837 Mon Sep 17 00:00:00 2001 From: Vladimir Indic <139573562+vlaindic@users.noreply.github.com> Date: Mon, 16 Jun 2025 23:09:21 +0200 Subject: [PATCH] [PC Sampling] Separate counts for issued and stalled samples (#756) --- src/utils/parser.py | 54 ++++++++++++++++++++++++++++++++++++++------- src/utils/utils.py | 4 ++-- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/src/utils/parser.py b/src/utils/parser.py index 4855aa28a3..e99ec05416 100644 --- a/src/utils/parser.py +++ b/src/utils/parser.py @@ -1050,6 +1050,8 @@ def search_pc_sampling_record(records): lambda: defaultdict( lambda: { "count": 0, + "count_issued": 0, + "count_stalled": 0, "inst_index": None, "stall_reason": { "NONE": 0, @@ -1068,6 +1070,10 @@ def search_pc_sampling_record(records): ) ) + rocp_inst_not_issued_prefix_len = len( + "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_" + ) + # Populate grouped_data for i, item in enumerate(records): pc_info = item["record"].get("pc", {}) @@ -1075,6 +1081,7 @@ def search_pc_sampling_record(records): code_object_offset = pc_info.get("code_object_offset") snapshot = item["record"].get("snapshot", {}) inst_index = item.get("inst_index") + issued = item["record"].get("wave_issued") # Todo: opt me if ( @@ -1087,10 +1094,13 @@ def search_pc_sampling_record(records): grouped_data[code_object_id][code_object_offset]["inst_index"] = inst_index if len(snapshot): - # NB: 54 is the length of prefix "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_" - grouped_data[code_object_id][code_object_offset]["stall_reason"][ - snapshot.get("stall_reason")[54:] - ] += 1 + if issued: + grouped_data[code_object_id][code_object_offset]["count_issued"] += 1 + else: + grouped_data[code_object_id][code_object_offset]["count_stalled"] += 1 + grouped_data[code_object_id][code_object_offset]["stall_reason"][ + snapshot.get("stall_reason")[rocp_inst_not_issued_prefix_len:] + ] += 1 # print( # inst_index, # grouped_data[code_object_id][code_object_offset]["stall_reason"], @@ -1110,6 +1120,8 @@ def search_pc_sampling_record(records): info["inst_index"], offset, info["count"], + info["count_issued"], + info["count_stalled"], # For info["stall_reason"], remove the zero entries, sorting the remaining items by their values in descending order sorted( ((k, v) for k, v in info["stall_reason"].items() if v > 0), @@ -1205,14 +1217,22 @@ def load_pc_sampling_data_per_kernel( df = pd.DataFrame( search_pc_sampling_record(pc_sample_key_loc), - columns=["code_object_id", "inst_index", "offset", "count", "stall_reason"], + columns=[ + "code_object_id", + "inst_index", + "offset", + "count", + "count_issued", + "count_stalled", + "stall_reason", + ], ) df = df[ (df["code_object_id"] == kernel_info["code_object_id"]) & (df["offset"] > kernel_info["entry_byte_offset"]) & (df["offset"] < kernel_info["potential_end_offset"]) - ][["inst_index", "offset", "count", "stall_reason"]] + ][["inst_index", "offset", "count", "count_issued", "count_stalled", "stall_reason"]] df["offset"] = df["offset"].apply(lambda x: hex(x)) @@ -1239,7 +1259,17 @@ def load_pc_sampling_data_per_kernel( return ( df[["source_line", "instruction", "offset", "count"]] if method == "host_trap" - else df[["source_line", "instruction", "offset", "count", "stall_reason"]] + else df[ + [ + "source_line", + "instruction", + "offset", + "count", + "count_issued", + "count_stalled", + "stall_reason", + ] + ] ) else: # sort by "count" return ( @@ -1248,7 +1278,15 @@ def load_pc_sampling_data_per_kernel( ) if method == "host_trap" else df[ - ["source_line", "instruction", "offset", "count", "stall_reason"] + [ + "source_line", + "instruction", + "offset", + "count", + "count_issued", + "count_stalled", + "stall_reason", + ] ].sort_values(by="count", ascending=False) ) # might support sort by stall reason in the future diff --git a/src/utils/utils.py b/src/utils/utils.py index 43a12ae10b..49375a6116 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -993,7 +993,7 @@ def pc_sampling_prof( "ROCPROFILER_PC_SAMPLING_BETA_ENABLED": "1", "ROCPROF_PC_SAMPLING_UNIT": unit, "ROCPROF_PC_SAMPLING_INTERVAL": str(interval), - "ROCPROF_PC_SAMPLING_METHOD": "host_trap", + "ROCPROF_PC_SAMPLING_METHOD": method, } new_env = os.environ.copy() for key, value in options.items(): @@ -1007,7 +1007,7 @@ def pc_sampling_prof( options = [ "--pc-sampling-beta-enabled", "--pc-sampling-method", - "host_trap", + method, "--pc-sampling-unit", unit, "--output-format",