Files
rocm-systems/src/utils/parser.py
T
vedithal-amd f9aa7be97c Support MI 350 profiling (#632)
* Add MI 350 hardware information

* Refactor MI GPU YAML file and corresponding interface

* Add SoC file for gfx950 architecture

* Add analysis report configs for MI 350 containing existing metrics

* Add placeholder None valued metrics for previous architectures to make
  baseline comparison work

* Enable testing on MI 350

* Analysis config metric changes
    - SPI changes
        - Update metric formula for default SPI pipe counter
             - Use efficiently collected pipe wise SPI counters
        - Add SPI Wave Occupancy
        - Add Scheduler-Pipe Wave Utilization
        - Update formula for VGPR Writes
        - Add Scheduler-Pipe FIFO Full Rate
   - CPC changes
	- Add CPC SYNC FIFO Full Rate
	- Add CPC CANE Stall Rate
        - Add CPC ADC Utilization
   - SQ changes
        - Add VALU co-issue efficiency
        - Add F6F4 datatype metrics
        - Update formula for total FLOPs by adding F6F4 counters
        - Add LDS STORE / LOAD / ATOMIC metrics
        - Add LDS STORE / LOAD / ATOMIC bandwidth
        - Add LDS FIFO and TA ADDR / CMD / DATA FIFO full rates

* Collect TCP_TCP_LATENCY_sum only for gfx950 (MI 350)

* Do not inject SQ_ACCUM_PREV_HIRES unnecessarily

* Do not hardcode memory and shader clock speeds

* Write num_hbm_channels to sysinfo.csv instead of hbm_bw while profiling

* Move generate sysinfo.csv to pre processing step of profiling

* Add warnings to use --specs-correction for missing sysinfo.csv values during analysis phase

* Update CHANGELOG

* Analysis phase warning to use --specs-correction when needed
2025-04-03 02:21:18 -04:00

1341 строка
53 KiB
Python

##############################################################################bl
# MIT License
#
# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
##############################################################################el
import ast
import json
import re
import sys
import warnings
from collections import defaultdict
from pathlib import Path
import astunparse
import numpy as np
import pandas as pd
from utils import schema
from utils.logger import console_debug, console_error, console_warning, demarcate
# ------------------------------------------------------------------------------
# Internal global definitions
# NB:
# Ammolite is a unique gemstone from the Rocky Mountains.
# "ammolite__" is a special internal prefix that marks built-in global variables
# calculated or parsed from raw data sources. It is only meaningful in this file.
# Any other general prefix string, like "buildin__", might already be used by
# the editor. Whenever it is changed to a new one, replace all appearances in
# this file.
# 001 is the ID of the pmc_kernel_top.csv table
pmc_kernel_top_table_id = 1
# Built-in $denom as defined in the mongodb query:
# "denom": {
# "$switch" : {
# "branches": [
# {
# "case": { "$eq": [ $normUnit, "per Wave"]} ,
# "then": "&SQ_WAVES"
# },
# {
# "case": { "$eq": [ $normUnit, "per Cycle"]} ,
# "then": "&GRBM_GUI_ACTIVE"
# },
# {
# "case": { "$eq": [ $normUnit, "per Sec"]} ,
# "then": {"$divide":[{"$subtract": ["&End_Timestamp", "&Start_Timestamp" ]}, 1000000000]}
# }
# ],
# "default": 1
# }
# }
# Maps a normalization unit name to the expression text that
# update_denom_string() substitutes for "$denom" in a metric equation.
supported_denom = {
    "per_wave": "SQ_WAVES",
    "per_cycle": "$GRBM_GUI_ACTIVE_PER_XCD",
    "per_second": "((End_Timestamp - Start_Timestamp) / 1000000000)",
    "per_kernel": "1",
}
# Built-in derived variables (originally defined as mongodb variables).
# Keys are variable names (referenced as "$<name>" in equations); values are
# equation strings that eval_metric() converts with build_eval_string() and
# evaluates. Entries whose key contains "PER_XCD" are evaluated first because
# the remaining entries reference them.
build_in_vars = {
    "GRBM_GUI_ACTIVE_PER_XCD": "(GRBM_GUI_ACTIVE / $num_xcd)",
    "GRBM_COUNT_PER_XCD": "(GRBM_COUNT / $num_xcd)",
    "GRBM_SPI_BUSY_PER_XCD": "(GRBM_SPI_BUSY / $num_xcd)",
    "numActiveCUs": "TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / $GRBM_GUI_ACTIVE_PER_XCD)), \
0) / $max_waves_per_cu) * 8) + MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) \
/ $GRBM_GUI_ACTIVE_PER_XCD)), 0), $max_waves_per_cu), 8)), $cu_per_gpu))",
    "kernelBusyCycles": "ROUND(AVG((((End_Timestamp - Start_Timestamp) / 1000) * $max_sclk)), 0)",
    "hbmBandwidth": "($max_mclk / 1000 * 32 * $num_hbm_channels)",
}
# Maps the function names allowed in metric equations to the names of the
# Python helpers defined below; CodeTransformer.visit_Call performs the rename.
supported_call = {
    # If one of the below is called with a single arg, like (expr), it is an
    # aggregation and is turned into a pandas function.
    # If it is called with several args, it is turned into a python function.
    "MIN": "to_min",
    "MAX": "to_max",
    # simple aggregations
    "AVG": "to_avg",
    "MEDIAN": "to_median",
    "STD": "to_std",
    # functions applied to a whole column of a df or to a single value
    "TO_INT": "to_int",
    # the functions below take 2 inputs
    "ROUND": "to_round",
    "QUANTILE": "to_quantile",
    "MOD": "to_mod",
    # Concat operation from the memory chart "active cus"
    "CONCAT": "to_concat",
}
# ------------------------------------------------------------------------------
def to_min(*args):
    """
    Implementation of the MIN() equation function.

    - A single pandas Series is reduced to its smallest element.
    - Two arguments where at least one is a Series are compared element-wise
      (same convention as to_max).
    - Anything else goes through the builtin min(); a None result, e.g.
      MIN(None), is reported as NaN.
    """
    if len(args) == 1 and isinstance(args[0], pd.Series):
        return args[0].min()
    if len(args) == 2 and (
        isinstance(args[0], pd.Series) or isinstance(args[1], pd.Series)
    ):
        # builtin min() cannot compare a Series with a scalar (ambiguous truth
        # value), so do the comparison element-wise instead
        return np.minimum(args[0], args[1])
    # evaluate min() once and test identity: "== None" is evaluated
    # element-wise by array-like results and cannot be used as a condition
    smallest = min(args)
    return np.nan if smallest is None else smallest
def to_max(*args):
    """
    Implementation of the MAX() equation function.

    - A single pandas Series is reduced to its largest element.
    - Two arguments where at least one is a Series are compared element-wise.
    - Anything else goes through the builtin max(); a None result, e.g.
      MAX(None), is reported as NaN.
    """
    if len(args) == 1 and isinstance(args[0], pd.Series):
        return args[0].max()
    if len(args) == 2 and (
        isinstance(args[0], pd.Series) or isinstance(args[1], pd.Series)
    ):
        return np.maximum(args[0], args[1])
    # evaluate max() once and test identity: "== None" is evaluated
    # element-wise by array-like results and cannot be used as a condition
    largest = max(args)
    return np.nan if largest is None else largest
def to_avg(a):
    """
    Implementation of the AVG() equation function.

    Returns NaN for None, for an input consisting only of NaN and for an empty
    Series; returns the mean for any other pandas Series.

    Raises:
        Exception: if the input is neither None, NaN-only, nor a Series.
    """
    if a is None:
        return np.nan
    # NB: np.isnan() is applied before the type check on purpose, so that a
    # scalar NaN (e.g. the result of an earlier failed aggregation) maps to NaN
    if np.isnan(a).all():
        return np.nan
    if isinstance(a, pd.Series):
        return np.nan if a.empty else a.mean()
    # Previously unreachable: non-Series input failed on "a.empty" with an
    # AttributeError before this intended error could be raised.
    raise Exception("to_avg: unsupported type.")
def to_median(a):
    """
    Implementation of the MEDIAN() equation function.

    None is passed through unchanged; a pandas Series is reduced to its median
    with RuntimeWarnings silenced. Any other input raises an Exception.
    """
    if a is None:
        return None
    if not isinstance(a, pd.core.series.Series):
        raise Exception("to_median: unsupported type.")
    with warnings.catch_warnings():
        # keep RuntimeWarnings raised while computing the median off the console
        warnings.simplefilter("ignore", category=RuntimeWarning)
        return a.median()
def to_std(a):
    """
    Implementation of the STD() equation function.

    Returns the standard deviation of a pandas Series; any other input
    raises an Exception.
    """
    if not isinstance(a, pd.core.series.Series):
        raise Exception("to_std: unsupported type.")
    return a.std()
def to_int(a):
    """
    Implementation of the TO_INT() equation function.

    - None is passed through unchanged.
    - Python and NumPy integer/float scalars are truncated with int().
    - A pandas Series is converted to the nullable "Int64" dtype.

    Raises:
        Exception: for any other input type.
    """
    if a is None:
        return None
    # np.integer / np.floating cover every NumPy scalar width, not only int64
    if isinstance(a, (int, float, np.integer, np.floating)):
        return int(a)
    if isinstance(a, pd.Series):
        return a.astype("Int64")
    raise Exception("to_int: unsupported type.")
def to_round(a, b):
    """
    Implementation of the ROUND() equation function.

    Rounds a to b decimals, using Series.round() for a pandas Series and the
    builtin round() for everything else.
    """
    is_series = isinstance(a, pd.core.series.Series)
    return a.round(b) if is_series else round(a, b)
def to_quantile(a, b):
    """
    Implementation of the QUANTILE() equation function.

    None is passed through unchanged; for a pandas Series the quantile b is
    returned. Any other input raises an Exception.
    """
    if a is None:
        return None
    if not isinstance(a, pd.core.series.Series):
        raise Exception("to_quantile: unsupported type.")
    return a.quantile(b)
def to_mod(a, b):
    """
    Implementation of the MOD() equation function.

    Uses Series.mod() for a pandas Series and the % operator otherwise.
    """
    is_series = isinstance(a, pd.core.series.Series)
    return a.mod(b) if is_series else a % b
def to_concat(a, b):
    """
    Implementation of the CONCAT() equation function: the str() forms of
    both arguments joined together.
    """
    return "".join((str(a), str(b)))
class CodeTransformer(ast.NodeTransformer):
    """
    Python AST visitor that transforms a user defined equation into the
    dataframe based form that is evaluated later.

    - Calls to names listed in supported_call are renamed to their helper.
    - "body if test else orelse" becomes "body.where(test, orelse)".
    - Every other name (except "ammolite__" built-ins) becomes a column lookup
      raw_pmc_df['<name>'].

    The deprecated ast.Num / ast.Str / ast.Index node classes are not used;
    ast.Constant is the supported spelling.
    """

    def visit_Call(self, node):
        """Rename a supported equation function to its Python helper."""
        self.generic_visit(node)
        if isinstance(node.func, ast.Name):
            if node.func.id in supported_call:
                node.func.id = supported_call[node.func.id]
            else:
                # NOTE(review): generic_visit() above has already turned any
                # unknown non-"ammolite__" function name into a Subscript, so
                # only "ammolite__" prefixed names can reach this branch.
                raise Exception(
                    "Unknown call:", node.func.id
                )  # Could be removed if too strict
        return node

    def visit_IfExp(self, node):
        """Rewrite a conditional expression into body.where(test, orelse)."""
        self.generic_visit(node)
        body = node.body
        # A bare number has no .where(); bool is excluded to match what the
        # old ast.Num check accepted.
        if (
            isinstance(body, ast.Constant)
            and isinstance(body.value, (int, float, complex))
            and not isinstance(body.value, bool)
        ):
            raise Exception(
                "Don't support body of IF with number only! Has to be expr with df['column']."
            )
        new_node = ast.Expr(
            value=ast.Call(
                func=ast.Attribute(value=body, attr="where", ctx=ast.Load()),
                args=[node.test, node.orelse],
                keywords=[],
            )
        )
        return new_node

    # NB:
    # visit_Name is for replacing a HW counter with its df expr. In this way,
    # we can support any HW counter name, which is easier than regex.
    #
    # There are 2 limitations:
    # - It is not straightforward to support types other than a simple column
    #   in df, such as [], (). If we need to support those, we have to
    #   implement it in the correct way or work around it.
    # - The 'raw_pmc_df' is hack code. For other data sources, like wavefront
    #   data, we need to think about a template or pass it as a parameter.
    def visit_Name(self, node):
        """Replace a counter name with the lookup raw_pmc_df['<name>']."""
        self.generic_visit(node)
        if (not node.id.startswith("ammolite__")) and (
            node.id not in supported_call
        ):
            node = ast.Subscript(
                value=ast.Name(id="raw_pmc_df", ctx=ast.Load()),
                slice=ast.Constant(value=node.id),
                ctx=ast.Load(),
            )
        return node
def build_eval_string(equation, coll_level):
    """
    Convert a user defined equation string into a string that eval() can run.

    For example,
        input:  AVG(100 * SQ_ACTIVE_INST_SCA / ( GRBM_GUI_ACTIVE * $numCU ))
        output: to_avg(100 * raw_pmc_df["pmc_perf"]["SQ_ACTIVE_INST_SCA"] /
                (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] * numCU))
        input:  AVG(((TCC_EA_RDREQ_LEVEL_31 / TCC_EA_RDREQ_31) if (TCC_EA_RDREQ_31 != 0) else (0)))
        output: to_avg((raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_LEVEL_31"] / raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"]).where(raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"] != 0, 0))
    The following can not be handled for now,
        input:  AVG((0 if (TCC_EA_RDREQ_31 == 0) else (TCC_EA_RDREQ_LEVEL_31 / TCC_EA_RDREQ_31)))
    but a potential workaround is,
        output: to_avg(raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"].where(raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"] == 0, raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_LEVEL_31"] / raw_pmc_df["pmc_perf"]["TCC_EA_RDREQ_31"]))

    An empty/None equation yields "". A None coll_level raises an Exception.
    """
    if coll_level is None:
        raise Exception("Error: coll_level can not be None.")
    if not equation:
        return ""

    # Built-in variables start with '$', which python can not parse;
    # swap it for the internal 'ammolite__' prefix.
    # TODO: pre-check there is no "ammolite__" in all config files.
    expr = re.sub(r"\$", "ammolite__", str(equation))

    # Rewrite the equation into its intermediate dataframe form.
    tree = ast.parse(expr)
    CodeTransformer().visit(tree)
    expr = astunparse.unparse(tree)

    rewrites = (
        # column labels that contain [], such as TCC_HIT[0]:
        # the target is df['TCC_HIT[0]']
        (r"\'\]\[(\d+)\]", r"[\g<1>]']"),
        # use .get() to catch any potential KeyErrors
        (r"raw_pmc_df\['(.*?)']", r'raw_pmc_df.get("\1")'),
        # apply coll_level
        (r"raw_pmc_df", "raw_pmc_df.get('" + coll_level + "')"),
    )
    for pattern, replacement in rewrites:
        expr = re.sub(pattern, replacement, expr)
    return expr
def update_denom_string(equation, unit):
    """
    Replace $denom in the equation with the expression of the runtime
    normalization unit. Units missing from supported_denom leave the equation
    untouched; an empty/None equation yields "".
    """
    if not equation:
        return ""
    text = str(equation)
    if unit not in supported_denom.keys():
        return text
    return re.sub(r"\$denom", supported_denom[unit], text)
def update_normUnit_string(equation, unit):
    """
    Replace "(<prefix> + $normUnit)" in the equation with "<prefix> <unit>",
    where underscores in the unit are shown as spaces, then capitalize the
    result. It is a string replacement for display only.
    """
    # TODO: We might want to do it for subtitle contains $normUnit
    if not equation:
        return ""
    readable_unit = re.sub("_", " ", unit)
    pattern = r"\((?P<PREFIX>\w*)\s+\+\s+(\$normUnit\))"
    substituted = re.sub(pattern, r"\g<PREFIX> " + readable_unit, str(equation))
    return substituted.capitalize()
def gen_counter_list(formula):
    """
    Collect the hardware counter names referenced by a metric formula.

    Built-in placeholders ($normUnit, $denom, $numActiveCUs) are expanded, the
    remaining '$' markers are dropped and the result is parsed as a Python
    expression. Every all-uppercase identifier that is not a known function or
    unit name counts as a counter; a trailing "_sum" is stripped first.

    Returns:
        (visited, counters): visited is True when at least one counter or
        built-in dispatch field was seen; counters is the list of counter
        names in visiting order (duplicates are kept). A non-string or
        unparsable formula yields (False, []).
    """
    function_filter = {
        "MIN",
        "MAX",
        "AVG",
        "ROUND",
        "TO_INT",
        "GB",
        "STD",
        "GFLOP",
        "GOP",
        "OP",
        "CU",
        "NC",
        "UC",
        "CC",
        "RW",
        "GIOP",
        "GFLOPs",
        "CONCAT",
        "MOD",
    }
    built_in_counter = {
        "LDS_Per_Workgroup",
        "Grid_Size",
        "Workgroup_Size",
        "Arch_VGPR",
        "Accum_VGPR",
        "SGPR",
        "Scratch_Per_Workitem",
        "Start_Timestamp",
        "End_Timestamp",
    }
    # Expansion of $numActiveCUs. A stray "}" used to make this expansion
    # unparsable, so formulas using $numActiveCUs silently produced no counters.
    num_active_cus_expr = (
        "TO_INT(MIN((((ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) / $GRBM_GUI_ACTIVE_PER_XCD)), "
        "0) / $maxWavesPerCU) * 8) + MIN(MOD(ROUND(AVG(((4 * SQ_BUSY_CU_CYCLES) "
        "/ $GRBM_GUI_ACTIVE_PER_XCD)), 0), $maxWavesPerCU), 8)), $numCU))"
    )

    visited = False
    counters = []
    if not isinstance(formula, str):
        return visited, counters

    expression = (
        formula.replace("$normUnit", "SQ_WAVES")
        .replace("$denom", "SQ_WAVES")
        .replace("$numActiveCUs", num_active_cus_expr)
        .replace("$", "")
    )
    try:
        tree = ast.parse(expression)
    except (SyntaxError, ValueError, RecursionError):
        # best effort: a formula that is not a valid expression has no counters
        return visited, counters

    for node in ast.walk(tree):
        if not isinstance(node, ast.Name):
            continue
        val = node.id[:-4] if node.id.endswith("_sum") else node.id
        if val.isupper() and val not in function_filter:
            counters.append(val)
            visited = True
        if val in built_in_counter:
            visited = True
    return visited, counters
def calc_builtin_var(var, sys_info):
    """
    Resolve a built-in variable against sys_info.

    An int is returned as is; a string starting with "$total_l2_chan" resolves
    to sys_info.total_l2_chan. Anything else is reported via console_error().
    """
    refers_to_l2_channels = isinstance(var, str) and var.startswith(
        "$total_l2_chan"
    )
    if isinstance(var, int):
        return var
    if refers_to_l2_channels:
        return sys_info.total_l2_chan
    console_error('Built-in var " %s " is not supported' % var)
@demarcate
def build_dfs(archConfigs, filter_metrics, sys_info):
    """
    Build the per-table template dataframes and metric bookkeeping.

    - Build a dataframe for each type of data source within each panel.
      Each dataframe will be used as a template to load data with each run later.
      "metric_table", "raw_csv_table" and "pc_sampling_table" are handled.
    - Collect/build metric_list to support customized metrics profiling.

    Nothing is returned. The results are attached to archConfigs as the
    attributes "dfs" (table id -> dataframe), "metric_list" (index string ->
    title/metric name), "dfs_type" (table id -> table type) and
    "metric_counters" (metric name -> list of counter names).

    NB: the "metric" dict of any metric_table that contains a
    "placeholder_range" entry is expanded and replaced in place.
    """
    # TODO: more error checking for filter_metrics!!
    # if filter_metrics:
    #     for metric in filter_metrics:
    #         if not metric in avail_ip_blocks:
    #             print("{} is not a valid metric to filter".format(metric))
    #             exit(1)

    # For "simple_box" tables, each "expr" is wrapped into these five
    # aggregations (prefix, suffix), one column per key.
    simple_box = {
        "Min": ["MIN(", ")"],
        "Q1": ["QUANTILE(", ", 0.25)"],
        "Median": ["MEDIAN(", ")"],
        "Q3": ["QUANTILE(", ", 0.75)"],
        "Max": ["MAX(", ")"],
    }
    d = {}
    metric_list = {}
    dfs_type = {}
    metric_counters = {}

    # Pass 1: expand metrics that are declared once with a placeholder range.
    for panel_id, panel in archConfigs.panel_configs.items():
        for data_source in panel["data source"]:
            for type, data_config in data_source.items():
                if (
                    type == "metric_table"
                    and "metric" in data_config
                    and "placeholder_range" in data_config["metric"]
                ):
                    # print(data_config["metric"])
                    new_metrics = {}
                    # NB: support single placeholder for now!!
                    p_range = data_config["metric"].pop("placeholder_range")
                    # popitem() takes the last remaining entry as the template
                    metric, metric_expr = data_config["metric"].popitem()
                    # print(len(data_config["metric"]))
                    # data_config['metric'].clear()
                    for p, r in p_range.items():
                        # NB: We have to resolve placeholder range first if it
                        # is a build-in var. It will be too late to do it in
                        # eval_metric(). This is the only reason we need
                        # sys_info at this stage.
                        var = calc_builtin_var(r, sys_info)
                        for i in range(var):
                            # one metric per index, placeholder replaced by i
                            new_key = metric.replace(p, str(i))
                            new_val = {}
                            for k, v in metric_expr.items():
                                new_val[k] = metric_expr[k].replace(p, str(i))
                            # print(new_val)
                            new_metrics[new_key] = new_val
                    # print(p_range)
                    # print(new_metrics)
                    data_config["metric"] = new_metrics
                    # print(data_config)
                    # print(data_config["metric"])

    # Pass 2: build one template dataframe per data source.
    for panel_id, panel in archConfigs.panel_configs.items():
        for data_source in panel["data source"]:
            for type, data_config in data_source.items():
                if type == "metric_table":
                    headers = ["Metric_ID"]
                    data_source_idx = str(data_config["id"] // 100)
                    # NOTE(review): data_source_idx is a str here, so
                    # "!= 0" is always True and the panel title is always
                    # recorded - confirm whether '!= "0"' was intended.
                    if data_source_idx != 0 or (
                        filter_metrics and data_source_idx in filter_metrics
                    ):
                        metric_list[data_source_idx] = panel["title"]
                    if (
                        "cli_style" in data_config
                        and data_config["cli_style"] == "simple_box"
                    ):
                        # metric name, the five box columns, then the
                        # remaining headers except "tips" and "expr"
                        headers.append(data_config["header"]["metric"])
                        for k in simple_box.keys():
                            headers.append(k)
                        for key, tile in data_config["header"].items():
                            if key != "metric" and key != "tips" and key != "expr":
                                headers.append(tile)
                    else:
                        for key, tile in data_config["header"].items():
                            if key != "tips":
                                headers.append(tile)
                    # do we always need one?
                    headers.append("coll_level")
                    # "tips" always goes last
                    if "tips" in data_config["header"].keys():
                        headers.append(data_config["header"]["tips"])
                    df = pd.DataFrame(columns=headers)
                    # i counts every metric, filtered or not, so that a
                    # metric keeps the same index whatever the filter is
                    i = 0
                    for key, entries in data_config["metric"].items():
                        # from here on data_source_idx is "<panel>.<table>"
                        data_source_idx = (
                            str(data_config["id"] // 100)
                            + "."
                            + str(data_config["id"] % 100)
                        )
                        metric_idx = data_source_idx + "." + str(i)
                        values = []
                        eqn_content = []
                        if (
                            (not filter_metrics)
                            or (
                                metric_idx in filter_metrics
                            )  # no filter # metric in filter
                            or
                            # the whole table in filter
                            (data_source_idx in filter_metrics)
                            or
                            # the whole IP block in filter
                            (str(panel_id // 100) in filter_metrics)
                        ):
                            values.append(metric_idx)
                            values.append(key)
                            metric_list[data_source_idx] = data_config["title"]
                            if (
                                "cli_style" in data_config
                                and data_config["cli_style"] == "simple_box"
                            ):
                                # print("~~~~~~~~~~~~~~~~~")
                                # print(entries)
                                # print("~~~~~~~~~~~~~~~~~")
                                for k, v in entries.items():
                                    if k == "expr":
                                        # one wrapped expression per box column
                                        for bk, bv in simple_box.items():
                                            values.append(bv[0] + v + bv[1])
                                    else:
                                        if (
                                            k != "tips"
                                            and k != "coll_level"
                                            and k != "alias"
                                        ):
                                            values.append(v)
                            else:
                                for k, v in entries.items():
                                    if k != "tips" and k != "coll_level" and k != "alias":
                                        values.append(v)
                                        # also kept for counter extraction below
                                        eqn_content.append(v)
                            if "alias" in entries.keys():
                                values.append(entries["alias"])
                            # default collection level when none is given
                            if "coll_level" in entries.keys():
                                values.append(entries["coll_level"])
                            else:
                                values.append(schema.pmc_perf_file_prefix)
                            if "tips" in entries.keys():
                                values.append(entries["tips"])
                            # print(headers, values)
                            # print(key, entries)
                            df_new_row = pd.DataFrame([values], columns=headers)
                            df = pd.concat([df, df_new_row])
                            # collect metric_list
                            metric_list[metric_idx] = key
                            # generate mapping of counters and metrics;
                            # a dict is used as an ordered set of counter names
                            filter = {}
                            _visited = False
                            for formula in eqn_content:
                                if formula is not None and formula != "None":
                                    visited, counters = gen_counter_list(formula)
                                    if visited:
                                        _visited = True
                                    for k in counters:
                                        filter[k] = None
                            if len(filter) > 0 or _visited:
                                metric_counters[key] = list(filter)
                        i += 1
                    df.set_index("Metric_ID", inplace=True)
                    # df.set_index('Metric', inplace=True)
                    # print(tabulate(df, headers='keys', tablefmt='fancy_grid'))
                elif type == "raw_csv_table":
                    data_source_idx = str(data_config["id"] // 100)
                    if (
                        (not filter_metrics)
                        or (data_source_idx == "0")  # no filter
                        or (data_source_idx in filter_metrics)
                    ):
                        # single-cell df: the column name tells the loader how
                        # to read the csv named by data_config["source"]
                        if (
                            "columnwise" in data_config
                            and data_config["columnwise"] == True
                        ):
                            df = pd.DataFrame(
                                [data_config["source"]], columns=["from_csv_columnwise"]
                            )
                        else:
                            df = pd.DataFrame(
                                [data_config["source"]], columns=["from_csv"]
                            )
                        metric_list[data_source_idx] = panel["title"]
                    else:
                        df = pd.DataFrame()
                elif type == "pc_sampling_table":
                    data_source_idx = str(data_config["id"] // 100)
                    # NB: enable pc sampling only when users specify, not enable as default
                    if filter_metrics and (data_source_idx in filter_metrics):
                        df = pd.DataFrame(
                            [data_config["source"]], columns=["from_pc_sampling"]
                        )
                        metric_list[data_source_idx] = panel["title"]
                    else:
                        df = pd.DataFrame()
                # NOTE(review): for any other table type df is not reassigned,
                # so the df of the previous data source (or an unbound name)
                # is stored here - confirm no other types occur.
                d[data_config["id"]] = df
                dfs_type[data_config["id"]] = type
    setattr(archConfigs, "dfs", d)
    setattr(archConfigs, "metric_list", metric_list)
    setattr(archConfigs, "dfs_type", dfs_type)
    setattr(archConfigs, "metric_counters", metric_counters)
def build_metric_value_string(dfs, dfs_type, normal_unit):
    """
    Turn the equation fields of every metric_table df into eval strings.

    Columns listed in schema.supported_field get $denom substituted and, except
    for the alias column, are converted cell by cell with build_eval_string().
    "unit"/"units" columns only get their $normUnit text replaced.
    The dataframes are modified in place.
    """
    for table_id, table in dfs.items():
        if dfs_type[table_id] != "metric_table":
            continue
        for column in table.columns:
            if column in schema.supported_field:
                # NB: apply all build-in before building the whole string
                table[column] = table[column].apply(
                    update_denom_string, unit=normal_unit
                )
                # NB: there should be a faster way to do with single apply
                if table.empty:
                    continue
                for label in table.index.to_list():
                    if column.lower() == "alias":
                        continue
                    table.at[label, column] = build_eval_string(
                        table.at[label, column],
                        table.at[label, "coll_level"],
                    )
            elif column.lower() in ("unit", "units"):
                table[column] = table[column].apply(
                    update_normUnit_string, unit=normal_unit
                )
@demarcate
def eval_metric(dfs, dfs_type, sys_info, raw_pmc_df, debug):
    """
    Execute the expr string for each metric in the df.

    The eval strings produced by build_eval_string() refer to raw_pmc_df, to
    the to_*() helpers and to local variables named "ammolite__<name>". Those
    locals are therefore defined one by one below and must keep their names.

    Each evaluated cell is replaced by its result; cells that are empty,
    evaluate to NaN or hit a missing counter/csv are set to "".
    With debug=True the expression, its inputs and its output are printed.
    """
    # confirm no illogical counter values (only consider non-roofline runs)
    roof_only_run = sys_info.ip_blocks == "roofline"
    rocscope_run = sys_info.ip_blocks == "rocscope"
    if (
        (not rocscope_run and not roof_only_run)
        and hasattr(raw_pmc_df["pmc_perf"], "GRBM_GUI_ACTIVE")
        and (raw_pmc_df["pmc_perf"]["GRBM_GUI_ACTIVE"] == 0).any()
    ):
        console_warning("Dectected GRBM_GUI_ACTIVE == 0")
        console_error("Hauting execution for warning above.")

    # System specs used by the equations. A missing (NaN or 0) value only
    # produces a warning that points at --specs-correction.
    # NOTE(review): int() of a NaN raises ValueError before the isnan check
    # below can warn - confirm how sys_info encodes missing values.
    ammolite__se_per_gpu = int(sys_info.se_per_gpu)
    if np.isnan(ammolite__se_per_gpu) or ammolite__se_per_gpu == 0:
        console_warning(
            "se_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__pipes_per_gpu = int(sys_info.pipes_per_gpu)
    if np.isnan(ammolite__pipes_per_gpu) or ammolite__pipes_per_gpu == 0:
        console_warning(
            "pipes_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__cu_per_gpu = int(sys_info.cu_per_gpu)
    if np.isnan(ammolite__cu_per_gpu) or ammolite__cu_per_gpu == 0:
        console_warning(
            "cu_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__simd_per_cu = int(sys_info.simd_per_cu)  # not used
    if np.isnan(ammolite__simd_per_cu) or ammolite__simd_per_cu == 0:
        console_warning(
            "simd_per_cu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__sqc_per_gpu = int(sys_info.sqc_per_gpu)
    if np.isnan(ammolite__sqc_per_gpu) or ammolite__sqc_per_gpu == 0:
        console_warning(
            "sqc_per_gpu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__lds_banks_per_cu = int(sys_info.lds_banks_per_cu)
    if np.isnan(ammolite__lds_banks_per_cu) or ammolite__lds_banks_per_cu == 0:
        console_warning(
            "lds_banks_per_cu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__cur_sclk = float(sys_info.cur_sclk)  # not used
    if np.isnan(ammolite__cur_sclk) or ammolite__cur_sclk == 0:
        console_warning(
            "cur_sclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__cur_mclk = float(sys_info.cur_mclk)  # not used
    if np.isnan(ammolite__cur_mclk) or ammolite__cur_mclk == 0:
        console_warning(
            "cur_mclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__max_mclk = float(sys_info.max_mclk)
    if np.isnan(ammolite__max_mclk) or ammolite__max_mclk == 0:
        console_warning(
            "max_mclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__max_sclk = float(sys_info.max_sclk)
    if np.isnan(ammolite__max_sclk) or ammolite__max_sclk == 0:
        console_warning(
            "max_sclk is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__max_waves_per_cu = int(sys_info.max_waves_per_cu)
    if np.isnan(ammolite__max_waves_per_cu) or ammolite__max_waves_per_cu == 0:
        # the message names the spec exactly as it is read from sys_info
        console_warning(
            "max_waves_per_cu is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__num_hbm_channels = float(sys_info.num_hbm_channels)
    if np.isnan(ammolite__num_hbm_channels) or ammolite__num_hbm_channels == 0:
        console_warning(
            "num_hbm_channels is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__total_l2_chan = calc_builtin_var("$total_l2_chan", sys_info)
    if np.isnan(ammolite__total_l2_chan) or ammolite__total_l2_chan == 0:
        console_warning(
            "total_l2_chan is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__num_xcd = int(sys_info.num_xcd)
    if np.isnan(ammolite__num_xcd) or ammolite__num_xcd == 0:
        console_warning(
            "num_xcd is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )
    ammolite__wave_size = int(sys_info.wave_size)
    if np.isnan(ammolite__wave_size) or ammolite__wave_size == 0:
        console_warning(
            "wave_size is not available in sysinfo.csv, please provide the correct value using --specs-correction"
        )

    # TODO: fix all $normUnit in Unit column or title
    # build and eval all derived build-in global variables
    ammolite__build_in = {}
    # first pass, we do all per-xcd values, as these are used in subsequent builtins
    for key, value in build_in_vars.items():
        if "PER_XCD" not in key:
            continue
        # NB: assume all built-in vars from pmc_perf.csv for now
        s = build_eval_string(value, schema.pmc_perf_file_prefix)
        try:
            ammolite__build_in[key] = eval(compile(s, "<string>", "eval"))
        except TypeError:
            # a counter is missing: the lookup returned None
            ammolite__build_in[key] = None
        except AttributeError as ae:
            # Compare the message text: an exception object never equals a
            # str, so the fallback below used to be skipped and the lookup
            # after the loop failed with a KeyError.
            if str(ae) == "'NoneType' object has no attribute 'get'":
                # the whole csv is missing
                ammolite__build_in[key] = None
            else:
                raise
    ammolite__GRBM_GUI_ACTIVE_PER_XCD = ammolite__build_in["GRBM_GUI_ACTIVE_PER_XCD"]
    ammolite__GRBM_COUNT_PER_XCD = ammolite__build_in["GRBM_COUNT_PER_XCD"]
    ammolite__GRBM_SPI_BUSY_PER_XCD = ammolite__build_in["GRBM_SPI_BUSY_PER_XCD"]
    for key, value in build_in_vars.items():
        # next pass, we evaluate the builtins that depend on the per-XCD values
        if "PER_XCD" in key:
            continue
        # NB: assume all built-in vars from pmc_perf.csv for now
        s = build_eval_string(value, schema.pmc_perf_file_prefix)
        try:
            ammolite__build_in[key] = eval(compile(s, "<string>", "eval"))
        except TypeError:
            ammolite__build_in[key] = None
        except AttributeError as ae:
            if str(ae) == "'NoneType' object has no attribute 'get'":
                ammolite__build_in[key] = None
            else:
                raise
    ammolite__numActiveCUs = ammolite__build_in["numActiveCUs"]
    ammolite__kernelBusyCycles = ammolite__build_in["kernelBusyCycles"]
    ammolite__hbmBandwidth = ammolite__build_in["hbmBandwidth"]

    # Hmmm... apply + lambda should just work
    # df['Value'] = df['Value'].apply(lambda s: eval(compile(str(s), '<string>', 'eval')))
    for id, df in dfs.items():
        if dfs_type[id] == "metric_table":
            for idx, row in df.iterrows():
                for expr in df.columns:
                    if expr in schema.supported_field:
                        if expr.lower() != "alias":
                            if row[expr]:
                                if debug:  # debug won't impact the regular calc
                                    print("~" * 40 + "\nExpression:")
                                    print(expr, "=", row[expr])
                                    print("Inputs:")
                                    matched_vars = re.findall(r"ammolite__\w+", row[expr])
                                    if matched_vars:
                                        for v in matched_vars:
                                            print(
                                                "Var ",
                                                v,
                                                ":",
                                                eval(compile(v, "<string>", "eval")),
                                            )
                                    # NOTE(review): build_eval_string() emits
                                    # raw_pmc_df.get('..').get("..") lookups, so
                                    # this ['..']['..'] pattern looks stale.
                                    matched_cols = re.findall(
                                        r"raw_pmc_df\['\w+'\]\['\w+'\]", row[expr]
                                    )
                                    if matched_cols:
                                        for c in matched_cols:
                                            m = re.match(
                                                r"raw_pmc_df\['(\w+)'\]\['(\w+)'\]", c
                                            )
                                            print(c)
                                            print(
                                                raw_pmc_df[m.group(1)][
                                                    m.group(2)
                                                ].to_list()
                                            )
                                            # print(
                                            #     tabulate(raw_pmc_df[m.group(1)][
                                            #         m.group(2)],
                                            #         headers='keys',
                                            #         tablefmt='fancy_grid'))
                                    print("\nOutput:")
                                    try:
                                        print(
                                            eval(compile(row[expr], "<string>", "eval"))
                                        )
                                        print("~" * 40)
                                    except TypeError:
                                        console_warning(
                                            "Skipping entry. Encountered a missing counter\n{} has been assigned to None\n{}".format(
                                                expr, np.nan
                                            )
                                        )
                                    except AttributeError as ae:
                                        if (
                                            str(ae)
                                            == "'NoneType' object has no attribute 'get'"
                                        ):
                                            console_warning(
                                                "Skipping entry. Encountered a missing csv\n{}".format(
                                                    np.nan
                                                )
                                            )
                                        else:
                                            console_error("analysis", str(ae))
                                # eval expr for each metric
                                try:
                                    out = eval(compile(row[expr], "<string>", "eval"))
                                    if np.isnan(out):
                                        row[expr] = ""
                                    else:
                                        row[expr] = out
                                except TypeError:
                                    # missing counter
                                    row[expr] = ""
                                except AttributeError as ae:
                                    if (
                                        str(ae)
                                        == "'NoneType' object has no attribute 'get'"
                                    ):
                                        # missing csv
                                        row[expr] = ""
                                    else:
                                        console_error("analysis", str(ae))
                            else:
                                # If not insert nan, the whole col might be treated
                                # as string but not number if there is NONE
                                row[expr] = ""
            # print(tabulate(df, headers='keys', tablefmt='fancy_grid'))
@demarcate
def apply_filters(workload, dir, is_gui, debug):
    """
    Apply user's filters to the raw_pmc df.

    The node, gpu-id, kernel and dispatch filters stored on the workload are
    applied in that order, each on the result of the previous one.

    Args:
        workload: object carrying raw_pmc, dfs and the filter_* settings.
        dir: workload directory; pmc_kernel_top.csv is read from it to
            validate numeric kernel ids.
        is_gui: not used in this function.
        debug: when true, print df.info() of the raw and the filtered df.

    Returns:
        The filtered dataframe.
    """
    # TODO: error out properly if filters out of bound
    ret_df = workload.raw_pmc

    if workload.filter_nodes:
        # match the Node column against the node filter (the gpu-id filter
        # was used here by mistake)
        ret_df = ret_df.loc[
            ret_df[schema.pmc_perf_file_prefix]["Node"]
            .astype(str)
            .isin([workload.filter_nodes])
        ]
        if ret_df.empty:
            console_error("analysis", "{} is invalid".format(workload.filter_nodes))

    if workload.filter_gpu_ids:
        ret_df = ret_df.loc[
            ret_df[schema.pmc_perf_file_prefix]["GPU_ID"]
            .astype(str)
            .isin([workload.filter_gpu_ids])
        ]
        if ret_df.empty:
            console_error(
                "analysis", "{} is an invalid gpu-id".format(workload.filter_gpu_ids)
            )

    # NB:
    # Kernel id is unique!
    # We pick up kernel names from kernel ids first.
    # Then filter valid entries with kernel names.
    if workload.filter_kernel_ids:
        if all(type(kid) == int for kid in workload.filter_kernel_ids):
            # Verify valid kernel filter
            kernels_df = pd.read_csv(str(Path(dir).joinpath("pmc_kernel_top.csv")))
            for kernel_id in workload.filter_kernel_ids:
                if kernel_id >= len(kernels_df["Kernel_Name"]):
                    console_error(
                        "{} is an invalid kernel id. Please enter an id between 0-{}".format(
                            kernel_id, len(kernels_df["Kernel_Name"]) - 1
                        )
                    )
            kernels = []
            # NB: mark selected kernels with "*"
            # Todo: fix it for unaligned comparison
            kernel_top_df = workload.dfs[pmc_kernel_top_table_id]
            kernel_top_df["S"] = ""
            for kernel_id in workload.filter_kernel_ids:
                kernels.append(kernel_top_df.loc[kernel_id, "Kernel_Name"])
                kernel_top_df.loc[kernel_id, "S"] = "*"
            if kernels:
                ret_df = ret_df.loc[
                    ret_df[schema.pmc_perf_file_prefix]["Kernel_Name"].isin(kernels)
                ]
        elif all(type(kid) == str for kid in workload.filter_kernel_ids):
            # kernel names are compared with surrounding whitespace removed
            df_cleaned = ret_df[schema.pmc_perf_file_prefix]["Kernel_Name"].apply(
                lambda x: x.strip() if isinstance(x, str) else x
            )
            ret_df = ret_df.loc[df_cleaned.isin(workload.filter_kernel_ids)]
        else:
            console_error(
                "analyze",
                "Mixing kernel indices and string filters is not currently supported",
            )

    if workload.filter_dispatch_ids:
        # NB: support ignoring the 1st n dispatched execution by '> n'
        # The better way may be parsing python slice string
        first_filter = workload.filter_dispatch_ids[0]
        if ">" in first_filter:
            # Handled before any int() conversion: validating "> n" as a
            # plain id raised ValueError and made this syntax unusable.
            m = re.match(r"\> (\d+)", first_filter)
            if m is None:
                console_error(
                    "analysis", "{} is an invalid dispatch id.".format(first_filter)
                )
            else:
                ret_df = ret_df[
                    ret_df[schema.pmc_perf_file_prefix]["Dispatch_ID"]
                    > int(m.group(1))
                ]
        else:
            for d in workload.filter_dispatch_ids:
                if int(d) >= len(ret_df):
                    console_error("analysis", "{} is an invalid dispatch id.".format(d))
            dispatches = [int(x) for x in workload.filter_dispatch_ids]
            ret_df = ret_df.loc[dispatches]

    if debug:
        print("~" * 40, "\nraw pmc df info:\n")
        print(workload.raw_pmc.info())
        print("~" * 40, "\nfiltered pmc df info:")
        print(ret_df.info())
    return ret_df
def find_key_recursively(data, search_key):
    """
    Depth-first search for search_key in nested dicts/lists.

    Returns the value stored under the first matching key, exactly as found
    (no conversion), or None when the key does not occur. A match whose value
    is None is skipped in nested containers, because None means "not found".
    """
    if isinstance(data, dict):
        for name, child in data.items():
            if name == search_key:
                return child
            if isinstance(child, (dict, list)):
                hit = find_key_recursively(child, search_key)
                if hit is not None:
                    return hit
        return None
    if isinstance(data, list):
        for element in data:
            hit = find_key_recursively(element, search_key)
            if hit is not None:
                return hit
    return None
def search_key_in_json(file_path, search_key):
    """
    Load the JSON file at file_path and return the value of the first
    occurrence of search_key; a missing key is reported via console_error().
    """
    # FIXME:
    # The entire JSON is loaded into memory.
    # Should not be used for a large file.
    with open(file_path, "r") as handle:
        document = json.load(handle)
    found = find_key_recursively(document, search_key)
    if found is None:
        console_error(f"Key '{search_key}' not found in the JSON file.")
    return found
def search_pc_sampling_record(records):
    """
    Aggregate PC sampling records per (code_object_id, code_object_offset).

    A record contributes only when its "record" -> "pc" entry provides both
    "code_object_id" and "code_object_offset" and the record itself carries
    an "inst_index". For every such pair the number of records is counted
    and the inst_index of the most recently seen record is remembered.

    Returns a list of (code_object_id, inst_index, code_object_offset, count)
    tuples ordered by code_object_id, then code_object_offset. When nothing
    qualifies, a warning is emitted and None is returned.
    """
    # code_object_id -> code_object_offset -> {"count", "inst_index"}
    per_object = {}

    for entry in records:
        pc = entry["record"].get("pc", {})
        object_id = pc.get("code_object_id")
        byte_offset = pc.get("code_object_offset")
        instruction_idx = entry.get("inst_index")
        if object_id is None or byte_offset is None or instruction_idx is None:
            continue

        slot = per_object.setdefault(object_id, {}).setdefault(
            byte_offset, {"count": 0, "inst_index": None}
        )
        slot["count"] += 1
        slot["inst_index"] = instruction_idx

    if not per_object:
        console_warning("PC sampling: no pc sampling record found!")
        return None

    rows = []
    for object_id, by_offset in per_object.items():
        for byte_offset, slot in by_offset.items():
            rows.append((object_id, slot["inst_index"], byte_offset, slot["count"]))

    # Order by code_object_id first, code_object_offset second.
    rows.sort(key=lambda row: (row[0], row[2]))
    return rows
@demarcate
def load_pc_sampling_data_per_kernel(file_name, kernel_name):
    """
    Load PC sampling raw data from json file with given kernel name,
    then return df.

    The kernel is looked up in the "kernel_symbols" list of the JSON file by
    its formatted, demangled, or truncated name. Samples from
    "pc_sample_host_trap" are kept when they belong to the kernel's code
    object and their offset lies strictly between the kernel's entry offset
    and the entry offset of the next symbol in the same code object
    (sys.maxsize when there is no later symbol).

    Returns a DataFrame with the columns "source_line", "instruction",
    "offset" (hex string) and "count". An empty DataFrame is returned when
    the kernel cannot be found.
    """
    kernel_info_list = search_key_in_json(file_name, "kernel_symbols")

    kernel_info = {}
    for item in kernel_info_list or []:
        if (
            item["formatted_kernel_name"] == kernel_name
            or item["demangled_kernel_name"] == kernel_name
            or item["truncated_kernel_name"] == kernel_name
        ):
            kernel_info["code_object_id"] = item["code_object_id"]
            kernel_info["entry_byte_offset"] = item["kernel_code_entry_byte_offset"]
            break

    if not kernel_info:
        console_warning("PC sampling: can not find the kernel %s " % kernel_name)
        return pd.DataFrame()
    console_debug("PC sampling: kernel %s " % kernel_info)

    # The kernel's code is assumed to end where the next symbol of the same
    # code object begins. Taking the smallest strictly greater entry offset
    # (instead of the kernel's own entry offset) keeps the sample window
    # non-empty, also when several symbols share one entry offset.
    later_entry_offsets = [
        item["kernel_code_entry_byte_offset"]
        for item in kernel_info_list
        if item["code_object_id"] == kernel_info["code_object_id"]
        and item["kernel_code_entry_byte_offset"] > kernel_info["entry_byte_offset"]
    ]
    kernel_info["potential_end_offset"] = min(later_entry_offsets, default=sys.maxsize)

    pc_sample_host_trap = search_key_in_json(file_name, "pc_sample_host_trap")
    df = pd.DataFrame(
        search_pc_sampling_record(pc_sample_host_trap),
        columns=["code_object_id", "inst_index", "offset", "count"],
    )

    # NOTE(review): the lower bound is exclusive, so a sample located exactly
    # at the kernel entry offset is dropped - confirm this is intended.
    in_kernel = (
        (df["code_object_id"] == kernel_info["code_object_id"])
        & (df["offset"] > kernel_info["entry_byte_offset"])
        & (df["offset"] < kernel_info["potential_end_offset"])
    )
    # Work on an explicit copy so the column assignments below do not write
    # into a slice of the unfiltered frame.
    df = df.loc[in_kernel, ["inst_index", "offset", "count"]].copy()

    df["offset"] = df["offset"].apply(hex)

    pc_sample_instructions = search_key_in_json(file_name, "pc_sample_instructions")
    df["instruction"] = df["inst_index"].apply(
        lambda x: pc_sample_instructions[x] if x < len(pc_sample_instructions) else None
    )

    pc_sample_comments = search_key_in_json(file_name, "pc_sample_comments")
    # Bounds-check against the list that is actually indexed.
    df["source_line"] = df["inst_index"].apply(
        lambda x: (
            ".../" + Path(pc_sample_comments[x]).name
            if x < len(pc_sample_comments)
            else None
        )
    )

    return df[["source_line", "instruction", "offset", "count"]]
@demarcate
def load_pc_sampling_data(workload, dir, file_prefix):
    """
    Produce the PC sampling table for ``workload``.

    - ``file_prefix`` equal to "none" (case-insensitive): empty DataFrame.
    - No kernel filter: read "<file_prefix>_pc_sampling_host_trap.csv" from
      ``dir``, count rows per "Instruction_Comment", keep the first
      "Instruction" of each group, shorten the comment to ".../<basename>"
      and sort by count, highest first.
    - Exactly one kernel filter: resolve the kernel name through the kernel
      top table CSV and delegate to load_pc_sampling_data_per_kernel() on
      "<file_prefix>_results.json".
    - More than one kernel filter: report an error.

    A missing input file is reported through console_error().
    """
    if file_prefix.lower() == "none":
        return pd.DataFrame()

    kernel_filter = workload.filter_kernel_ids

    # Without a kernel filter the grouped and sorted CSV is returned directly.
    if not kernel_filter:
        # NB: the default file name is subject to changes from rocprofv3
        csv_file_path = Path(dir).joinpath(file_prefix + "_pc_sampling_host_trap.csv")
        if not csv_file_path.exists():
            console_error("PC sampling: can not read %s " % csv_file_path)
            return pd.DataFrame()

        samples = pd.read_csv(csv_file_path)
        summary = samples.groupby("Instruction_Comment").agg(
            count=("Instruction_Comment", "count"),
            instruction=("Instruction", "first"),
        )
        summary = summary.reset_index()
        summary = summary.rename(columns={"Instruction_Comment": "source_line"})
        summary = summary[["source_line", "instruction", "count"]]
        summary["source_line"] = summary["source_line"].apply(
            lambda comment: (".../" + Path(comment).name)
        )
        # Most frequently sampled source lines come first.
        return summary.sort_values(by="count", ascending=False)

    selected = len(kernel_filter)

    if selected > 1:
        console_error(
            "PC sampling supports single kernel only! Please specify -k with single kernel."
        )
        return pd.DataFrame()

    if selected != 1:
        console_warning("PC sampling: No data")
        return pd.DataFrame()

    # NB: the default file name is subject to changes from rocprofv3
    json_file_path = Path(dir).joinpath(file_prefix + "_results.json")
    if not json_file_path.exists():
        console_error("PC sampling: can not read %s " % json_file_path)
        return pd.DataFrame()

    # NB:
    # We should find better way to remove the dependency on kernel_top_table
    kernel_top_df = workload.dfs[pmc_kernel_top_table_id]
    kernel_top_csv = Path(dir).joinpath(kernel_top_df.loc[0, "from_csv"])
    kernel_name = pd.read_csv(kernel_top_csv).loc[kernel_filter[0], "Kernel_Name"]
    return load_pc_sampling_data_per_kernel(json_file_path, kernel_name)
@demarcate
def load_kernel_top(workload, dir):
    """
    Fill in the non-metric tables of ``workload.dfs``.

    A table whose placeholder frame carries one of the internal marker
    columns is replaced by real data:

    - "from_csv": the named CSV under ``dir``, read as-is.
    - "from_csv_columnwise": the named CSV under ``dir``, transposed and
      given the single column header "Info". Table id 101 (sysinfo) takes
      ``workload.sys_info`` instead of reading a file.
    - "from_pc_sampling": the result of load_pc_sampling_data().

    A missing CSV only triggers a warning and leaves that table untouched.
    """
    # NB:
    #   - This runs before eval_metric because the kernel names are needed.
    #   - The three marker columns are internal symbols produced by
    #     build_dfs() for non-metric tables.
    # FIXME:
    #   load_kernel_top should be renamed to load_non_metrics_table.
    loaded = {}

    for table_id, placeholder in workload.dfs.items():
        markers = placeholder.columns

        if "from_csv" in markers:
            source = Path(dir).joinpath(placeholder.loc[0, "from_csv"])
            if not source.exists():
                console_warning(
                    f"Couldn't load {source.name}. This may result in missing analysis data."
                )
            else:
                loaded[table_id] = pd.read_csv(source)

        elif "from_csv_columnwise" in markers:
            if table_id == 101:
                # Special case for sysinfo: already available on the workload.
                transposed = workload.sys_info.transpose()
            else:
                source = Path(dir).joinpath(placeholder.loc[0, "from_csv_columnwise"])
                if not source.exists():
                    console_warning(
                        f"Couldn't load {source.name}. This may result in missing analysis data."
                    )
                    continue
                transposed = pd.read_csv(source).transpose()

            loaded[table_id] = transposed
            # All transposed columns get a general header, so tty can detect
            # them and show them correctly in comparison.
            transposed.columns = ["Info"]

        elif "from_pc_sampling" in markers:
            loaded[table_id] = load_pc_sampling_data(
                workload, dir, placeholder.loc[0, "from_pc_sampling"]
            )

    workload.dfs.update(loaded)
@demarcate
def load_table_data(workload, dir, is_gui, debug, verbose, skipKernelTop=False):
    """
    Populate every table of ``workload``.

    - Unless ``skipKernelTop`` is set, load the data for all
      "raw_csv_table" and "pc_sampling_table" entries via load_kernel_top().
    - Calculate the metric values for all "metric_table" entries with
      eval_metric(), feeding it the first row of ``workload.sys_info`` and
      the frame returned by apply_filters().

    ``verbose`` is accepted but not used here.
    """
    if not skipKernelTop:
        load_kernel_top(workload, dir)

    tables = workload.dfs
    table_types = workload.dfs_type
    sys_info_row = workload.sys_info.iloc[0]
    filtered_pmc = apply_filters(workload, dir, is_gui, debug)

    eval_metric(tables, table_types, sys_info_row, filtered_pmc, debug)
def build_comparable_columns(time_unit):
    """
    Build comparable columns/headers for display

    Returns a new list holding every entry of schema.supported_field
    followed by the top-stat headers "Count", "Sum", "Mean", "Median" and
    "Standard Deviation", each suffixed with "(<time_unit>)".
    """
    top_stat_base = ["Count", "Sum", "Mean", "Median", "Standard Deviation"]

    # Copy first: appending to schema.supported_field itself would grow the
    # shared module-level list by five entries on every call.
    comparable_columns = list(schema.supported_field)
    comparable_columns.extend(h + "(" + time_unit + ")" for h in top_stat_base)
    return comparable_columns
def correct_sys_info(mspec, specs_correction: str):
    """
    Correct system spec items manually

    ``specs_correction`` is a string that is scanned for "<name>: <digits>"
    pairs. For each pair the attribute <name> of ``mspec`` is overwritten
    with the matched digit string (it is not converted to a number). When a
    name occurs more than once, the last value wins. A name that is not an
    attribute of ``mspec`` is reported through console_error().

    Returns the result of ``mspec.get_class_members()``.
    """
    # todo: more err checking for string specs_correction
    pairs = dict(re.findall(r"(\w+):\s*(\d+)", specs_correction))

    for k, v in pairs.items():
        if not hasattr(mspec, k):
            console_error(
                "analyze",
                f"Invalid specs correction '{k}'. Please use --specs option to peak valid specs",
            )
        setattr(mspec, k, v)
    return mspec.get_class_members()