[rocprof-compute] Improve standalone roofline plot generation (#1298)

* Apply ruff formatting

* Update roofline.py function descriptions

* Update height calculation

* Add back cache level filtering in gui_analysis

* Update roofline_calc.py to take in ai_data for ceiling length calc

* Format roofline.py

* Update roof test cases

* Update roofline legend plot table

* Fix PDF generation cutoff

---------

Co-authored-by: cfallows-amd <Carrie.Fallows@amd.com>
Tá an tiomantas seo le fáil i:
jamessiddeley-amd
2025-10-10 14:23:23 -04:00
tiomanta ag GitHub
tuismitheoir 7f2ef6a602
tiomantas 64375c23d0
D'athraigh 4 comhad le 752 breiseanna agus 320 scriosta
@@ -220,7 +220,7 @@ class webui_analysis(OmniAnalyze_Base):
"device_id": 0,
"sort_type": "kernels",
"mem_level": "ALL",
"include_kernel_names": False,
"include_kernel_names": True,
"is_standalone": False,
"roofline_data_type": self.__roofline_data_type,
"kernel_filter": False,
Tá difríocht comhad cosc orthu toisc go bhfuil sé ró-mhór Difríocht Luchtaigh
@@ -26,7 +26,7 @@
import csv
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Union
from typing import Any, Optional, Union
import pandas as pd
@@ -201,9 +201,23 @@ def calc_ceilings(
roofline_parameters: dict[str, Any],
dtype: str,
benchmark_data: dict[str, list[str]],
ai_data: Optional[dict] = None,
) -> dict[str, list[Union[list[float], float, None]]]:
"""Given benchmarking data, calculate ceilings (or peak performance) for
empirical roofline"""
if ai_data:
max_ai = 0
for cache_level in ["ai_l1", "ai_l2", "ai_hbm"]:
if cache_level in ai_data and ai_data[cache_level][0]:
cache_max = max(ai_data[cache_level][0])
max_ai = max(max_ai, cache_max)
dynamic_xmax = max_ai * 1.2 if max_ai > 0 else 1000
else:
dynamic_xmax = 1000
print(XMAX, dynamic_xmax)
# TODO: This is where filtering by memory level will need to occur for standalone
graph_points: dict[str, list[Union[list[float], float, None]]] = {
"hbm": [],
@@ -281,18 +295,24 @@ def calc_ceilings(
# ----------------------------------------------------------------------------------
if dtype in PEAK_OPS_DATATYPES:
# Plot FMA roof
x0 = min(x2, XMAX) if x2 < XMAX else XMAX
x0 = min(x2, dynamic_xmax) if x2 < dynamic_xmax else dynamic_xmax
console_debug(f"FMA ROOF [{x0}, {XMAX}], [{peak_ops},{peak_ops}]")
graph_points["valu"].extend([[x0, XMAX], [peak_ops, peak_ops], peak_ops])
console_debug(f"FMA ROOF [{x0}, {dynamic_xmax}], [{peak_ops},{peak_ops}]")
graph_points["valu"].extend([
[x0, dynamic_xmax],
[peak_ops, peak_ops],
peak_ops,
])
# Plot MFMA roof
if dtype in MFMA_DATATYPES: # assert that mfma has been assigned
x0_mfma = min(x2_mfma, XMAX) if x2_mfma < XMAX else XMAX
x0_mfma = min(x2_mfma, dynamic_xmax) if x2_mfma < dynamic_xmax else dynamic_xmax
console_debug(f"MFMA ROOF [{x0_mfma}, {XMAX}], [{peak_mfma},{peak_mfma}]")
console_debug(
f"MFMA ROOF [{x0_mfma}, {dynamic_xmax}], [{peak_mfma},{peak_mfma}]"
)
graph_points["mfma"].extend([
[x0_mfma, XMAX],
[x0_mfma, dynamic_xmax],
[peak_mfma, peak_mfma],
peak_mfma,
])
@@ -774,7 +794,7 @@ def calc_ai_profile(
def construct_roof(
roofline_parameters: dict[str, Any], dtype: str
roofline_parameters: dict[str, Any], dtype: str, ai_data: Optional[dict] = None
) -> dict[str, list[Union[list[float], float, None]]]:
workload_dir = roofline_parameters.get("workload_dir")
if isinstance(workload_dir, list):
@@ -817,4 +837,4 @@ def construct_roof(
# ------------------
# Generate Roofline
# ------------------
return calc_ceilings(roofline_parameters, dtype, benchmark_data)
return calc_ceilings(roofline_parameters, dtype, benchmark_data, ai_data)
@@ -156,7 +156,6 @@ ALL_CSVS_MI350 = sorted([
ROOF_ONLY_FILES = sorted([
"empirRoof_gpu-0_FP32.pdf",
"kernelName_legend.pdf",
"pmc_perf.csv",
"pmc_perf_0.csv",
"pmc_perf_1.csv",
@@ -946,7 +945,6 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute):
return
# Test `--kernel` filtering outputs are present and labelled correctly
filter_kernelName = "kernelName_legend_" + config["kernel_name_1"]
filter_empirRoof = "empirRoof_gpu-0_" + config["kernel_name_1"]
plot_configurations = [
@@ -967,7 +965,7 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute):
"--kernel",
config["kernel_name_1"],
],
"expected_files": [filter_kernelName, filter_empirRoof],
"expected_files": [filter_empirRoof],
},
]
@@ -1387,10 +1385,7 @@ def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute):
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
expected_files = ROOF_ONLY_FILES.copy()
expected_files.remove("kernelName_legend.pdf")
expected_files = sorted(expected_files)
assert sorted(list(file_dict.keys())) == expected_files
assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
validate(
inspect.stack()[0][3],
@@ -1420,10 +1415,7 @@ def test_roof_sort_kernels(binary_handler_profile_rocprof_compute):
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
expected_files = ROOF_ONLY_FILES.copy()
expected_files.remove("kernelName_legend.pdf")
expected_files = sorted(expected_files)
assert sorted(list(file_dict.keys())) == expected_files
assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
validate(
inspect.stack()[0][3],
@@ -1453,10 +1445,7 @@ def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute):
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
expected_files = ROOF_ONLY_FILES.copy()
expected_files.remove("kernelName_legend.pdf")
expected_files = sorted(expected_files)
assert sorted(list(file_dict.keys())) == expected_files
assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
validate(
inspect.stack()[0][3],
@@ -1486,10 +1475,7 @@ def test_roof_mem_levels_LDS(binary_handler_profile_rocprof_compute):
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
expected_files = ROOF_ONLY_FILES.copy()
expected_files.remove("kernelName_legend.pdf")
expected_files = sorted(expected_files)
assert sorted(list(file_dict.keys())) == expected_files
assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
validate(
inspect.stack()[0][3],