[rocprof-compute] Improve standalone roofline plot generation (#1298)
* ruff formatting
* Update roofline.py function descriptions
* Update height calculation
* Add back cache level filtering in gui_analysis
* Update roofline_calc.py to take in ai_data for ceiling length calc
* format roofline.py
* update roof test cases
* update roofline legend plot table
* fix pdf generate cutoff

---------

Co-authored-by: cfallows-amd <Carrie.Fallows@amd.com>
Tá an tiomantas seo le fáil i:
tiomanta ag
GitHub
tuismitheoir
7f2ef6a602
tiomantas
64375c23d0
+1
-1
@@ -220,7 +220,7 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
"device_id": 0,
|
||||
"sort_type": "kernels",
|
||||
"mem_level": "ALL",
|
||||
"include_kernel_names": False,
|
||||
"include_kernel_names": True,
|
||||
"is_standalone": False,
|
||||
"roofline_data_type": self.__roofline_data_type,
|
||||
"kernel_filter": False,
|
||||
|
||||
Cuireadh difríocht an chomhaid faoi chois toisc go bhfuil sí rómhór
Luchtaigh an Difríocht
@@ -26,7 +26,7 @@
|
||||
import csv
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Union
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import pandas as pd
|
||||
|
||||
@@ -201,9 +201,23 @@ def calc_ceilings(
|
||||
roofline_parameters: dict[str, Any],
|
||||
dtype: str,
|
||||
benchmark_data: dict[str, list[str]],
|
||||
ai_data: Optional[dict] = None,
|
||||
) -> dict[str, list[Union[list[float], float, None]]]:
|
||||
"""Given benchmarking data, calculate ceilings (or peak performance) for
|
||||
empirical roofline"""
|
||||
|
||||
if ai_data:
|
||||
max_ai = 0
|
||||
for cache_level in ["ai_l1", "ai_l2", "ai_hbm"]:
|
||||
if cache_level in ai_data and ai_data[cache_level][0]:
|
||||
cache_max = max(ai_data[cache_level][0])
|
||||
max_ai = max(max_ai, cache_max)
|
||||
|
||||
dynamic_xmax = max_ai * 1.2 if max_ai > 0 else 1000
|
||||
else:
|
||||
dynamic_xmax = 1000
|
||||
print(XMAX, dynamic_xmax)
|
||||
|
||||
# TODO: This is where filtering by memory level will need to occur for standalone
|
||||
graph_points: dict[str, list[Union[list[float], float, None]]] = {
|
||||
"hbm": [],
|
||||
@@ -281,18 +295,24 @@ def calc_ceilings(
|
||||
# ----------------------------------------------------------------------------------
|
||||
if dtype in PEAK_OPS_DATATYPES:
|
||||
# Plot FMA roof
|
||||
x0 = min(x2, XMAX) if x2 < XMAX else XMAX
|
||||
x0 = min(x2, dynamic_xmax) if x2 < dynamic_xmax else dynamic_xmax
|
||||
|
||||
console_debug(f"FMA ROOF [{x0}, {XMAX}], [{peak_ops},{peak_ops}]")
|
||||
graph_points["valu"].extend([[x0, XMAX], [peak_ops, peak_ops], peak_ops])
|
||||
console_debug(f"FMA ROOF [{x0}, {dynamic_xmax}], [{peak_ops},{peak_ops}]")
|
||||
graph_points["valu"].extend([
|
||||
[x0, dynamic_xmax],
|
||||
[peak_ops, peak_ops],
|
||||
peak_ops,
|
||||
])
|
||||
|
||||
# Plot MFMA roof
|
||||
if dtype in MFMA_DATATYPES: # assert that mfma has been assigned
|
||||
x0_mfma = min(x2_mfma, XMAX) if x2_mfma < XMAX else XMAX
|
||||
x0_mfma = min(x2_mfma, dynamic_xmax) if x2_mfma < dynamic_xmax else dynamic_xmax
|
||||
|
||||
console_debug(f"MFMA ROOF [{x0_mfma}, {XMAX}], [{peak_mfma},{peak_mfma}]")
|
||||
console_debug(
|
||||
f"MFMA ROOF [{x0_mfma}, {dynamic_xmax}], [{peak_mfma},{peak_mfma}]"
|
||||
)
|
||||
graph_points["mfma"].extend([
|
||||
[x0_mfma, XMAX],
|
||||
[x0_mfma, dynamic_xmax],
|
||||
[peak_mfma, peak_mfma],
|
||||
peak_mfma,
|
||||
])
|
||||
@@ -774,7 +794,7 @@ def calc_ai_profile(
|
||||
|
||||
|
||||
def construct_roof(
|
||||
roofline_parameters: dict[str, Any], dtype: str
|
||||
roofline_parameters: dict[str, Any], dtype: str, ai_data: Optional[dict] = None
|
||||
) -> dict[str, list[Union[list[float], float, None]]]:
|
||||
workload_dir = roofline_parameters.get("workload_dir")
|
||||
if isinstance(workload_dir, list):
|
||||
@@ -817,4 +837,4 @@ def construct_roof(
|
||||
# ------------------
|
||||
# Generate Roofline
|
||||
# ------------------
|
||||
return calc_ceilings(roofline_parameters, dtype, benchmark_data)
|
||||
return calc_ceilings(roofline_parameters, dtype, benchmark_data, ai_data)
|
||||
|
||||
@@ -156,7 +156,6 @@ ALL_CSVS_MI350 = sorted([
|
||||
|
||||
ROOF_ONLY_FILES = sorted([
|
||||
"empirRoof_gpu-0_FP32.pdf",
|
||||
"kernelName_legend.pdf",
|
||||
"pmc_perf.csv",
|
||||
"pmc_perf_0.csv",
|
||||
"pmc_perf_1.csv",
|
||||
@@ -946,7 +945,6 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute):
|
||||
return
|
||||
|
||||
# Test `--kernel` filtering outputs are present and labelled correctly
|
||||
filter_kernelName = "kernelName_legend_" + config["kernel_name_1"]
|
||||
filter_empirRoof = "empirRoof_gpu-0_" + config["kernel_name_1"]
|
||||
|
||||
plot_configurations = [
|
||||
@@ -967,7 +965,7 @@ def test_roof_plot_modes(binary_handler_profile_rocprof_compute):
|
||||
"--kernel",
|
||||
config["kernel_name_1"],
|
||||
],
|
||||
"expected_files": [filter_kernelName, filter_empirRoof],
|
||||
"expected_files": [filter_empirRoof],
|
||||
},
|
||||
]
|
||||
|
||||
@@ -1387,10 +1385,7 @@ def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute):
|
||||
|
||||
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
|
||||
|
||||
expected_files = ROOF_ONLY_FILES.copy()
|
||||
expected_files.remove("kernelName_legend.pdf")
|
||||
expected_files = sorted(expected_files)
|
||||
assert sorted(list(file_dict.keys())) == expected_files
|
||||
assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
|
||||
|
||||
validate(
|
||||
inspect.stack()[0][3],
|
||||
@@ -1420,10 +1415,7 @@ def test_roof_sort_kernels(binary_handler_profile_rocprof_compute):
|
||||
assert returncode == 0
|
||||
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
|
||||
|
||||
expected_files = ROOF_ONLY_FILES.copy()
|
||||
expected_files.remove("kernelName_legend.pdf")
|
||||
expected_files = sorted(expected_files)
|
||||
assert sorted(list(file_dict.keys())) == expected_files
|
||||
assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
|
||||
|
||||
validate(
|
||||
inspect.stack()[0][3],
|
||||
@@ -1453,10 +1445,7 @@ def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute):
|
||||
assert returncode == 0
|
||||
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
|
||||
|
||||
expected_files = ROOF_ONLY_FILES.copy()
|
||||
expected_files.remove("kernelName_legend.pdf")
|
||||
expected_files = sorted(expected_files)
|
||||
assert sorted(list(file_dict.keys())) == expected_files
|
||||
assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
|
||||
|
||||
validate(
|
||||
inspect.stack()[0][3],
|
||||
@@ -1486,10 +1475,7 @@ def test_roof_mem_levels_LDS(binary_handler_profile_rocprof_compute):
|
||||
assert returncode == 0
|
||||
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
|
||||
|
||||
expected_files = ROOF_ONLY_FILES.copy()
|
||||
expected_files.remove("kernelName_legend.pdf")
|
||||
expected_files = sorted(expected_files)
|
||||
assert sorted(list(file_dict.keys())) == expected_files
|
||||
assert sorted(list(file_dict.keys())) == ROOF_ONLY_FILES
|
||||
|
||||
validate(
|
||||
inspect.stack()[0][3],
|
||||
|
||||
Tagairt in Eagrán Nua
Cuir bac ar úsáideoir