Files
vedithal-amd aa5dfb98f9 MI350 Fix L2 cache to HBM read counters/metrics (#2501)
* Fix rocprofiler-sdk metrics definition

* Use TCC_EA0_RDREQ_128B instead of TCC_BUBBLE counter for L2 cache to
  HBM counters and metrics

* Update MI350 counter definitions
    * FETCH_SIZE
    * BANDWIDTH_EA

* Update MI350 metrics definitions
    * System Speed of Light, L2-Fabric Read BW
    * Roofline Plot Points, AI (Arithmetic Intensity) HBM
    * Roofline Performance Rates, HBM Bandwidth

* Remove redundant definition for gfx950 and fix BANDWIDTH_EA definition

Test HBM bandwidth metric for memcopy workload

* Add memcopy.cpp workload

* Add metric validation test suite to validate HBM Bandwidth metric for
  memcopy workload

* Move gpu_soc() to test_utils.py for better re-usability

* Update TUI analysis config

* Fix hbm bandwidth formula for mi350 in calc_ai_profile

Co-authored-by: Alysa Liu <Alysa.Liu@amd.com>
2026-01-23 15:56:24 -05:00

139 líneas
4.7 KiB
Python

##############################################################################
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
##############################################################################
import csv
from pathlib import Path
import pytest
import test_utils
# Shared settings for this test module.  The whole dict is handed to the
# profiling fixture together with an app name ("vseq" / "vrand"), and the
# "cleanup" flag is forwarded to test_utils.clean_output_dir.
config = {
    # Workload commands keyed by app name.
    "vseq": ["./tests/vsequential_access"],
    "vrand": ["./tests/vrandom_access"],
    # First argument of every test_utils.clean_output_dir call in this file.
    "cleanup": True,
    # Not read anywhere in this file; presumably consumed by shared test
    # helpers -- confirm before removing.
    "COUNTER_LOGGING": False,
    "METRIC_COMPARE": False,
    "METRIC_LOGGING": False,
}
def _optional_float(cell):
    """Return ``float(cell)``, or ``None`` when the cell is blank or absent."""
    # csv.DictReader yields None for columns missing from a short row, and a
    # whitespace-only cell would otherwise reach float() and raise ValueError.
    text = (cell or "").strip()
    return float(text) if text else None


def load_metrics(csv_file_path):
    """
    Read a metrics CSV file into a dictionary of dictionaries.

    The file must have a header line with the columns ``Metric``, ``Avg``,
    ``Min``, ``Max`` and ``Unit``.  The result is keyed by the stripped
    ``Metric`` cell of each row:

        {
            "Metric_1": {
                "Avg": value,
                "Min": value,
                "Max": value,
                "Unit": "unit"
            },
            "Metric_2": { ... },
            ...
        }

    ``Avg``/``Min``/``Max`` are converted to ``float``; ``Unit`` is stripped.
    Any of the four that is empty, whitespace-only, or missing from a short
    row is stored as ``None``.  If a metric name occurs more than once, the
    last row wins.

    Raises:
        KeyError: if one of the expected columns is absent from the header.
        ValueError: if a non-blank Avg/Min/Max cell is not a valid float.
    """
    metrics_data = {}
    with open(csv_file_path, newline="") as csvfile:
        # DictReader takes the column names from the first line of the file.
        for row in csv.DictReader(csvfile):
            unit = (row["Unit"] or "").strip()
            metrics_data[row["Metric"].strip()] = {
                "Avg": _optional_float(row["Avg"]),
                "Min": _optional_float(row["Min"]),
                "Max": _optional_float(row["Max"]),
                "Unit": unit or None,
            }
    return metrics_data
# GPU SoC identifier, queried once when this module is imported.  The test
# below skips unless this value is truthy and contains "MI300".
# NOTE(review): the exact return format of test_utils.gpu_soc() is not visible
# from this file -- confirm against test_utils.
soc = test_utils.gpu_soc()
@pytest.mark.L1_cache
def test_L1_cache_counters(
    binary_handler_profile_rocprof_compute, binary_handler_analyze_rocprof_compute
):
    """
    Compare L1 cache metrics of a sequential and a random access workload.

    Each of the "vseq" and "vrand" apps is profiled, analyzed into CSV, and
    the "Avg" value of the read request, write request and cache hit rate
    metrics is collected.  The test then asserts that the sequential workload
    has a hit rate at least as high as the random one, and no more read or
    write requests.

    Skipped unless the module-level ``soc`` is set and contains "MI300".
    """
    if not soc or "MI300" not in soc:
        pytest.skip("Skipping L1 cache test for non-mi300 socs.")

    # set up two apps: sequential and random access
    app_names = ["vseq", "vrand"]
    options = ["-b", "16"]
    metrics = ["Read Req", "Write Req", "Cache Hit Rate"]
    # The same id is passed to `analyze -b` and prefixes the CSV file name
    # that is read back below; keep the two in sync through this one value.
    block_id = "16.3"
    result = {}

    base = Path(test_utils.get_output_dir())

    for app_name in app_names:
        workload_dir = f"{base}/{app_name}"
        workload_dir_output = f"{base}_{app_name}"

        # 1. profile the app
        return_code = binary_handler_profile_rocprof_compute(
            config,
            workload_dir,
            options,
            check_success=False,
            roof=False,
            app_name=app_name,
        )
        assert return_code == 0

        # 2. analyze the results
        return_code = binary_handler_analyze_rocprof_compute([
            "analyze",
            "--path",
            workload_dir,
            "-b",
            block_id,
            "--output-format",
            "csv",
            "--output-name",
            workload_dir_output,
        ])
        assert return_code == 0

        # 3. keep the average of every metric of interest for this app
        # FIXME: customize file name to avoid hardcode
        csv_path = f"{workload_dir_output}/{block_id}_vL1D_cache_access_metrics.csv"
        data = load_metrics(csv_path)
        result[app_name] = {metric: data[metric]["Avg"] for metric in metrics}

        # 4. clean this app's local output
        test_utils.clean_output_dir(config["cleanup"], workload_dir)
        test_utils.clean_output_dir(config["cleanup"], workload_dir_output)

    # NOTE(review): placed after the loop so the shared base directory is not
    # removed between the two apps -- confirm this matches the intended
    # cleanup order.
    test_utils.clean_output_dir(config["cleanup"], base)

    # 5. check results are expected
    # FIXME: use a range for comparison to account for different results
    assert result["vseq"]["Cache Hit Rate"] >= result["vrand"]["Cache Hit Rate"]
    assert result["vseq"]["Read Req"] <= result["vrand"]["Read Req"]
    assert result["vseq"]["Write Req"] <= result["vrand"]["Write Req"]