Enable roofline for MI350 series (#677)

Rework of roofline binaries generated from rocm-amdgpu-bench
- removed arch identifier in bin name
- removed rocm5 bins altogether

Updated required distros for roofline
- updated distro checks and bin naming
- moved up ubuntu20.04->22.04 and sles15.3->15.6 per rocm support

Enabled ctests for mi350 for test_roof_*
- removed mi350 series check to skip these specific tests

---------

Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>

[ROCm/rocprofiler-compute commit: 41e73650d5]
This commit is contained in:
cfallows-amd
2025-04-28 16:08:23 -04:00
committed by GitHub
parent 668402042c
commit c8d67fc97e
19 changed files with 35 additions and 31 deletions
@@ -45,6 +45,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* Roofline support for RHEL 10
* Roofline support for MI350 series architecture
### Changed
* Change the default rocprof version to v3 when environment variable "ROCPROF" is not set
@@ -74,6 +76,10 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* Analysis of new workloads might require providing shader/memory clock speed using the
--specs-correction option if `amd-smi` or `rocminfo` does not provide clock speeds.
### Removed
* Roofline support for Ubuntu 20.04 and SLES below 15.6
## ROCm Compute Profiler 3.1.0 for ROCm 6.4.0
### Added
@@ -89,7 +89,7 @@ high level.
* :ref:`Memory Chart Analysis panel <grafana-panel-memory-chart-analysis>`
* :ref:`Roofline Analysis panel <grafana-panel-roofline-analysis>`
(*Supported on MI200 and MI300 only, Ubuntu 20.04/22.04/24.04, SLES 15 SP3 or RHEL8*)
(*Supported on MI200 and above architectures only*)
* :ref:`Command Processor (CP) panel <grafana-panel-cp>`
@@ -0,0 +1,12 @@
pmc: SQ_INSTS_VALU_ADD_F16 SQ_INSTS_VALU_MUL_F16 SQ_INSTS_VALU_FMA_F16 SQ_INSTS_VALU_TRANS_F16 SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MUL_F32 SQ_INSTS_VALU_FMA_F32 SQ_INSTS_VALU_TRANS_F32
pmc: SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MUL_F64 SQ_INSTS_VALU_FMA_F64 SQ_INSTS_VALU_TRANS_F64 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64 TCP_TCC_READ_REQ_sum TCC_EA0_RDREQ_32B_sum TCC_EA0_RDREQ_sum TCC_EA0_WRREQ_64B_sum TCC_EA0_WRREQ_sum TCC_BUBBLE_sum
pmc: SQ_LDS_IDX_ACTIVE SQ_LDS_BANK_CONFLICT TCP_TOTAL_CACHE_ACCESSES_sum TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum
#ROOF:MI300 series and above
pmc: SQ_INSTS_VALU_MFMA_MOPS_F8
range:
gpu:
kernels:
@@ -46,17 +46,6 @@ class gfx950_soc(OmniSoC_Base):
)
)
)
else:
# NB: We're using generalized Mi300 perfmon configs
self.set_perfmon_dir(
str(
Path(str(config.rocprof_compute_home)).joinpath(
"rocprof_compute_soc",
"profile_configs",
"gfx950",
)
)
)
self.set_compatible_profilers(["rocprofv3"])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(
@@ -48,6 +48,7 @@ SUPPORTED_DATATYPES = {
"gfx940": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
"gfx941": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
"gfx942": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
"gfx950": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
}
PEAK_OPS_DATATYPES = ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"]
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -939,14 +939,14 @@ def detect_roofline(mspec):
elif (
(type(sles_distro) == str and len(sles_distro) >= 3)
and sles_distro[:2] == "15" # confirm string and len
and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3
and int(sles_distro[3]) >= 6 # SLES15 and SP >= 6
):
# Must be a valid SLES machine
# Use SP3 binary for all forward compatible service pack versions
distro = "15.3"
elif ubuntu_distro == "20.04" or ubuntu_distro == "22.04" or ubuntu_distro == "24.04":
# Use SP6 binary for all forward compatible service pack versions
distro = "15.6"
elif ubuntu_distro == "22.04" or ubuntu_distro == "24.04":
# Must be a valid Ubuntu machine
distro = ubuntu_distro
distro = "22.04"
else:
console_error("roofline", "Cannot find a valid binary for your operating system")
@@ -984,10 +984,8 @@ def mibench(args, mspec):
distro_map = {
"platform:el8": "rhel8",
"15.3": "sles15sp5",
"20.04": "ubuntu20_04",
"22.04": "ubuntu20_04",
"24.04": "ubuntu20_04",
"15.6": "sles15sp6",
"22.04": "ubuntu22_04",
}
binary_paths = []
@@ -1008,8 +1006,6 @@ def mibench(args, mspec):
dir
+ "-"
+ distro_map[target_binary["distro"]]
+ "-"
+ mspec.gpu_series.lower()
+ "-rocm"
+ target_binary["rocm_ver"]
)
@@ -542,7 +542,7 @@ def test_path(binary_handler_profile_rocprof_compute):
@pytest.mark.misc
def test_roof_kernel_names(binary_handler_profile_rocprof_compute):
if soc in ("MI100", "MI350"):
if soc in ("MI100"):
# roofline is not supported on MI100
assert True
# Do not continue testing
@@ -557,7 +557,9 @@ def test_roof_kernel_names(binary_handler_profile_rocprof_compute):
# assert successful run
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
if soc == "MI200" in soc or "MI300" in soc:
if soc == "MI100":
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
else:
assert sorted(list(file_dict.keys())) == sorted(
(
[f for f in ROOF_ONLY_FILES if f != "timestamps.csv"]
@@ -566,8 +568,6 @@ def test_roof_kernel_names(binary_handler_profile_rocprof_compute):
)
+ ["kernelName_legend.pdf"]
)
else:
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
validate(
inspect.stack()[0][3],
@@ -1816,7 +1816,7 @@ def test_join_type_kernel(binary_handler_profile_rocprof_compute):
@pytest.mark.sort
def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute):
# only test 1 device for roofline
if soc in ("MI100", "MI350"):
if soc in ("MI100"):
# roofline is not supported on MI100
assert True
# Do not continue testing
@@ -1851,7 +1851,7 @@ def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute):
@pytest.mark.sort
def test_roof_sort_kernels(binary_handler_profile_rocprof_compute):
# only test 1 device for roofline
if soc in ("MI100", "MI350"):
if soc in ("MI100"):
# roofline is not supported on MI100
assert True
# Do not continue testing
@@ -1886,7 +1886,7 @@ def test_roof_sort_kernels(binary_handler_profile_rocprof_compute):
@pytest.mark.mem
def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute):
# only test 1 device for roofline
if soc in ("MI100", "MI350"):
if soc in ("MI100"):
# roofline is not supported on MI100
assert True
# Do not continue testing
@@ -1921,7 +1921,7 @@ def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute):
@pytest.mark.mem
def test_roof_mem_levels_LDS(binary_handler_profile_rocprof_compute):
# only test 1 device for roofline
if soc in ("MI100", "MI350"):
if soc in ("MI100"):
# roofline is not supported on MI100
assert True
# Do not continue testing