Enable roofline for MI350 series (#677)

Rework of roofline binaries generated from rocm-amdgpu-bench
- removed arch identifier in bin name
- removed rocm5 bins altogether

Updated required distros for roofline
- updated distro checks and bin naming
- moved up ubuntu20.04->22.04 and sles15.3->15.6 per rocm support

Enabled ctests for mi350 for test_roof_*
- removed the MI350 series check that previously skipped these specific tests

---------

Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>

[ROCm/rocprofiler-compute commit: 41e73650d5]
Этот коммит содержится в:
cfallows-amd
2025-04-28 16:08:23 -04:00
коммит произвёл GitHub
родитель 668402042c
Коммит c8d67fc97e
19 изменённых файлов: 35 добавлений и 31 удалений
+6
Просмотреть файл
@@ -45,6 +45,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* Roofline support for RHEL 10
* Roofline support for MI350 series architecture
### Changed
* Change the default rocprof version to v3 when environment variable "ROCPROF" is not set
@@ -74,6 +76,10 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
* Analysis of new workloads might require providing shader/memory clock speed using
the --specs-correction option if `amd-smi` or `rocminfo` does not provide clock speeds.
### Removed
* Roofline support for Ubuntu 20.04 and SLES below 15.6
## ROCm Compute Profiler 3.1.0 for ROCm 6.4.0
### Added
+1 -1
Просмотреть файл
@@ -89,7 +89,7 @@ high level.
* :ref:`Memory Chart Analysis panel <grafana-panel-memory-chart-analysis>`
* :ref:`Roofline Analysis panel <grafana-panel-roofline-analysis>`
(*Supported on MI200 and MI300 only, Ubuntu 20.04/22.04/24.04, SLES 15 SP3 or RHEL8*)
(*Supported on MI200 and above architectures only*)
* :ref:`Command Processor (CP) panel <grafana-panel-cp>`
@@ -0,0 +1,12 @@
pmc: SQ_INSTS_VALU_ADD_F16 SQ_INSTS_VALU_MUL_F16 SQ_INSTS_VALU_FMA_F16 SQ_INSTS_VALU_TRANS_F16 SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MUL_F32 SQ_INSTS_VALU_FMA_F32 SQ_INSTS_VALU_TRANS_F32
pmc: SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MUL_F64 SQ_INSTS_VALU_FMA_F64 SQ_INSTS_VALU_TRANS_F64 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64 TCP_TCC_READ_REQ_sum TCC_EA0_RDREQ_32B_sum TCC_EA0_RDREQ_sum TCC_EA0_WRREQ_64B_sum TCC_EA0_WRREQ_sum TCC_BUBBLE_sum
pmc: SQ_LDS_IDX_ACTIVE SQ_LDS_BANK_CONFLICT TCP_TOTAL_CACHE_ACCESSES_sum TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum
#ROOF:MI300 series and above
pmc: SQ_INSTS_VALU_MFMA_MOPS_F8
range:
gpu:
kernels:
-11
Просмотреть файл
@@ -46,17 +46,6 @@ class gfx950_soc(OmniSoC_Base):
)
)
)
else:
# NB: We're using generalized Mi300 perfmon configs
self.set_perfmon_dir(
str(
Path(str(config.rocprof_compute_home)).joinpath(
"rocprof_compute_soc",
"profile_configs",
"gfx950",
)
)
)
self.set_compatible_profilers(["rocprofv3"])
# Per IP block max number of simultaneous counters. GFX IP Blocks
self.set_perfmon_config(
+1
Просмотреть файл
@@ -48,6 +48,7 @@ SUPPORTED_DATATYPES = {
"gfx940": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
"gfx941": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
"gfx942": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
"gfx950": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
}
PEAK_OPS_DATATYPES = ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"]
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичные данные
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичные данные
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичный файл не отображается.
Двоичные данные
Двоичный файл не отображается.
+7 -11
Просмотреть файл
@@ -939,14 +939,14 @@ def detect_roofline(mspec):
elif (
(type(sles_distro) == str and len(sles_distro) >= 3)
and sles_distro[:2] == "15" # confirm string and len
and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3
and int(sles_distro[3]) >= 6 # SLES15 and SP >= 6
):
# Must be a valid SLES machine
# Use SP3 binary for all forward compatible service pack versions
distro = "15.3"
elif ubuntu_distro == "20.04" or ubuntu_distro == "22.04" or ubuntu_distro == "24.04":
# Use SP6 binary for all forward compatible service pack versions
distro = "15.6"
elif ubuntu_distro == "22.04" or ubuntu_distro == "24.04":
# Must be a valid Ubuntu machine
distro = ubuntu_distro
distro = "22.04"
else:
console_error("roofline", "Cannot find a valid binary for your operating system")
@@ -984,10 +984,8 @@ def mibench(args, mspec):
distro_map = {
"platform:el8": "rhel8",
"15.3": "sles15sp5",
"20.04": "ubuntu20_04",
"22.04": "ubuntu20_04",
"24.04": "ubuntu20_04",
"15.6": "sles15sp6",
"22.04": "ubuntu22_04",
}
binary_paths = []
@@ -1008,8 +1006,6 @@ def mibench(args, mspec):
dir
+ "-"
+ distro_map[target_binary["distro"]]
+ "-"
+ mspec.gpu_series.lower()
+ "-rocm"
+ target_binary["rocm_ver"]
)
+8 -8
Просмотреть файл
@@ -542,7 +542,7 @@ def test_path(binary_handler_profile_rocprof_compute):
@pytest.mark.misc
def test_roof_kernel_names(binary_handler_profile_rocprof_compute):
if soc in ("MI100", "MI350"):
if soc in ("MI100"):
# roofline is not supported on MI100
assert True
# Do not continue testing
@@ -557,7 +557,9 @@ def test_roof_kernel_names(binary_handler_profile_rocprof_compute):
# assert successful run
assert returncode == 0
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
if soc == "MI200" in soc or "MI300" in soc:
if soc == "MI100":
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
else:
assert sorted(list(file_dict.keys())) == sorted(
(
[f for f in ROOF_ONLY_FILES if f != "timestamps.csv"]
@@ -566,8 +568,6 @@ def test_roof_kernel_names(binary_handler_profile_rocprof_compute):
)
+ ["kernelName_legend.pdf"]
)
else:
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
validate(
inspect.stack()[0][3],
@@ -1816,7 +1816,7 @@ def test_join_type_kernel(binary_handler_profile_rocprof_compute):
@pytest.mark.sort
def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute):
# only test 1 device for roofline
if soc in ("MI100", "MI350"):
if soc in ("MI100"):
# roofline is not supported on MI100
assert True
# Do not continue testing
@@ -1851,7 +1851,7 @@ def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute):
@pytest.mark.sort
def test_roof_sort_kernels(binary_handler_profile_rocprof_compute):
# only test 1 device for roofline
if soc in ("MI100", "MI350"):
if soc in ("MI100"):
# roofline is not supported on MI100
assert True
# Do not continue testing
@@ -1886,7 +1886,7 @@ def test_roof_sort_kernels(binary_handler_profile_rocprof_compute):
@pytest.mark.mem
def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute):
# only test 1 device for roofline
if soc in ("MI100", "MI350"):
if soc in ("MI100"):
# roofline is not supported on MI100
assert True
# Do not continue testing
@@ -1921,7 +1921,7 @@ def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute):
@pytest.mark.mem
def test_roof_mem_levels_LDS(binary_handler_profile_rocprof_compute):
# only test 1 device for roofline
if soc in ("MI100", "MI350"):
if soc in ("MI100"):
# roofline is not supported on MI100
assert True
# Do not continue testing