Enable roofline for MI350 series (#677)
Rework of roofline binaries generated from rocm-amdgpu-bench
- removed arch identifier in bin name
- removed rocm5 bins altogether
Updated required distros for roofline
- updated distro checks and bin naming
- moved up ubuntu20.04->22.04 and sles15.3->15.6 per rocm support
Enabled ctests for mi350 for test_roof_*
- removed mi350 series check to skip these specific tests
---------
Signed-off-by: Carrie Fallows <Carrie.Fallows@amd.com>
[ROCm/rocprofiler-compute commit: 41e73650d5]
This commit is contained in:
committed by
GitHub
parent
668402042c
commit
c8d67fc97e
@@ -45,6 +45,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
|
||||
* Roofline support for RHEL 10
|
||||
|
||||
* Roofline support for MI350 series architecture
|
||||
|
||||
### Changed
|
||||
|
||||
* Change the default rocprof version to v3 when environment variable "ROCPROF" is not set
|
||||
@@ -74,6 +76,10 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
* Analysis of new workloads might require providing shader/memory clock speed using
|
||||
--specs-correction option if `amd-smi` or `rocminfo` does not provide clock speeds.
|
||||
|
||||
### Removed
|
||||
|
||||
* Roofline support for Ubuntu 20.04 and SLES below 15.6
|
||||
|
||||
## ROCm Compute Profiler 3.1.0 for ROCm 6.4.0
|
||||
|
||||
### Added
|
||||
|
||||
@@ -89,7 +89,7 @@ high level.
|
||||
* :ref:`Memory Chart Analysis panel <grafana-panel-memory-chart-analysis>`
|
||||
|
||||
* :ref:`Roofline Analysis panel <grafana-panel-roofline-analysis>`
|
||||
(*Supported on MI200 and MI300 only, Ubuntu 20.04/22.04/24.04, SLES 15 SP3 or RHEL8*)
|
||||
(*Supported on MI200 and above architectures only*)
|
||||
|
||||
* :ref:`Command Processor (CP) panel <grafana-panel-cp>`
|
||||
|
||||
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
pmc: SQ_INSTS_VALU_ADD_F16 SQ_INSTS_VALU_MUL_F16 SQ_INSTS_VALU_FMA_F16 SQ_INSTS_VALU_TRANS_F16 SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MUL_F32 SQ_INSTS_VALU_FMA_F32 SQ_INSTS_VALU_TRANS_F32
|
||||
|
||||
pmc: SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MUL_F64 SQ_INSTS_VALU_FMA_F64 SQ_INSTS_VALU_TRANS_F64 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64 TCP_TCC_READ_REQ_sum TCC_EA0_RDREQ_32B_sum TCC_EA0_RDREQ_sum TCC_EA0_WRREQ_64B_sum TCC_EA0_WRREQ_sum TCC_BUBBLE_sum
|
||||
|
||||
pmc: SQ_LDS_IDX_ACTIVE SQ_LDS_BANK_CONFLICT TCP_TOTAL_CACHE_ACCESSES_sum TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum
|
||||
|
||||
#ROOF:MI300 series and above
|
||||
pmc: SQ_INSTS_VALU_MFMA_MOPS_F8
|
||||
|
||||
range:
|
||||
gpu:
|
||||
kernels:
|
||||
@@ -46,17 +46,6 @@ class gfx950_soc(OmniSoC_Base):
|
||||
)
|
||||
)
|
||||
)
|
||||
else:
|
||||
# NB: We're using generalized Mi300 perfmon configs
|
||||
self.set_perfmon_dir(
|
||||
str(
|
||||
Path(str(config.rocprof_compute_home)).joinpath(
|
||||
"rocprof_compute_soc",
|
||||
"profile_configs",
|
||||
"gfx950",
|
||||
)
|
||||
)
|
||||
)
|
||||
self.set_compatible_profilers(["rocprofv3"])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(
|
||||
|
||||
@@ -48,6 +48,7 @@ SUPPORTED_DATATYPES = {
|
||||
"gfx940": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
|
||||
"gfx941": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
|
||||
"gfx942": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
|
||||
"gfx950": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported:
|
||||
}
|
||||
|
||||
PEAK_OPS_DATATYPES = ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"]
|
||||
|
||||
TEMPAT SAMPAH
Binary file not shown.
TEMPAT SAMPAH
Binary file not shown.
TEMPAT SAMPAH
Binary file not shown.
TEMPAT SAMPAH
Binary file not shown.
TEMPAT SAMPAH
Binary file not shown.
TEMPAT SAMPAH
Binary file not shown.
TEMPAT SAMPAH
Binary file not shown.
TEMPAT SAMPAH
Binary file not shown.
TEMPAT SAMPAH
Binary file not shown.
TEMPAT SAMPAH
Binary file not shown.
TEMPAT SAMPAH
Binary file not shown.
TEMPAT SAMPAH
Binary file not shown.
@@ -939,14 +939,14 @@ def detect_roofline(mspec):
|
||||
elif (
|
||||
(type(sles_distro) == str and len(sles_distro) >= 3)
|
||||
and sles_distro[:2] == "15" # confirm string and len
|
||||
and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3
|
||||
and int(sles_distro[3]) >= 6 # SLES15 and SP >= 6
|
||||
):
|
||||
# Must be a valid SLES machine
|
||||
# Use SP3 binary for all forward compatible service pack versions
|
||||
distro = "15.3"
|
||||
elif ubuntu_distro == "20.04" or ubuntu_distro == "22.04" or ubuntu_distro == "24.04":
|
||||
# Use SP6 binary for all forward compatible service pack versions
|
||||
distro = "15.6"
|
||||
elif ubuntu_distro == "22.04" or ubuntu_distro == "24.04":
|
||||
# Must be a valid Ubuntu machine
|
||||
distro = ubuntu_distro
|
||||
distro = "22.04"
|
||||
else:
|
||||
console_error("roofline", "Cannot find a valid binary for your operating system")
|
||||
|
||||
@@ -984,10 +984,8 @@ def mibench(args, mspec):
|
||||
|
||||
distro_map = {
|
||||
"platform:el8": "rhel8",
|
||||
"15.3": "sles15sp5",
|
||||
"20.04": "ubuntu20_04",
|
||||
"22.04": "ubuntu20_04",
|
||||
"24.04": "ubuntu20_04",
|
||||
"15.6": "sles15sp6",
|
||||
"22.04": "ubuntu22_04",
|
||||
}
|
||||
|
||||
binary_paths = []
|
||||
@@ -1008,8 +1006,6 @@ def mibench(args, mspec):
|
||||
dir
|
||||
+ "-"
|
||||
+ distro_map[target_binary["distro"]]
|
||||
+ "-"
|
||||
+ mspec.gpu_series.lower()
|
||||
+ "-rocm"
|
||||
+ target_binary["rocm_ver"]
|
||||
)
|
||||
|
||||
@@ -542,7 +542,7 @@ def test_path(binary_handler_profile_rocprof_compute):
|
||||
|
||||
@pytest.mark.misc
|
||||
def test_roof_kernel_names(binary_handler_profile_rocprof_compute):
|
||||
if soc in ("MI100", "MI350"):
|
||||
if soc in ("MI100"):
|
||||
# roofline is not supported on MI100
|
||||
assert True
|
||||
# Do not continue testing
|
||||
@@ -557,7 +557,9 @@ def test_roof_kernel_names(binary_handler_profile_rocprof_compute):
|
||||
# assert successful run
|
||||
assert returncode == 0
|
||||
file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels)
|
||||
if soc == "MI200" in soc or "MI300" in soc:
|
||||
if soc == "MI100":
|
||||
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
|
||||
else:
|
||||
assert sorted(list(file_dict.keys())) == sorted(
|
||||
(
|
||||
[f for f in ROOF_ONLY_FILES if f != "timestamps.csv"]
|
||||
@@ -566,8 +568,6 @@ def test_roof_kernel_names(binary_handler_profile_rocprof_compute):
|
||||
)
|
||||
+ ["kernelName_legend.pdf"]
|
||||
)
|
||||
else:
|
||||
assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100
|
||||
|
||||
validate(
|
||||
inspect.stack()[0][3],
|
||||
@@ -1816,7 +1816,7 @@ def test_join_type_kernel(binary_handler_profile_rocprof_compute):
|
||||
@pytest.mark.sort
|
||||
def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute):
|
||||
# only test 1 device for roofline
|
||||
if soc in ("MI100", "MI350"):
|
||||
if soc in ("MI100"):
|
||||
# roofline is not supported on MI100
|
||||
assert True
|
||||
# Do not continue testing
|
||||
@@ -1851,7 +1851,7 @@ def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute):
|
||||
@pytest.mark.sort
|
||||
def test_roof_sort_kernels(binary_handler_profile_rocprof_compute):
|
||||
# only test 1 device for roofline
|
||||
if soc in ("MI100", "MI350"):
|
||||
if soc in ("MI100"):
|
||||
# roofline is not supported on MI100
|
||||
assert True
|
||||
# Do not continue testing
|
||||
@@ -1886,7 +1886,7 @@ def test_roof_sort_kernels(binary_handler_profile_rocprof_compute):
|
||||
@pytest.mark.mem
|
||||
def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute):
|
||||
# only test 1 device for roofline
|
||||
if soc in ("MI100", "MI350"):
|
||||
if soc in ("MI100"):
|
||||
# roofline is not supported on MI100
|
||||
assert True
|
||||
# Do not continue testing
|
||||
@@ -1921,7 +1921,7 @@ def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute):
|
||||
@pytest.mark.mem
|
||||
def test_roof_mem_levels_LDS(binary_handler_profile_rocprof_compute):
|
||||
# only test 1 device for roofline
|
||||
if soc in ("MI100", "MI350"):
|
||||
if soc in ("MI100"):
|
||||
# roofline is not supported on MI100
|
||||
assert True
|
||||
# Do not continue testing
|
||||
|
||||
Reference in New Issue
Block a user