diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index 33042478fe..9b1bbdacc7 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -45,6 +45,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Roofline support for RHEL 10 +* Roofline support for MI350 series architecture + ### Changed * Change the default rocprof version to v3 when environment variable "ROCPROF" is not set @@ -74,6 +76,10 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. * Analysis of new workloads might require providing shader/memory clock speed using --specs-correction operation if `amd-smi` or `rocminfo` does not provide clock speeds. +### Removed + +* Roofline support for Ubuntu 20.04 and SLES below 15.6 + ## ROCm Compute Profiler 3.1.0 for ROCm 6.4.0 ### Added diff --git a/projects/rocprofiler-compute/docs/what-is-rocprof-compute.rst b/projects/rocprofiler-compute/docs/what-is-rocprof-compute.rst index bba1b24e6d..b40b1ad10d 100644 --- a/projects/rocprofiler-compute/docs/what-is-rocprof-compute.rst +++ b/projects/rocprofiler-compute/docs/what-is-rocprof-compute.rst @@ -89,7 +89,7 @@ high level. 
* :ref:`Memory Chart Analysis panel ` * :ref:`Roofline Analysis panel ` - (*Supported on MI200 and MI300 only, Ubuntu 20.04/22.04/24.04, SLES 15 SP3 or RHEL8*) + (*Supported on MI200 and above architectures only*) * :ref:`Command Processor (CP) panel ` diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/gfx950/roofline/pmc_roof_perf.txt b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/gfx950/roofline/pmc_roof_perf.txt new file mode 100644 index 0000000000..b689b5534e --- /dev/null +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/gfx950/roofline/pmc_roof_perf.txt @@ -0,0 +1,12 @@ +pmc: SQ_INSTS_VALU_ADD_F16 SQ_INSTS_VALU_MUL_F16 SQ_INSTS_VALU_FMA_F16 SQ_INSTS_VALU_TRANS_F16 SQ_INSTS_VALU_ADD_F32 SQ_INSTS_VALU_MUL_F32 SQ_INSTS_VALU_FMA_F32 SQ_INSTS_VALU_TRANS_F32 + +pmc: SQ_INSTS_VALU_ADD_F64 SQ_INSTS_VALU_MUL_F64 SQ_INSTS_VALU_FMA_F64 SQ_INSTS_VALU_TRANS_F64 SQ_INSTS_VALU_MFMA_MOPS_F16 SQ_INSTS_VALU_MFMA_MOPS_BF16 SQ_INSTS_VALU_MFMA_MOPS_F32 SQ_INSTS_VALU_MFMA_MOPS_F64 TCP_TCC_READ_REQ_sum TCC_EA0_RDREQ_32B_sum TCC_EA0_RDREQ_sum TCC_EA0_WRREQ_64B_sum TCC_EA0_WRREQ_sum TCC_BUBBLE_sum + +pmc: SQ_LDS_IDX_ACTIVE SQ_LDS_BANK_CONFLICT TCP_TOTAL_CACHE_ACCESSES_sum TCP_TCC_WRITE_REQ_sum TCP_TCC_ATOMIC_WITH_RET_REQ_sum TCP_TCC_ATOMIC_WITHOUT_RET_REQ_sum + +#ROOF:MI300 series and above +pmc: SQ_INSTS_VALU_MFMA_MOPS_F8 + +range: +gpu: +kernels: diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx950.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx950.py index e3e3a1bdfb..73ab17651d 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx950.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx950.py @@ -46,17 +46,6 @@ class gfx950_soc(OmniSoC_Base): ) ) ) - else: - # NB: We're using generalized Mi300 perfmon configs - self.set_perfmon_dir( - str( - Path(str(config.rocprof_compute_home)).joinpath( - "rocprof_compute_soc", - 
"profile_configs", - "gfx950", - ) - ) - ) self.set_compatible_profilers(["rocprofv3"]) # Per IP block max number of simultaneous counters. GFX IP Blocks self.set_perfmon_config( diff --git a/projects/rocprofiler-compute/src/utils/roofline_calc.py b/projects/rocprofiler-compute/src/utils/roofline_calc.py index 089eed0e1c..57227fd3bb 100644 --- a/projects/rocprofiler-compute/src/utils/roofline_calc.py +++ b/projects/rocprofiler-compute/src/utils/roofline_calc.py @@ -48,6 +48,7 @@ SUPPORTED_DATATYPES = { "gfx940": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported: "gfx941": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported: "gfx942": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported: + "gfx950": ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"], # Unsupported: } PEAK_OPS_DATATYPES = ["FP8", "FP16", "BF16", "FP32", "FP64", "I8", "I32", "I64"] diff --git a/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-mi200-rocm5 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-mi200-rocm5 deleted file mode 100755 index fc293bf186..0000000000 Binary files a/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-mi200-rocm5 and /dev/null differ diff --git a/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-mi200-rocm6 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-mi200-rocm6 deleted file mode 100755 index b4f466bc5c..0000000000 Binary files a/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-mi200-rocm6 and /dev/null differ diff --git a/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-mi300-rocm6 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-mi300-rocm6 deleted file mode 100755 index b4f466bc5c..0000000000 Binary files a/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-mi300-rocm6 and /dev/null differ diff --git 
a/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-rocm6 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-rocm6 new file mode 100755 index 0000000000..50ff00e825 Binary files /dev/null and b/projects/rocprofiler-compute/src/utils/rooflines/roofline-rhel8-rocm6 differ diff --git a/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp5-mi200-rocm5 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp5-mi200-rocm5 deleted file mode 100755 index 20ff63b24f..0000000000 Binary files a/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp5-mi200-rocm5 and /dev/null differ diff --git a/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp5-mi200-rocm6 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp5-mi200-rocm6 deleted file mode 100755 index 7351498fac..0000000000 Binary files a/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp5-mi200-rocm6 and /dev/null differ diff --git a/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp5-mi300-rocm6 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp5-mi300-rocm6 deleted file mode 100755 index 7351498fac..0000000000 Binary files a/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp5-mi300-rocm6 and /dev/null differ diff --git a/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp6-rocm6 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp6-rocm6 new file mode 100755 index 0000000000..e890902115 Binary files /dev/null and b/projects/rocprofiler-compute/src/utils/rooflines/roofline-sles15sp6-rocm6 differ diff --git a/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu20_04-mi200-rocm5 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu20_04-mi200-rocm5 deleted file mode 100755 index 35dcedb975..0000000000 Binary files a/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu20_04-mi200-rocm5 and 
/dev/null differ diff --git a/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu20_04-mi200-rocm6 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu20_04-mi200-rocm6 deleted file mode 100755 index de3f68a74f..0000000000 Binary files a/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu20_04-mi200-rocm6 and /dev/null differ diff --git a/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu20_04-mi300-rocm6 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu20_04-mi300-rocm6 deleted file mode 100755 index de3f68a74f..0000000000 Binary files a/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu20_04-mi300-rocm6 and /dev/null differ diff --git a/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu22_04-rocm6 b/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu22_04-rocm6 new file mode 100755 index 0000000000..1f1d57b7f1 Binary files /dev/null and b/projects/rocprofiler-compute/src/utils/rooflines/roofline-ubuntu22_04-rocm6 differ diff --git a/projects/rocprofiler-compute/src/utils/utils.py b/projects/rocprofiler-compute/src/utils/utils.py index b77f916ac9..b6b382ce18 100644 --- a/projects/rocprofiler-compute/src/utils/utils.py +++ b/projects/rocprofiler-compute/src/utils/utils.py @@ -939,14 +939,14 @@ def detect_roofline(mspec): elif ( (type(sles_distro) == str and len(sles_distro) >= 3) and sles_distro[:2] == "15" # confirm string and len - and int(sles_distro[3]) >= 3 # SLES15 and SP >= 3 + and int(sles_distro[3]) >= 6 # SLES15 and SP >= 6 ): # Must be a valid SLES machine - # Use SP3 binary for all forward compatible service pack versions - distro = "15.3" - elif ubuntu_distro == "20.04" or ubuntu_distro == "22.04" or ubuntu_distro == "24.04": + # Use SP6 binary for all forward compatible service pack versions + distro = "15.6" + elif ubuntu_distro == "22.04" or ubuntu_distro == "24.04": # Must be a valid Ubuntu machine - distro = ubuntu_distro + distro = 
"22.04" else: console_error("roofline", "Cannot find a valid binary for your operating system") @@ -984,10 +984,8 @@ def mibench(args, mspec): distro_map = { "platform:el8": "rhel8", - "15.3": "sles15sp5", - "20.04": "ubuntu20_04", - "22.04": "ubuntu20_04", - "24.04": "ubuntu20_04", + "15.6": "sles15sp6", + "22.04": "ubuntu22_04", } binary_paths = [] @@ -1008,8 +1006,6 @@ def mibench(args, mspec): dir + "-" + distro_map[target_binary["distro"]] - + "-" - + mspec.gpu_series.lower() + "-rocm" + target_binary["rocm_ver"] ) diff --git a/projects/rocprofiler-compute/tests/test_profile_general.py b/projects/rocprofiler-compute/tests/test_profile_general.py index 944204d1d8..fdb06acd23 100644 --- a/projects/rocprofiler-compute/tests/test_profile_general.py +++ b/projects/rocprofiler-compute/tests/test_profile_general.py @@ -542,7 +542,7 @@ def test_path(binary_handler_profile_rocprof_compute): @pytest.mark.misc def test_roof_kernel_names(binary_handler_profile_rocprof_compute): - if soc in ("MI100", "MI350"): + if soc in ("MI100"): # roofline is not supported on MI100 assert True # Do not continue testing @@ -557,7 +557,9 @@ def test_roof_kernel_names(binary_handler_profile_rocprof_compute): # assert successful run assert returncode == 0 file_dict = test_utils.check_csv_files(workload_dir, 1, num_kernels) - if soc == "MI200" in soc or "MI300" in soc: + if soc == "MI100": + assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100 + else: assert sorted(list(file_dict.keys())) == sorted( ( [f for f in ROOF_ONLY_FILES if f != "timestamps.csv"] @@ -566,8 +568,6 @@ def test_roof_kernel_names(binary_handler_profile_rocprof_compute): ) + ["kernelName_legend.pdf"] ) - else: - assert sorted(list(file_dict.keys())) == ALL_CSVS_MI100 validate( inspect.stack()[0][3], @@ -1816,7 +1816,7 @@ def test_join_type_kernel(binary_handler_profile_rocprof_compute): @pytest.mark.sort def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute): # only test 1 device for roofline - if soc 
in ("MI100", "MI350"): + if soc in ("MI100"): # roofline is not supported on MI100 assert True # Do not continue testing @@ -1851,7 +1851,7 @@ def test_roof_sort_dispatches(binary_handler_profile_rocprof_compute): @pytest.mark.sort def test_roof_sort_kernels(binary_handler_profile_rocprof_compute): # only test 1 device for roofline - if soc in ("MI100", "MI350"): + if soc in ("MI100"): # roofline is not supported on MI100 assert True # Do not continue testing @@ -1886,7 +1886,7 @@ def test_roof_sort_kernels(binary_handler_profile_rocprof_compute): @pytest.mark.mem def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute): # only test 1 device for roofline - if soc in ("MI100", "MI350"): + if soc in ("MI100"): # roofline is not supported on MI100 assert True # Do not continue testing @@ -1921,7 +1921,7 @@ def test_roof_mem_levels_vL1D(binary_handler_profile_rocprof_compute): @pytest.mark.mem def test_roof_mem_levels_LDS(binary_handler_profile_rocprof_compute): # only test 1 device for roofline - if soc in ("MI100", "MI350"): + if soc in ("MI100"): # roofline is not supported on MI100 assert True # Do not continue testing