diff --git a/projects/rocprofiler-compute/CHANGELOG.md b/projects/rocprofiler-compute/CHANGELOG.md index 459288e14c..bb1a80a58f 100644 --- a/projects/rocprofiler-compute/CHANGELOG.md +++ b/projects/rocprofiler-compute/CHANGELOG.md @@ -66,6 +66,8 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. ### Changed +* Change the default rocprof version to rocprofv3; this is used when the environment variable "ROCPROF" is not set +* Change the rocprof version for unit tests to rocprofv3 on all SoCs except MI100 * Change normal_unit default to per_kernel * Change dependency from rocm-smi to amd-smi * Decrease profiling time by not collecting counters not used in post analysis @@ -83,6 +85,7 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs. ### Known issues +* Profiling on MI100 will not work unless the ROCPROF=rocprofv1 environment variable is explicitly provided * GPU id filtering is not supported when using rocprof v3 * Analysis of previously collected workload data will not work due to sysinfo.csv schema change diff --git a/projects/rocprofiler-compute/docs/reference/compatible-accelerators.rst b/projects/rocprofiler-compute/docs/reference/compatible-accelerators.rst index 65a3f70a1b..3dd2548f97 100644 --- a/projects/rocprofiler-compute/docs/reference/compatible-accelerators.rst +++ b/projects/rocprofiler-compute/docs/reference/compatible-accelerators.rst @@ -23,6 +23,9 @@ GPU specifications. 
* - Platform - Status + * - AMD Instinct™ MI350 + - Supported ✅ + * - AMD Instinct™ MI300 - Supported ✅ diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx906.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx906.py deleted file mode 100644 index d0c34a1c9f..0000000000 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx906.py +++ /dev/null @@ -1,63 +0,0 @@ -##############################################################################bl -# MIT License -# -# Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
-##############################################################################el - -from rocprof_compute_soc.soc_base import OmniSoC_Base -from utils.logger import console_error, demarcate -from utils.mi_gpu_spec import mi_gpu_specs - - -class gfx906_soc(OmniSoC_Base): - def __init__(self, args, mspec): - super().__init__(args, mspec) - self.set_arch("gfx906") - self.set_compatible_profilers(["rocprofv1"]) - # Per IP block max number of simultaneous counters. GFX IP Blocks - self.set_perfmon_config({mi_gpu_specs.get_perfmon_config("gfx906")}) - - # Set arch specific specs - self._mspec._l2_banks = 16 - self._mspec.lds_banks_per_cu = 32 - self._mspec.pipes_per_gpu = 4 - - # ----------------------- - # Required child methods - # ----------------------- - @demarcate - def profiling_setup(self): - """Perform any SoC-specific setup prior to profiling.""" - super().profiling_setup() - if self.get_args().roof_only: - console_error("%s does not support roofline analysis" % self.get_arch()) - # Perfmon filtering - self.perfmon_filter(self.get_args().roof_only) - - @demarcate - def post_profiling(self): - """Perform any SoC-specific post profiling activities.""" - super().post_profiling() - - @demarcate - def analysis_setup(self): - """Perform any SoC-specific setup prior to analysis.""" - super().analysis_setup() diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py index 0cc6f0fdeb..b4cdfe42f4 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/soc_gfx908.py @@ -35,7 +35,7 @@ class gfx908_soc(OmniSoC_Base): super().__init__(args, mspec) self.set_arch("gfx908") self.set_compatible_profilers( - ["rocprofv1", "rocprofv3", "rocprofiler-sdk"] + ["rocprofv1"] ) # Per IP block max number of simultaneous counters. 
GFX IP Blocks self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx908")) diff --git a/projects/rocprofiler-compute/src/utils/utils.py b/projects/rocprofiler-compute/src/utils/utils.py index 0a428432a2..43a12ae10b 100644 --- a/projects/rocprofiler-compute/src/utils/utils.py +++ b/projects/rocprofiler-compute/src/utils/utils.py @@ -323,7 +323,7 @@ def detect_rocprof(args): # detect rocprof if not "ROCPROF" in os.environ.keys(): # default rocprof - rocprof_cmd = "rocprof" + rocprof_cmd = "rocprofv3" else: rocprof_cmd = os.environ["ROCPROF"]