diff --git a/projects/rocprofiler-compute/sample/mat_mul_max.hip b/projects/rocprofiler-compute/sample/mat_mul_max.hip index 87a30198b8..8645a9b80c 100644 --- a/projects/rocprofiler-compute/sample/mat_mul_max.hip +++ b/projects/rocprofiler-compute/sample/mat_mul_max.hip @@ -1,3 +1,30 @@ +/* +############################################################################## +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +############################################################################## +*/ + #include #include #include @@ -86,4 +113,4 @@ int main() { delete[] h_B; return 0; -} \ No newline at end of file +} diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py index 49c6d989d4..1aeabcd653 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py @@ -503,12 +503,10 @@ class RocProfCompute_Base: console_error("Profiler not supported") # PC sampling data is only collected when block "21" is specified - print(args.filter_blocks) - if not ( - "21" in args.filter_blocks - and "pc_sampling" in args.filter_blocks - and self.__profiler in ("rocprofv3", "rocprofiler-sdk") - ): + if "21" not in args.filter_blocks: + console_warning( + "PC sampling data collection skipped as block 21 is not specified." 
+ ) return total_runs = len(list(Path(args.path).glob("perfmon/*.txt"))) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx9_config_template.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx9_config_template.yaml new file mode 100644 index 0000000000..877cecbc27 --- /dev/null +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/analysis_configs/gfx9_config_template.yaml @@ -0,0 +1,260 @@ +latest_arch: gfx950 +panels: +- file: top_stats.yaml + panel_id: 0 + panel_title: Top Stats + panel_alias: topstats + data_sources: + - type: raw_csv_table + id: 1 + title: Top Kernels + - type: raw_csv_table + id: 2 + title: Dispatch List +- file: system_info.yaml + panel_id: 1 + panel_title: System Info + panel_alias: sysinfo + data_sources: + - type: raw_csv_table + id: 1 + title: System Info +- file: system_speed_of_light.yaml + panel_id: 2 + panel_title: System Speed-of-Light + panel_alias: sol + data_sources: + - type: metric_table + id: 1 + title: System Speed-of-Light +- file: memory_chart.yaml + panel_id: 3 + panel_title: Memory Chart + panel_alias: memchart + data_sources: + - type: metric_table + id: 1 + title: Memory Chart +- file: roofline.yaml + panel_id: 4 + panel_title: Roofline + panel_alias: roof + data_sources: + - type: metric_table + id: 1 + title: Roofline Performance Rates + - type: metric_table + id: 2 + title: Roofline Plot Points +- file: command_processor_cpc_cpf.yaml + panel_id: 5 + panel_title: Command Processor (CPC/CPF) + panel_alias: cpc + data_sources: + - type: metric_table + id: 1 + title: Command processor fetcher (CPF) + - type: metric_table + id: 2 + title: Command processor packet processor (CPC) +- file: workgroup_manager_spi.yaml + panel_id: 6 + panel_title: Workgroup Manager (SPI) + panel_alias: spi + data_sources: + - type: metric_table + id: 1 + title: Workgroup manager utilizations + - type: metric_table + id: 2 + title: Workgroup Manager - Resource Allocation +- 
file: wavefront.yaml + panel_id: 7 + panel_title: Wavefront + panel_alias: wavefront + data_sources: + - type: metric_table + id: 1 + title: Wavefront Launch Stats + - type: metric_table + id: 2 + title: Wavefront Runtime Stats +- file: compute_units_instruction_mix.yaml + panel_id: 10 + panel_title: Compute Units - Instruction Mix + panel_alias: cu_ins + data_sources: + - type: metric_table + id: 1 + title: Overall Instruction Mix + - type: metric_table + id: 2 + title: VALU Arithmetic Instruction Mix + - type: metric_table + id: 3 + title: VMEM Instruction Mix + - type: metric_table + id: 4 + title: MFMA Arithmetic Instruction Mix +- file: compute_units_compute_pipeline.yaml + panel_id: 11 + panel_title: Compute Units - Compute Pipeline + panel_alias: cu_pipe + data_sources: + - type: metric_table + id: 1 + title: Compute Speed-of-Light + - type: metric_table + id: 2 + title: Pipeline Statistics + - type: metric_table + id: 3 + title: Arithmetic Operations +- file: local_data_share_lds.yaml + panel_id: 12 + panel_title: Local Data Share (LDS) + panel_alias: lds + data_sources: + - type: metric_table + id: 1 + title: LDS Speed-of-Light + - type: metric_table + id: 2 + title: LDS Statistics +- file: instruction_cache.yaml + panel_id: 13 + panel_title: Instruction Cache + panel_alias: ins_cache + data_sources: + - type: metric_table + id: 1 + title: L1I Speed-of-Light + - type: metric_table + id: 2 + title: L1I cache accesses + - type: metric_table + id: 3 + title: L1I <-> L2 interface +- file: scalar_l1_data_cache.yaml + panel_id: 14 + panel_title: Scalar L1 Data Cache + panel_alias: sl1d + data_sources: + - type: metric_table + id: 1 + title: Scalar L1D Speed-of-Light + - type: metric_table + id: 2 + title: Scalar L1D cache accesses + - type: metric_table + id: 3 + title: Scalar L1D Cache - L2 Interface +- file: address_processing_unit_and_data_return_path_ta_td.yaml + panel_id: 15 + panel_title: Address Processing Unit and Data Return Path (TA/TD) + panel_alias: 
tatd + data_sources: + - type: metric_table + id: 1 + title: Busy and stall metrics + - type: metric_table + id: 2 + title: Instruction counts + - type: metric_table + id: 3 + title: Spill and stack metrics + - type: metric_table + id: 4 + title: Vector L1 data-return path or Texture Data (TD) +- file: vector_l1_data_cache.yaml + panel_id: 16 + panel_title: Vector L1 Data Cache + panel_alias: vl1d + data_sources: + - type: metric_table + id: 1 + title: vL1D Speed-of-Light + - type: metric_table + id: 2 + title: vL1D cache stall metrics + - type: metric_table + id: 3 + title: vL1D cache access metrics + - type: metric_table + id: 4 + title: L1D - L2 Transactions + - type: metric_table + id: 5 + title: L1 Unified Translation Cache (UTCL1) + - type: metric_table + id: 6 + title: L1D Addr Translation Stalls +- file: l2_cache.yaml + panel_id: 17 + panel_title: L2 Cache + panel_alias: l2 + data_sources: + - type: metric_table + id: 1 + title: L2 Speed-of-Light + - type: metric_table + id: 2 + title: L2-Fabric interface metrics + - type: metric_table + id: 3 + title: L2 Cache Accesses + - type: metric_table + id: 4 + title: L2 Cache Stalls + - type: metric_table + id: 5 + title: L2 - Fabric Interface stalls + - type: metric_table + id: 6 + title: L2 - Fabric interface detailed metrics +- file: l2_cache_per_channel.yaml + panel_id: 18 + panel_title: L2 Cache (per Channel) + panel_alias: l2_per_channel + data_sources: + - type: metric_table + id: 1 + title: Aggregate Stats (All channels) + - type: metric_table + id: 2 + title: L2 Cache Hit Rate (pct) + - type: metric_table + id: 3 + title: L2 Requests (per normUnit) + - type: metric_table + id: 4 + title: L2 Requests (per normUnit) + - type: metric_table + id: 5 + title: L2-Fabric Requests (per normUnit) + - type: metric_table + id: 6 + title: L2-Fabric Read Latency (Cycles) + - type: metric_table + id: 7 + title: L2-Fabric Write and Atomic Latency (Cycles) + - type: metric_table + id: 8 + title: L2-Fabric Atomic Latency 
(Cycles) + - type: metric_table + id: 9 + title: L2-Fabric Read Stall (Cycles per normUnit) + - type: metric_table + id: 10 + title: L2-Fabric Write and Atomic Stall (Cycles per normUnit) + - type: metric_table + id: 12 + title: L2-Fabric (128B read requests per normUnit) +- file: pc_sampling.yaml + panel_id: 21 + panel_title: PC Sampling + panel_alias: pc_sampling + data_sources: + - type: pc_sampling_table + id: 1 + title: PC Sampling diff --git a/projects/rocprofiler-compute/src/utils/utils.py b/projects/rocprofiler-compute/src/utils/utils.py index deb562707d..2362f58a5f 100644 --- a/projects/rocprofiler-compute/src/utils/utils.py +++ b/projects/rocprofiler-compute/src/utils/utils.py @@ -1642,7 +1642,10 @@ def load_yaml(filepath: str) -> dict[str, Any]: def get_panel_alias() -> dict[str, str]: + """Map each panel alias in the gfx9 config template to its panel id as a string.""" - panel_yaml = load_yaml("tools/config_management/gfx9_config_template.yaml") + panel_yaml = load_yaml( + f"{config.rocprof_compute_home}/rocprof_compute_soc/analysis_configs/gfx9_config_template.yaml" + ) return { panel["panel_alias"]: str(panel["panel_id"]) for panel in panel_yaml["panels"] }