From 9873460265c0dac10cecefc3262e41794693df2e Mon Sep 17 00:00:00 2001 From: vedithal-amd Date: Sat, 21 Jun 2025 08:48:25 -0400 Subject: [PATCH] Fix counter collection for MI 350 (#764) * Fix TCC counter collection bug * Fix accumulate counter collection * Fix formatting [ROCm/rocprofiler-compute commit: 5950a02ff6b8c43db9cb3c75fb2860a2cd198a00] --- .../rocprof_compute_profile/profiler_base.py | 26 ++++---- .../profile_configs/accum_counters.yaml | 5 ++ .../rocprofiler-compute/src/utils/utils.py | 64 ++++++++++++++++--- 3 files changed, 75 insertions(+), 20 deletions(-) diff --git a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py index 94236a06ca..d275379b95 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py +++ b/projects/rocprofiler-compute/src/rocprof_compute_profile/profiler_base.py @@ -358,18 +358,22 @@ class RocProfCompute_Base: input_files.sort() total_runs = len(input_files) - total_profiling_time_so_far = 0 - avg_profiling_time = 0 + total_profiling_time_so_far = 0 + avg_profiling_time = 0 for i, fname in enumerate(input_files): run_number = i + 1 - if i > 0: - avg_profiling_time = total_profiling_time_so_far / i - time_left_seconds = (total_runs - run_number) * avg_profiling_time - time_left = format_time(time_left_seconds) - console_log(f"[Run {run_number}/{total_runs}][Approximate profiling time left: {time_left}]...") - else: - console_log(f"[Run {run_number}/{total_runs}][Approximate profiling time left: pending first measurement...]") + if i > 0: + avg_profiling_time = total_profiling_time_so_far / i + time_left_seconds = (total_runs - run_number) * avg_profiling_time + time_left = format_time(time_left_seconds) + console_log( + f"[Run {run_number}/{total_runs}][Approximate profiling time left: {time_left}]..." + ) + else: + console_log( + f"[Run {run_number}/{total_runs}][Approximate profiling time left: pending first measurement...]" + ) # Kernel filtering (in-place replacement) if not self.__args.kernel == None: @@ -439,7 +443,7 @@ class RocProfCompute_Base: ) else: console_error("Profiler not supported") - total_profiling_time_so_far += actual_profiling_duration + total_profiling_time_so_far += actual_profiling_duration if self.__pc_sampling == True and self.__profiler in ( "rocprofv3", "rocprofiler-sdk", @@ -454,7 +458,7 @@ class RocProfCompute_Base: rocprofiler_sdk_library_path=self.get_args().rocprofiler_sdk_library_path, ) end_run_prof = time.time() - pc_sampling_duration = end_run_prof - start_run_prof + pc_sampling_duration = end_run_prof - start_run_prof console_debug( "The time of pc sampling profiling is {} m {} sec".format( int((end_run_prof - start_run_prof) / 60), diff --git a/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/accum_counters.yaml b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/accum_counters.yaml index 9c9e9fabcb..649188eb48 100644 --- a/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/accum_counters.yaml +++ b/projects/rocprofiler-compute/src/rocprof_compute_soc/profile_configs/accum_counters.yaml @@ -10,6 +10,7 @@ rocprofiler-sdk: - gfx941 - gfx940 - gfx90a + - gfx950 expression: accumulate(SQ_IFETCH_LEVEL, HIGH_RES) - name: SQ_INST_LEVEL_LDS_ACCUM description: 'SQ_INST_LEVEL_LDS accumulation' @@ -20,6 +21,7 @@ rocprofiler-sdk: - gfx941 - gfx940 - gfx90a + - gfx950 expression: accumulate(SQ_INST_LEVEL_LDS, HIGH_RES) - name: SQ_INST_LEVEL_SMEM_ACCUM description: 'SQ_INST_LEVEL_SMEM accumulation' @@ -30,6 +32,7 @@ rocprofiler-sdk: - gfx941 - gfx940 - gfx90a + - gfx950 expression: accumulate(SQ_INST_LEVEL_SMEM, HIGH_RES) - name: SQ_INST_LEVEL_VMEM_ACCUM description: 'SQ_INST_LEVEL_VMEM accumulation' @@ -40,6 +43,7 @@ rocprofiler-sdk: - gfx941 - gfx940 - gfx90a + - gfx950 expression: accumulate(SQ_INST_LEVEL_VMEM, HIGH_RES) - name: SQ_LEVEL_WAVES_ACCUM description: 'SQ_LEVEL_WAVES accumulation' @@ -50,4 +54,5 @@ rocprofiler-sdk: - gfx941 - gfx940 - gfx90a + - gfx950 expression: accumulate(SQ_LEVEL_WAVES, HIGH_RES) diff --git a/projects/rocprofiler-compute/src/utils/utils.py b/projects/rocprofiler-compute/src/utils/utils.py index d5cc848827..adf34c0ffd 100644 --- a/projects/rocprofiler-compute/src/utils/utils.py +++ b/projects/rocprofiler-compute/src/utils/utils.py @@ -34,12 +34,14 @@ import selectors import shutil import subprocess import sys +import tempfile import time from collections import OrderedDict from itertools import product from pathlib import Path as path import pandas as pd +import yaml import config from utils.logger import ( @@ -789,7 +791,6 @@ def run_prof( console_debug("pmc file: %s" % path(fname).name) - path_counter_config_yaml = path(fname).with_suffix(".yaml") # standard rocprof options if rocprof_cmd == "rocprofiler-sdk": options = profiler_options @@ -805,17 +806,57 @@ def run_prof( else: options = ["-A", "absolute"] + options + new_env = None + + path_counter_config_yaml = path(fname).with_suffix(".yaml") if using_v3() and path_counter_config_yaml.exists(): - if rocprof_cmd == "rocprofiler-sdk": - with open(path_counter_config_yaml, "r") as file: - options["ROCPROF_EXTRA_COUNTERS_CONTENTS"] = file.read() - else: - options = ["-E", str(path_counter_config_yaml)] + options + # Get extra counter definitions + with open(path_counter_config_yaml, "r") as file: + extra_counter_defs = yaml.safe_load(file) + if extra_counter_defs: + # Get default counter definitions path + if rocprof_cmd == "rocprofiler-sdk": + counter_defs_path = ( + path(options["ROCP_TOOL_LIBRARIES"]) + .resolve() + .parent.parent.parent.joinpath( + "share", "rocprofiler-sdk", "counter_defs.yaml" + ) + ) + else: + counter_defs_path = ( + path(shutil.which(rocprof_cmd)) + .resolve() + .parent.parent.joinpath( + "share", "rocprofiler-sdk", "counter_defs.yaml" + ) + ) + # Get default counter definitions + with open(counter_defs_path, "r") as file: + counter_defs = yaml.safe_load(file) + # Merge counter definitions + counter_defs["rocprofiler-sdk"]["counters"].extend( + extra_counter_defs["rocprofiler-sdk"]["counters"] + ) + # Write merged counter definitions to a temporary file + tmp_dir = tempfile.mkdtemp(prefix="rocprof_counter_defs_", dir="/tmp") + tmpfile_path = path(tmp_dir) / "counter_defs.yaml" + with open(tmpfile_path, "w") as tmpfile: + yaml.dump( + counter_defs, tmpfile, default_flow_style=False, sort_keys=False + ) + # Set the environment variable to point to the temporary file + if not new_env: + new_env = os.environ.copy() + new_env["ROCPROFILER_METRICS_PATH"] = str(path(tmp_dir)) + console_debug( + f"Adding env var for extra counters: ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}" + ) # set required env var for mi300 - new_env = None if mspec.gpu_model.lower() not in ("mi50", "mi60", "mi210", "mi250", "mi250x"): - new_env = os.environ.copy() + if not new_env: + new_env = os.environ.copy() new_env["ROCPROFILER_INDIVIDUAL_XCC_MODE"] = "1" is_timestamps = False @@ -853,6 +894,10 @@ def run_prof( ) ) + # Delete temporary files + if new_env and "ROCPROFILER_METRICS_PATH" in new_env: + shutil.rmtree(new_env["ROCPROFILER_METRICS_PATH"], ignore_errors=True) + if not success: if loglevel > logging.INFO: for line in output.splitlines(): @@ -1553,6 +1598,7 @@ def convert_metric_id_to_panel_idx(metric_id): else: raise Exception(f"Invalid metric id: {metric_id}") + def format_time(seconds): hours = int(seconds // 3600) minutes = int((seconds % 3600) // 60) @@ -1564,4 +1610,4 @@ def format_time(seconds): parts.append(f"{minutes} minute{'s' if minutes != 1 else ''}") if secs > 0 or not parts: parts.append(f"{secs} second{'s' if secs != 1 else ''}") - return ', '.join(parts[:-1]) + (' and ' if len(parts) > 1 else '') + parts[-1] + return ", ".join(parts[:-1]) + (" and " if len(parts) > 1 else "") + parts[-1]