Fix counter collection for MI 350 (#764)
* Fix TCC counter collection bug
* Fix accumulate counter collection
* Fix formatting
[ROCm/rocprofiler-compute commit: 5950a02ff6]
This commit is contained in:
@@ -358,18 +358,22 @@ class RocProfCompute_Base:
|
||||
input_files.sort()
|
||||
|
||||
total_runs = len(input_files)
|
||||
total_profiling_time_so_far = 0
|
||||
avg_profiling_time = 0
|
||||
total_profiling_time_so_far = 0
|
||||
avg_profiling_time = 0
|
||||
|
||||
for i, fname in enumerate(input_files):
|
||||
run_number = i + 1
|
||||
if i > 0:
|
||||
avg_profiling_time = total_profiling_time_so_far / i
|
||||
time_left_seconds = (total_runs - run_number) * avg_profiling_time
|
||||
time_left = format_time(time_left_seconds)
|
||||
console_log(f"[Run {run_number}/{total_runs}][Approximate profiling time left: {time_left}]...")
|
||||
else:
|
||||
console_log(f"[Run {run_number}/{total_runs}][Approximate profiling time left: pending first measurement...]")
|
||||
if i > 0:
|
||||
avg_profiling_time = total_profiling_time_so_far / i
|
||||
time_left_seconds = (total_runs - run_number) * avg_profiling_time
|
||||
time_left = format_time(time_left_seconds)
|
||||
console_log(
|
||||
f"[Run {run_number}/{total_runs}][Approximate profiling time left: {time_left}]..."
|
||||
)
|
||||
else:
|
||||
console_log(
|
||||
f"[Run {run_number}/{total_runs}][Approximate profiling time left: pending first measurement...]"
|
||||
)
|
||||
|
||||
# Kernel filtering (in-place replacement)
|
||||
if not self.__args.kernel == None:
|
||||
@@ -439,7 +443,7 @@ class RocProfCompute_Base:
|
||||
)
|
||||
else:
|
||||
console_error("Profiler not supported")
|
||||
total_profiling_time_so_far += actual_profiling_duration
|
||||
total_profiling_time_so_far += actual_profiling_duration
|
||||
if self.__pc_sampling == True and self.__profiler in (
|
||||
"rocprofv3",
|
||||
"rocprofiler-sdk",
|
||||
@@ -454,7 +458,7 @@ class RocProfCompute_Base:
|
||||
rocprofiler_sdk_library_path=self.get_args().rocprofiler_sdk_library_path,
|
||||
)
|
||||
end_run_prof = time.time()
|
||||
pc_sampling_duration = end_run_prof - start_run_prof
|
||||
pc_sampling_duration = end_run_prof - start_run_prof
|
||||
console_debug(
|
||||
"The time of pc sampling profiling is {} m {} sec".format(
|
||||
int((end_run_prof - start_run_prof) / 60),
|
||||
|
||||
+5
@@ -10,6 +10,7 @@ rocprofiler-sdk:
|
||||
- gfx941
|
||||
- gfx940
|
||||
- gfx90a
|
||||
- gfx950
|
||||
expression: accumulate(SQ_IFETCH_LEVEL, HIGH_RES)
|
||||
- name: SQ_INST_LEVEL_LDS_ACCUM
|
||||
description: 'SQ_INST_LEVEL_LDS accumulation'
|
||||
@@ -20,6 +21,7 @@ rocprofiler-sdk:
|
||||
- gfx941
|
||||
- gfx940
|
||||
- gfx90a
|
||||
- gfx950
|
||||
expression: accumulate(SQ_INST_LEVEL_LDS, HIGH_RES)
|
||||
- name: SQ_INST_LEVEL_SMEM_ACCUM
|
||||
description: 'SQ_INST_LEVEL_SMEM accumulation'
|
||||
@@ -30,6 +32,7 @@ rocprofiler-sdk:
|
||||
- gfx941
|
||||
- gfx940
|
||||
- gfx90a
|
||||
- gfx950
|
||||
expression: accumulate(SQ_INST_LEVEL_SMEM, HIGH_RES)
|
||||
- name: SQ_INST_LEVEL_VMEM_ACCUM
|
||||
description: 'SQ_INST_LEVEL_VMEM accumulation'
|
||||
@@ -40,6 +43,7 @@ rocprofiler-sdk:
|
||||
- gfx941
|
||||
- gfx940
|
||||
- gfx90a
|
||||
- gfx950
|
||||
expression: accumulate(SQ_INST_LEVEL_VMEM, HIGH_RES)
|
||||
- name: SQ_LEVEL_WAVES_ACCUM
|
||||
description: 'SQ_LEVEL_WAVES accumulation'
|
||||
@@ -50,4 +54,5 @@ rocprofiler-sdk:
|
||||
- gfx941
|
||||
- gfx940
|
||||
- gfx90a
|
||||
- gfx950
|
||||
expression: accumulate(SQ_LEVEL_WAVES, HIGH_RES)
|
||||
|
||||
@@ -34,12 +34,14 @@ import selectors
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from collections import OrderedDict
|
||||
from itertools import product
|
||||
from pathlib import Path as path
|
||||
|
||||
import pandas as pd
|
||||
import yaml
|
||||
|
||||
import config
|
||||
from utils.logger import (
|
||||
@@ -789,7 +791,6 @@ def run_prof(
|
||||
|
||||
console_debug("pmc file: %s" % path(fname).name)
|
||||
|
||||
path_counter_config_yaml = path(fname).with_suffix(".yaml")
|
||||
# standard rocprof options
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
options = profiler_options
|
||||
@@ -805,17 +806,57 @@ def run_prof(
|
||||
else:
|
||||
options = ["-A", "absolute"] + options
|
||||
|
||||
new_env = None
|
||||
|
||||
path_counter_config_yaml = path(fname).with_suffix(".yaml")
|
||||
if using_v3() and path_counter_config_yaml.exists():
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
with open(path_counter_config_yaml, "r") as file:
|
||||
options["ROCPROF_EXTRA_COUNTERS_CONTENTS"] = file.read()
|
||||
else:
|
||||
options = ["-E", str(path_counter_config_yaml)] + options
|
||||
# Get extra counter definitions
|
||||
with open(path_counter_config_yaml, "r") as file:
|
||||
extra_counter_defs = yaml.safe_load(file)
|
||||
if extra_counter_defs:
|
||||
# Get default counter definitions path
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
counter_defs_path = (
|
||||
path(options["ROCP_TOOL_LIBRARIES"])
|
||||
.resolve()
|
||||
.parent.parent.parent.joinpath(
|
||||
"share", "rocprofiler-sdk", "counter_defs.yaml"
|
||||
)
|
||||
)
|
||||
else:
|
||||
counter_defs_path = (
|
||||
path(shutil.which(rocprof_cmd))
|
||||
.resolve()
|
||||
.parent.parent.joinpath(
|
||||
"share", "rocprofiler-sdk", "counter_defs.yaml"
|
||||
)
|
||||
)
|
||||
# Get default counter definitions
|
||||
with open(counter_defs_path, "r") as file:
|
||||
counter_defs = yaml.safe_load(file)
|
||||
# Merge counter definitions
|
||||
counter_defs["rocprofiler-sdk"]["counters"].extend(
|
||||
extra_counter_defs["rocprofiler-sdk"]["counters"]
|
||||
)
|
||||
# Write merged counter definitions to a temporary file
|
||||
tmp_dir = tempfile.mkdtemp(prefix="rocprof_counter_defs_", dir="/tmp")
|
||||
tmpfile_path = path(tmp_dir) / "counter_defs.yaml"
|
||||
with open(tmpfile_path, "w") as tmpfile:
|
||||
yaml.dump(
|
||||
counter_defs, tmpfile, default_flow_style=False, sort_keys=False
|
||||
)
|
||||
# Set the environment variable to point to the temporary file
|
||||
if not new_env:
|
||||
new_env = os.environ.copy()
|
||||
new_env["ROCPROFILER_METRICS_PATH"] = str(path(tmp_dir))
|
||||
console_debug(
|
||||
f"Adding env var for extra counters: ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}"
|
||||
)
|
||||
|
||||
# set required env var for mi300
|
||||
new_env = None
|
||||
if mspec.gpu_model.lower() not in ("mi50", "mi60", "mi210", "mi250", "mi250x"):
|
||||
new_env = os.environ.copy()
|
||||
if not new_env:
|
||||
new_env = os.environ.copy()
|
||||
new_env["ROCPROFILER_INDIVIDUAL_XCC_MODE"] = "1"
|
||||
|
||||
is_timestamps = False
|
||||
@@ -853,6 +894,10 @@ def run_prof(
|
||||
)
|
||||
)
|
||||
|
||||
# Delete temporary files
|
||||
if new_env and "ROCPROFILER_METRICS_PATH" in new_env:
|
||||
shutil.rmtree(new_env["ROCPROFILER_METRICS_PATH"], ignore_errors=True)
|
||||
|
||||
if not success:
|
||||
if loglevel > logging.INFO:
|
||||
for line in output.splitlines():
|
||||
@@ -1553,6 +1598,7 @@ def convert_metric_id_to_panel_idx(metric_id):
|
||||
else:
|
||||
raise Exception(f"Invalid metric id: {metric_id}")
|
||||
|
||||
|
||||
def format_time(seconds):
|
||||
hours = int(seconds // 3600)
|
||||
minutes = int((seconds % 3600) // 60)
|
||||
@@ -1564,4 +1610,4 @@ def format_time(seconds):
|
||||
parts.append(f"{minutes} minute{'s' if minutes != 1 else ''}")
|
||||
if secs > 0 or not parts:
|
||||
parts.append(f"{secs} second{'s' if secs != 1 else ''}")
|
||||
return ', '.join(parts[:-1]) + (' and ' if len(parts) > 1 else '') + parts[-1]
|
||||
return ", ".join(parts[:-1]) + (" and " if len(parts) > 1 else "") + parts[-1]
|
||||
|
||||
Reference in a new issue
Block a user