Add support for MI 100 with rocprofiler-sdk (#768)
* Add custom rocprofiler-sdk counter definitions file for MI 100
* Update CHANGELOG to mention that accumulation counters will not be
collected when profiling on MI 100 using rocprofiler-sdk/rocprofv3
* Migrate accum_counters.yaml to code
[ROCm/rocprofiler-compute commit: a95a45d69a]
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
3a703cec00
Коммит
1d59cbb06d
@@ -81,6 +81,7 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
|
||||
### Resolved issues
|
||||
|
||||
* Fixed MI 100 counters not being collected when rocprofv3 is used
|
||||
* Fixed option specs-correction
|
||||
* Fixed kernel name and kernel dispatch filtering when using rocprof v3
|
||||
* Fixed not collecting TCC channel counters in rocprof v3
|
||||
@@ -88,7 +89,9 @@ Full documentation for ROCm Compute Profiler is available at [https://rocm.docs.
|
||||
|
||||
### Known issues
|
||||
|
||||
* Profiling on MI 100 will not work unless the ROCPROF=rocprofv1 environment variable is explicitly provided
|
||||
* On MI 100, accumulation counters will not be collected and the following metrics will not show up in analysis: Instruction Fetch Latency, Wavefront Occupancy, LDS Latency
|
||||
* As a workaround, set the ROCPROF=rocprof environment variable to use rocprofv1 for profiling on MI 100
|
||||
|
||||
* GPU id filtering is not supported when using rocprof v3
|
||||
|
||||
* Analysis of previously collected workload data will not work due to sysinfo.csv schema change
|
||||
|
||||
@@ -33,7 +33,6 @@ from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
import config
|
||||
from utils.logger import (
|
||||
console_debug,
|
||||
console_error,
|
||||
|
||||
-58
@@ -1,58 +0,0 @@
|
||||
rocprofiler-sdk:
|
||||
counters-schema-version: 1
|
||||
counters:
|
||||
- name: SQ_IFETCH_LEVEL_ACCUM
|
||||
description: 'SQ_IFETCH_LEVEL accumulation'
|
||||
properties: []
|
||||
definitions:
|
||||
- architectures:
|
||||
- gfx942
|
||||
- gfx941
|
||||
- gfx940
|
||||
- gfx90a
|
||||
- gfx950
|
||||
expression: accumulate(SQ_IFETCH_LEVEL, HIGH_RES)
|
||||
- name: SQ_INST_LEVEL_LDS_ACCUM
|
||||
description: 'SQ_INST_LEVEL_LDS accumulation'
|
||||
properties: []
|
||||
definitions:
|
||||
- architectures:
|
||||
- gfx942
|
||||
- gfx941
|
||||
- gfx940
|
||||
- gfx90a
|
||||
- gfx950
|
||||
expression: accumulate(SQ_INST_LEVEL_LDS, HIGH_RES)
|
||||
- name: SQ_INST_LEVEL_SMEM_ACCUM
|
||||
description: 'SQ_INST_LEVEL_SMEM accumulation'
|
||||
properties: []
|
||||
definitions:
|
||||
- architectures:
|
||||
- gfx942
|
||||
- gfx941
|
||||
- gfx940
|
||||
- gfx90a
|
||||
- gfx950
|
||||
expression: accumulate(SQ_INST_LEVEL_SMEM, HIGH_RES)
|
||||
- name: SQ_INST_LEVEL_VMEM_ACCUM
|
||||
description: 'SQ_INST_LEVEL_VMEM accumulation'
|
||||
properties: []
|
||||
definitions:
|
||||
- architectures:
|
||||
- gfx942
|
||||
- gfx941
|
||||
- gfx940
|
||||
- gfx90a
|
||||
- gfx950
|
||||
expression: accumulate(SQ_INST_LEVEL_VMEM, HIGH_RES)
|
||||
- name: SQ_LEVEL_WAVES_ACCUM
|
||||
description: 'SQ_LEVEL_WAVES accumulation'
|
||||
properties: []
|
||||
definitions:
|
||||
- architectures:
|
||||
- gfx942
|
||||
- gfx941
|
||||
- gfx940
|
||||
- gfx90a
|
||||
- gfx950
|
||||
expression: accumulate(SQ_LEVEL_WAVES, HIGH_RES)
|
||||
+2841
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@@ -47,13 +47,11 @@ from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.parser import build_in_vars, supported_denom
|
||||
from utils.utils import (
|
||||
add_counter_extra_config_input_yaml,
|
||||
add_counter_from_source_to_target_extra_config_input_yaml,
|
||||
capture_subprocess_output,
|
||||
convert_metric_id_to_panel_idx,
|
||||
detect_rocprof,
|
||||
get_base_spi_pipe_counter,
|
||||
get_submodules,
|
||||
is_counter_existed_in_extra_input_yaml,
|
||||
is_spi_pipe_counter,
|
||||
is_tcc_channel_counter,
|
||||
using_v3,
|
||||
@@ -495,6 +493,18 @@ class OmniSoC_Base:
|
||||
if "Name:" in line:
|
||||
counters, _ = self.parse_counters_text(line.split(":")[1].strip())
|
||||
rocprof_counters.update(counters)
|
||||
# Custom counter support for mi100 for rocprofv3
|
||||
if self._mspec.gpu_model.lower() == "mi100":
|
||||
counter_defs_path = (
|
||||
config.rocprof_compute_home
|
||||
/ "rocprof_compute_soc"
|
||||
/ "profile_configs"
|
||||
/ "gfx908_counter_defs.yaml"
|
||||
)
|
||||
with open(counter_defs_path, "r") as fp:
|
||||
counter_defs_contents = fp.read()
|
||||
counters, _ = self.parse_counters_text(counter_defs_contents)
|
||||
rocprof_counters.update(counters)
|
||||
|
||||
elif str(rocprof_cmd) == "rocprofiler-sdk":
|
||||
MAX_STR = 256
|
||||
@@ -556,6 +566,18 @@ class OmniSoC_Base:
|
||||
rocprof_counters.add(
|
||||
ctypes.cast(name_args, ctypes.c_char_p).value.decode("utf-8")
|
||||
)
|
||||
# Custom counter support for mi100 for rocprofiler-sdk
|
||||
if self._mspec.gpu_model.lower() == "mi100":
|
||||
counter_defs_path = (
|
||||
config.rocprof_compute_home
|
||||
/ "rocprof_compute_soc"
|
||||
/ "profile_configs"
|
||||
/ "gfx908_counter_defs.yaml"
|
||||
)
|
||||
with open(counter_defs_path, "r") as fp:
|
||||
counter_defs_contents = fp.read()
|
||||
counters, _ = self.parse_counters_text(counter_defs_contents)
|
||||
rocprof_counters.update(counters)
|
||||
|
||||
else:
|
||||
console_error(
|
||||
@@ -750,18 +772,6 @@ class OmniSoC_Base:
|
||||
|
||||
else:
|
||||
# Output to files
|
||||
with open(
|
||||
str(
|
||||
Path(config.rocprof_compute_home).joinpath(
|
||||
"rocprof_compute_soc",
|
||||
"profile_configs",
|
||||
"accum_counters.yaml",
|
||||
)
|
||||
),
|
||||
"r",
|
||||
) as fp:
|
||||
accum_counters_def = yaml.safe_load(fp)
|
||||
|
||||
for f in output_files:
|
||||
file_name_txt = str(Path(workload_perfmon_dir).joinpath(f.file_name_txt))
|
||||
file_name_yaml = str(
|
||||
@@ -777,16 +787,49 @@ class OmniSoC_Base:
|
||||
]:
|
||||
pmc.append(ctr)
|
||||
if using_v3():
|
||||
if is_counter_existed_in_extra_input_yaml(
|
||||
accum_counters_def, ctr
|
||||
) and not is_counter_existed_in_extra_input_yaml(
|
||||
counter_def, ctr
|
||||
):
|
||||
counter_def = (
|
||||
add_counter_from_source_to_target_extra_config_input_yaml(
|
||||
accum_counters_def, counter_def, ctr
|
||||
# MI 100 accumulate counters don't work with rocprofiler sdk
|
||||
if self._mspec.gpu_model.lower() != "mi100":
|
||||
# Add accumulation counters definitions
|
||||
if ctr == "SQ_IFETCH_LEVEL":
|
||||
counter_def = add_counter_extra_config_input_yaml(
|
||||
counter_def,
|
||||
"SQ_IFETCH_LEVEL_ACCUM",
|
||||
"SQ_IFETCH_LEVEL accumulation",
|
||||
"accumulate(SQ_IFETCH_LEVEL, HIGH_RES)",
|
||||
[self.__arch],
|
||||
)
|
||||
elif ctr == "SQ_INST_LEVEL_LDS":
|
||||
counter_def = add_counter_extra_config_input_yaml(
|
||||
counter_def,
|
||||
"SQ_INST_LEVEL_LDS_ACCUM",
|
||||
"SQ_INST_LEVEL_LDS accumulation",
|
||||
"accumulate(SQ_INST_LEVEL_LDS, HIGH_RES)",
|
||||
[self.__arch],
|
||||
)
|
||||
elif ctr == "SQ_INST_LEVEL_SMEM":
|
||||
counter_def = add_counter_extra_config_input_yaml(
|
||||
counter_def,
|
||||
"SQ_INST_LEVEL_SMEM_ACCUM",
|
||||
"SQ_INST_LEVEL_SMEM accumulation",
|
||||
"accumulate(SQ_INST_LEVEL_SMEM, HIGH_RES)",
|
||||
[self.__arch],
|
||||
)
|
||||
elif ctr == "SQ_INST_LEVEL_VMEM":
|
||||
counter_def = add_counter_extra_config_input_yaml(
|
||||
counter_def,
|
||||
"SQ_INST_LEVEL_VMEM_ACCUM",
|
||||
"SQ_INST_LEVEL_VMEM accumulation",
|
||||
"accumulate(SQ_INST_LEVEL_VMEM, HIGH_RES)",
|
||||
[self.__arch],
|
||||
)
|
||||
elif ctr == "SQ_LEVEL_WAVES":
|
||||
counter_def = add_counter_extra_config_input_yaml(
|
||||
counter_def,
|
||||
"SQ_LEVEL_WAVES_ACCUM",
|
||||
"SQ_LEVEL_WAVES accumulation",
|
||||
"accumulate(SQ_LEVEL_WAVES, HIGH_RES)",
|
||||
[self.__arch],
|
||||
)
|
||||
)
|
||||
# Add TCC channel counters definitions
|
||||
if is_tcc_channel_counter(ctr):
|
||||
counter_name = ctr.split("[")[0]
|
||||
@@ -813,10 +856,9 @@ class OmniSoC_Base:
|
||||
fd.close()
|
||||
|
||||
# Write counter definitions to file
|
||||
if using_v3():
|
||||
if counter_def:
|
||||
with open(file_name_yaml, "w") as fp:
|
||||
if counter_def:
|
||||
fp.write(yaml.dump(counter_def, sort_keys=False))
|
||||
fp.write(yaml.dump(counter_def, sort_keys=False))
|
||||
|
||||
# Add a timestamp file
|
||||
# TODO: Does v3 need this?
|
||||
|
||||
@@ -34,7 +34,7 @@ class gfx908_soc(OmniSoC_Base):
|
||||
def __init__(self, args, mspec):
|
||||
super().__init__(args, mspec)
|
||||
self.set_arch("gfx908")
|
||||
self.set_compatible_profilers(["rocprofv1"])
|
||||
self.set_compatible_profilers(["rocprofv1", "rocprofv3", "rocprofiler-sdk"])
|
||||
# Per IP block max number of simultaneous counters. GFX IP Blocks
|
||||
self.set_perfmon_config(mi_gpu_specs.get_perfmon_config("gfx908"))
|
||||
|
||||
|
||||
@@ -62,21 +62,6 @@ def is_tcc_channel_counter(counter):
|
||||
return counter.startswith("TCC") and counter.endswith("]")
|
||||
|
||||
|
||||
def is_counter_existed_in_extra_input_yaml(data: dict, counter_name: str) -> bool:
|
||||
"""
|
||||
Check if a counter with the given name exists in the rocprofiler-sdk counters.
|
||||
|
||||
Args:
|
||||
data (dict): The loaded YAML dictionary.
|
||||
counter_name (str): The name of the counter to check.
|
||||
|
||||
Returns:
|
||||
bool: True if the counter exists, False otherwise.
|
||||
"""
|
||||
counters = data.get("rocprofiler-sdk", {}).get("counters", [])
|
||||
return any(counter.get("name") == counter_name for counter in counters)
|
||||
|
||||
|
||||
def add_counter_extra_config_input_yaml(
|
||||
data: dict,
|
||||
counter_name: str,
|
||||
@@ -172,46 +157,6 @@ def extract_counter_info_extra_config_input_yaml(
|
||||
return None
|
||||
|
||||
|
||||
def add_counter_from_source_to_target_extra_config_input_yaml(
|
||||
source_data: dict, target_data: dict, counter_name: str
|
||||
) -> dict:
|
||||
"""
|
||||
Check if counter_name exists in source_data, and if yes, add it to target_data.
|
||||
|
||||
Args:
|
||||
source_data (dict): Source YAML dictionary to extract from.
|
||||
target_data (dict): Target YAML dictionary to add to.
|
||||
counter_name (str): Name of the counter to copy.
|
||||
|
||||
Returns:
|
||||
dict: Updated target_data dictionary.
|
||||
"""
|
||||
counter = extract_counter_info_extra_config_input_yaml(source_data, counter_name)
|
||||
if not counter:
|
||||
raise ValueError(f"Counter '{counter_name}' not found in source data")
|
||||
|
||||
# Extract required info
|
||||
name = counter.get("name")
|
||||
description = counter.get("description", "")
|
||||
properties = counter.get("properties", [])
|
||||
definitions = counter.get("definitions", [])
|
||||
|
||||
if not definitions:
|
||||
raise ValueError(f"Counter '{counter_name}' has no definitions")
|
||||
|
||||
architectures = definitions[0].get("architectures", [])
|
||||
expression = definitions[0].get("expression", "")
|
||||
|
||||
return add_counter_extra_config_input_yaml(
|
||||
target_data,
|
||||
counter_name=name,
|
||||
description=description,
|
||||
expression=expression,
|
||||
architectures=architectures,
|
||||
properties=properties,
|
||||
)
|
||||
|
||||
|
||||
def is_spi_pipe_counter(counter):
|
||||
for pattern in spi_pipe_counter_regexs:
|
||||
if re.match(pattern, counter):
|
||||
@@ -806,57 +751,66 @@ def run_prof(
|
||||
else:
|
||||
options = ["-A", "absolute"] + options
|
||||
|
||||
new_env = None
|
||||
new_env = os.environ.copy()
|
||||
|
||||
path_counter_config_yaml = path(fname).with_suffix(".yaml")
|
||||
if using_v3() and path_counter_config_yaml.exists():
|
||||
if using_v3():
|
||||
# Default counter definitions
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
counter_defs_path = (
|
||||
path(options["ROCP_TOOL_LIBRARIES"])
|
||||
.resolve()
|
||||
.parent.parent.parent.joinpath(
|
||||
"share", "rocprofiler-sdk", "counter_defs.yaml"
|
||||
)
|
||||
)
|
||||
else:
|
||||
counter_defs_path = (
|
||||
path(shutil.which(rocprof_cmd))
|
||||
.resolve()
|
||||
.parent.parent.joinpath("share", "rocprofiler-sdk", "counter_defs.yaml")
|
||||
)
|
||||
# Custom counter definitions for MI 100
|
||||
if mspec.gpu_model.lower() == "mi100":
|
||||
counter_defs_path = (
|
||||
config.rocprof_compute_home
|
||||
/ "rocprof_compute_soc"
|
||||
/ "profile_configs"
|
||||
/ "gfx908_counter_defs.yaml"
|
||||
)
|
||||
# Read counter definitions
|
||||
with open(counter_defs_path, "r") as file:
|
||||
counter_defs = yaml.safe_load(file)
|
||||
# Get extra counter definitions
|
||||
with open(path_counter_config_yaml, "r") as file:
|
||||
extra_counter_defs = yaml.safe_load(file)
|
||||
if extra_counter_defs:
|
||||
# Get default counter definitions path
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
counter_defs_path = (
|
||||
path(options["ROCP_TOOL_LIBRARIES"])
|
||||
.resolve()
|
||||
.parent.parent.parent.joinpath(
|
||||
"share", "rocprofiler-sdk", "counter_defs.yaml"
|
||||
)
|
||||
)
|
||||
else:
|
||||
counter_defs_path = (
|
||||
path(shutil.which(rocprof_cmd))
|
||||
.resolve()
|
||||
.parent.parent.joinpath(
|
||||
"share", "rocprofiler-sdk", "counter_defs.yaml"
|
||||
)
|
||||
)
|
||||
# Get default counter definitions
|
||||
with open(counter_defs_path, "r") as file:
|
||||
counter_defs = yaml.safe_load(file)
|
||||
# Merge counter definitions
|
||||
path_counter_config_yaml = path(fname).with_suffix(".yaml")
|
||||
if path_counter_config_yaml.exists():
|
||||
with open(path_counter_config_yaml, "r") as file:
|
||||
extra_counter_defs = yaml.safe_load(file)
|
||||
# Merge extra counter definitions
|
||||
counter_defs["rocprofiler-sdk"]["counters"].extend(
|
||||
extra_counter_defs["rocprofiler-sdk"]["counters"]
|
||||
)
|
||||
# Write merged counter definitions to a temporary file
|
||||
tmp_dir = tempfile.mkdtemp(prefix="rocprof_counter_defs_", dir="/tmp")
|
||||
tmpfile_path = path(tmp_dir) / "counter_defs.yaml"
|
||||
with open(tmpfile_path, "w") as tmpfile:
|
||||
yaml.dump(
|
||||
counter_defs, tmpfile, default_flow_style=False, sort_keys=False
|
||||
)
|
||||
# Set the environment variable to point to the temporary file
|
||||
if not new_env:
|
||||
new_env = os.environ.copy()
|
||||
new_env["ROCPROFILER_METRICS_PATH"] = str(path(tmp_dir))
|
||||
console_debug(
|
||||
f"Adding env var for extra counters: ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}"
|
||||
)
|
||||
# Write counter definitions to a temporary file
|
||||
tmpfile_path = (
|
||||
path(tempfile.mkdtemp(prefix="rocprof_counter_defs_", dir="/tmp"))
|
||||
/ "counter_defs.yaml"
|
||||
)
|
||||
with open(tmpfile_path, "w") as tmpfile:
|
||||
yaml.dump(counter_defs, tmpfile, default_flow_style=False, sort_keys=False)
|
||||
# Set rocprofiler sdk counter definitions
|
||||
new_env["ROCPROFILER_METRICS_PATH"] = str(tmpfile_path.parent)
|
||||
console_debug(
|
||||
f"Adding env var for counter definitions: ROCPROFILER_METRICS_PATH={new_env['ROCPROFILER_METRICS_PATH']}"
|
||||
)
|
||||
|
||||
# set required env var for mi300
|
||||
if mspec.gpu_model.lower() not in ("mi50", "mi60", "mi210", "mi250", "mi250x"):
|
||||
if not new_env:
|
||||
new_env = os.environ.copy()
|
||||
# set required env var for >= mi300
|
||||
if mspec.gpu_model.lower() not in (
|
||||
"mi50",
|
||||
"mi60",
|
||||
"mi100",
|
||||
"mi210",
|
||||
"mi250",
|
||||
"mi250x",
|
||||
):
|
||||
new_env["ROCPROFILER_INDIVIDUAL_XCC_MODE"] = "1"
|
||||
|
||||
is_timestamps = False
|
||||
@@ -866,8 +820,6 @@ def run_prof(
|
||||
|
||||
if rocprof_cmd == "rocprofiler-sdk":
|
||||
app_cmd = options.pop("APP_CMD")
|
||||
if not new_env:
|
||||
new_env = os.environ.copy()
|
||||
for key, value in options.items():
|
||||
new_env[key] = value
|
||||
console_debug("rocprof sdk env vars: {}".format(new_env))
|
||||
@@ -878,14 +830,9 @@ def run_prof(
|
||||
else:
|
||||
console_debug("rocprof command: {}".format([rocprof_cmd] + options))
|
||||
# profile the app
|
||||
if new_env:
|
||||
success, output = capture_subprocess_output(
|
||||
[rocprof_cmd] + options, new_env=new_env, profileMode=True
|
||||
)
|
||||
else:
|
||||
success, output = capture_subprocess_output(
|
||||
[rocprof_cmd] + options, profileMode=True
|
||||
)
|
||||
success, output = capture_subprocess_output(
|
||||
[rocprof_cmd] + options, new_env=new_env, profileMode=True
|
||||
)
|
||||
|
||||
time_2 = time.time()
|
||||
console_debug(
|
||||
@@ -894,8 +841,8 @@ def run_prof(
|
||||
)
|
||||
)
|
||||
|
||||
# Delete temporary files
|
||||
if new_env and "ROCPROFILER_METRICS_PATH" in new_env:
|
||||
# Delete counter definition temporary directory
|
||||
if new_env.get("ROCPROFILER_METRICS_PATH"):
|
||||
shutil.rmtree(new_env["ROCPROFILER_METRICS_PATH"], ignore_errors=True)
|
||||
|
||||
if not success:
|
||||
@@ -959,7 +906,7 @@ def run_prof(
|
||||
workload_dir + "/out/pmc_1/results_" + fbase + ".csv", index=False
|
||||
)
|
||||
|
||||
if new_env and not using_v3() and not using_v1():
|
||||
if not using_v3() and not using_v1():
|
||||
# flatten tcc for applicable mi300 input
|
||||
f = path(workload_dir + "/out/pmc_1/results_" + fbase + ".csv")
|
||||
xcds = mi_gpu_specs.get_num_xcds(
|
||||
|
||||
Ссылка в новой задаче
Block a user