Enable single pass counter collection (#833)

[ROCm/rocprofiler-compute commit: 6a77d241ed]
This commit is contained in:
xuchen-amd
2025-08-06 10:35:05 -04:00
کامیت شده توسط GitHub
والد 5084aad00b
کامیت 34dd26fb07
18فایلهای تغییر یافته به همراه778 افزوده شده و 56 حذف شده
@@ -254,6 +254,14 @@ add_test(
${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
# Runs only the pytest cases carrying the "sets_func" marker from
# tests/test_profile_general.py and writes a JUnit report next to the other
# profile test reports.
add_test(
NAME test_profile_sets_func
COMMAND
${Python3_EXECUTABLE} -m pytest -m sets_func
--junitxml=tests/test_profile_sets_func.xml ${COV_OPTION}
${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
set_tests_properties(
test_profile_kernel_execution test_profile_dispatch test_profile_mem test_profile_join
test_profile_sort test_profile_misc PROPERTIES LABELS "profile" RESOURCE_GROUPS
@@ -41,35 +41,37 @@ addopts = [
]
pythonpath = [
".",
"src",
"src/rocprof_compute_soc",
"src/utils",
"src/rocprof_compute_analyze/utils",
"tests"
]
".",
"src",
"src/rocprof_compute_soc",
"src/utils",
"src/rocprof_compute_analyze/utils",
"tests"
]
markers = [
"section",
"kernel_execution",
"misc",
"mem",
"sort",
"join",
"verbosity",
"dispatch",
"list_metrics",
"filter_block",
"filter_kernel",
"dispatch",
"normal_unit",
"max_stat",
"time_unit",
"decimal",
"col",
"kernel_verbose",
"serial",
"L1_cache",
"num_xcds_spec_class",
"num_xcds_cli_output",
"section",
"kernel_execution",
"misc",
"mem",
"sort",
"join",
"verbosity",
"dispatch",
"list_metrics",
"filter_block",
"filter_kernel",
"dispatch",
"normal_unit",
"max_stat",
"time_unit",
"decimal",
"col",
"kernel_verbose",
"serial",
"L1_cache",
"num_xcds_spec_class",
"num_xcds_cli_output",
"sets_func",
"sets_perf"
]
@@ -27,12 +27,11 @@
import argparse
import os
import re
import shutil
from pathlib import Path
def print_avail_arch(avail_arch: list):
    """Build the help text that lists every architecture name.

    Args:
        avail_arch: Iterable of architecture names to list.

    Returns:
        str: A header line followed by one tab-indented line per architecture.
    """
    header = "\t\t\tList all available metrics for analysis on specified arch:"
    arch_lines = ["\n\t\t\t {}".format(arch) for arch in avail_arch]
    return header + "".join(arch_lines)
@@ -207,6 +206,19 @@ Examples:
choices=[""] + list(supported_archs.keys()), # ["gfx908", "gfx90a"],
help=print_avail_arch(supported_archs.keys()),
)
profile_group.add_argument(
"--list-sets",
action="store_true",
help="\t\t\tDisplay available metric sets and their descriptions",
)
profile_group.add_argument(
"--set",
default=None,
dest="set_selected",
help="\t\t\tProfile a set of metrics of topic of interest by collecting counters in a single pass.\n\t\t\tFor available sets, see --list-sets",
)
profile_group.add_argument(
"--config-dir",
dest="config_dir",
@@ -27,7 +27,6 @@
import argparse
import importlib
import os
import shutil
import socket
import sys
import time
@@ -55,6 +54,7 @@ from utils.utils import (
get_submodules,
get_version,
get_version_display,
parse_sets_yaml,
set_locale_encoding,
)
@@ -243,6 +243,53 @@ class RocProfCompute:
else:
console_error("Unsupported arch")
@demarcate
def list_sets(self):
    """Print a table of the metric sets for the current GPU arch, then exit.

    The sets are loaded with ``parse_sets_yaml`` for ``self.__mspec.gpu_arch``.
    Each metric of a set is printed on its own row; the set option and title
    are shown only on the first row of that set. After the table a short
    usage hint is printed and the process exits with status 0.
    """
    sets_info = parse_sets_yaml(self.__mspec.gpu_arch)
    if not sets_info:
        # NOTE(review): console_error presumably aborts here -- confirm.
        console_error("No sets configuration found.")

    table_width = 115
    row_layout = "{:<35} {:<35} {:<30} {:<10}"

    print("\nAvailable Sets:")
    print("=" * table_width)
    # Column headings
    print(row_layout.format("Set Option", "Set Title", "Metric Name", "Metric ID"))
    print("-" * table_width)

    # One group of rows per set
    for set_option, set_data in sets_info.items():
        # Cells identifying the set; blanked after the first printed row.
        leading_cells = (set_option, set_data.get("title", set_option))
        for metric in set_data.get("metric", []):
            if not (isinstance(metric, dict) and metric):
                continue
            # Each metric entry is a single-item mapping: {metric_id: name}
            metric_id, metric_name = next(iter(metric.items()))
            print(row_layout.format(*leading_cells, metric_name, metric_id))
            leading_cells = ("", "")
        # Blank separator line after every set
        print()

    print("Usage Examples:")
    if sets_info:
        first_set = next(iter(sets_info))
        print(f" rocprof-compute profile --set {first_set} # Profile this set")
    print(" rocprof-compute profile --list-sets # Show this help")
    print()

    sys.exit(0)
@demarcate
def run_profiler(self):
self.print_graphic()
@@ -250,6 +297,8 @@ class RocProfCompute:
if self.__args.list_metrics is not None:
self.list_metrics()
elif self.__args.list_sets:
self.list_sets()
elif self.__args.name is None:
sys.exit("Either --list-name or --name is required")
@@ -26,13 +26,12 @@
import csv
import glob
import logging
import os
import re
import shlex
import shutil
import time
from abc import ABC, abstractmethod
from abc import abstractmethod
from pathlib import Path
import pandas as pd
@@ -350,6 +349,10 @@ class RocProfCompute_Base:
console_log("Command: " + str(self.__args.remaining))
console_log("Kernel Selection: " + str(self.__args.kernel))
console_log("Dispatch Selection: " + str(self.__args.dispatch))
if self.get_args().set_selected:
console_log("Set Selection: " + str(self.__args.set_selected))
if self.get_args().filter_blocks is None:
console_log("Report Sections: All")
else:
@@ -0,0 +1,37 @@
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
sets:
- title: Compute Throughput Utilization
set_option: compute_thruput_util
description: Placeholder
metric:
- 11.2.2: SALU Utilization
- 11.2.3: VALU Utilization
- title: Compute Throughput FLOPS
set_option: compute_thruput_flops
description: Placeholder
metric:
- 2.1.2: MFMA FLOPs (BF16)
- 2.1.3: MFMA FLOPs (F16)
- 2.1.4: MFMA FLOPs (F32)
- 2.1.5: MFMA FLOPs (F64)
- 2.1.6: MFMA IOPs (Int8)
- title: Memory Throughput
set_option: mem_thruput
description: Placeholder
metric:
- 2.1.16: Theoretical LDS Bandwidth
- 2.1.17: LDS Bank Conflicts/Access
- 16.1.2: Utilization
- 17.1.0: Utilization
- title: Launch Stats
set_option: launch_stats
description: Placeholder
metric:
- 7.1.0: Grid Size
- 7.1.1: Workgroup Size
- 7.1.2: Total Wavefronts
- 7.1.5: VGPRs
- 7.1.6: AGPRs
- 7.1.7: SGPRs
- 7.1.8: LDS Allocation
- 7.1.9: Scratch Allocation
@@ -0,0 +1,39 @@
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
sets:
- title: Compute Throughput Utilization
set_option: compute_thruput_util
description: Placeholder
metric:
- 11.2.3: VALU Utilization
- 11.2.4: VMEM Utilization
- 11.2.5: Branch Utilization
- 11.2.6: VALU Active Threads
- title: Compute Throughput FLOPS
set_option: compute_thruput_flops
description: Placeholder
metric:
- 2.1.2: MFMA FLOPs (BF16)
- 2.1.3: MFMA FLOPs (F16)
- 2.1.4: MFMA FLOPs (F32)
- 2.1.5: MFMA FLOPs (F64)
- 2.1.6: MFMA IOPs (Int8)
- title: Memory Throughput
set_option: mem_thruput
description: Placeholder
metric:
- 2.1.16: Theoretical LDS Bandwidth
- 2.1.17: LDS Bank Conflicts/Access
- 16.1.2: Utilization
- 17.1.0: Utilization
- title: Launch Stats
set_option: launch_stats
description: Placeholder
metric:
- 7.1.0: Grid Size
- 7.1.1: Workgroup Size
- 7.1.2: Total Wavefronts
- 7.1.5: VGPRs
- 7.1.6: AGPRs
- 7.1.7: SGPRs
- 7.1.8: LDS Allocation
- 7.1.9: Scratch Allocation
@@ -0,0 +1,40 @@
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
sets:
- title: Compute Throughput Utilization
set_option: compute_thruput_util
description: Placeholder
metric:
- 11.2.2: SALU Utilization
- 11.2.3: VALU Utilization
- 11.2.4: VMEM Utilization
- 11.2.5: Branch Utilization
- title: Compute Throughput FLOPS
set_option: compute_thruput_flops
description: Placeholder
metric:
- 2.1.2: MFMA FLOPs (F8)
- 2.1.3: MFMA FLOPs (BF16)
- 2.1.4: MFMA FLOPs (F16)
- 2.1.5: MFMA FLOPs (F32)
- 2.1.6: MFMA FLOPs (F64)
- 2.1.7: MFMA IOPs (Int8)
- title: Memory Throughput
set_option: mem_thruput
description: Placeholder
metric:
- 2.1.17: Theoretical LDS Bandwidth
- 2.1.18: LDS Bank Conflicts/Access
- 16.1.2: Utilization
- 17.1.0: Utilization
- title: Launch Stats
set_option: launch_stats
description: Placeholder
metric:
- 7.1.0: Grid Size
- 7.1.1: Workgroup Size
- 7.1.2: Total Wavefronts
- 7.1.5: VGPRs
- 7.1.6: AGPRs
- 7.1.7: SGPRs
- 7.1.8: LDS Allocation
- 7.1.9: Scratch Allocation
@@ -0,0 +1,40 @@
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
sets:
- title: Compute Throughput Utilization
set_option: compute_thruput_util
description: Placeholder
metric:
- 11.2.2: SALU Utilization
- 11.2.3: VALU Utilization
- 11.2.4: VMEM Utilization
- 11.2.5: Branch Utilization
- title: Compute Throughput FLOPS
set_option: compute_thruput_flops
description: Placeholder
metric:
- 2.1.2: MFMA FLOPs (F8)
- 2.1.3: MFMA FLOPs (BF16)
- 2.1.4: MFMA FLOPs (F16)
- 2.1.5: MFMA FLOPs (F32)
- 2.1.6: MFMA FLOPs (F64)
- 2.1.7: MFMA IOPs (Int8)
- title: Memory Throughput
set_option: mem_thruput
description: Placeholder
metric:
- 2.1.17: Theoretical LDS Bandwidth
- 2.1.18: LDS Bank Conflicts/Access
- 16.1.2: Utilization
- 17.1.0: Utilization
- title: Launch Stats
set_option: launch_stats
description: Placeholder
metric:
- 7.1.0: Grid Size
- 7.1.1: Workgroup Size
- 7.1.2: Total Wavefronts
- 7.1.5: VGPRs
- 7.1.6: AGPRs
- 7.1.7: SGPRs
- 7.1.8: LDS Allocation
- 7.1.9: Scratch Allocation
@@ -0,0 +1,40 @@
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
sets:
- title: Compute Throughput Utilization
set_option: compute_thruput_util
description: Placeholder
metric:
- 11.2.2: SALU Utilization
- 11.2.3: VALU Utilization
- 11.2.4: VMEM Utilization
- 11.2.5: Branch Utilization
- title: Compute Throughput FLOPS
set_option: compute_thruput_flops
description: Placeholder
metric:
- 2.1.2: MFMA FLOPs (F8)
- 2.1.3: MFMA FLOPs (BF16)
- 2.1.4: MFMA FLOPs (F16)
- 2.1.5: MFMA FLOPs (F32)
- 2.1.6: MFMA FLOPs (F64)
- 2.1.7: MFMA IOPs (Int8)
- title: Memory Throughput
set_option: mem_thruput
description: Placeholder
metric:
- 2.1.17: Theoretical LDS Bandwidth
- 2.1.18: LDS Bank Conflicts/Access
- 16.1.2: Utilization
- 17.1.0: Utilization
- title: Launch Stats
set_option: launch_stats
description: Placeholder
metric:
- 7.1.0: Grid Size
- 7.1.1: Workgroup Size
- 7.1.2: Total Wavefronts
- 7.1.5: VGPRs
- 7.1.6: AGPRs
- 7.1.7: SGPRs
- 7.1.8: LDS Allocation
- 7.1.9: Scratch Allocation
@@ -0,0 +1,40 @@
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
sets:
- title: Compute Throughput Utilization
set_option: compute_thruput_util
description: Placeholder
metric:
- 11.2.2: SALU Utilization
- 11.2.3: VALU Utilization
- 11.2.5: VMEM Utilization
- 11.2.6: Branch Utilization
- title: Compute Throughput FLOPS
set_option: compute_thruput_flops
description: Placeholder
metric:
- 2.1.2: MFMA FLOPs (F8)
- 2.1.3: MFMA FLOPs (BF16)
- 2.1.4: MFMA FLOPs (F16)
- 2.1.5: MFMA FLOPs (F32)
- 2.1.6: MFMA FLOPs (F64)
- 2.1.8: MFMA IOPs (Int8)
- title: Memory Throughput
set_option: mem_thruput
description: Placeholder
metric:
- 2.1.18: Theoretical LDS Bandwidth
- 2.1.19: LDS Bank Conflicts/Access
- 16.1.2: Utilization
- 17.1.0: Utilization
- title: Launch Stats
set_option: launch_stats
description: Placeholder
metric:
- 7.1.0: Grid Size
- 7.1.1: Workgroup Size
- 7.1.2: Total Wavefronts
- 7.1.5: VGPRs
- 7.1.6: AGPRs
- 7.1.7: SGPRs
- 7.1.8: LDS Allocation
- 7.1.9: Scratch Allocation
@@ -24,7 +24,6 @@
##############################################################################
import ctypes
import glob
import json
import math
@@ -32,7 +31,6 @@ import os
import re
import shutil
import sys
import threading
from abc import abstractmethod
from pathlib import Path
@@ -56,6 +54,7 @@ from utils.utils import (
detect_rocprof,
get_submodules,
is_tcc_channel_counter,
parse_sets_yaml,
using_v3,
)
@@ -277,10 +276,26 @@ class OmniSoC_Base:
Path(filename).name.split("_")[0]: filename
for filename in glob.glob(f"{config_root_dir}/*.yaml")
}
texts = list()
set_selected = self.get_args().set_selected
if set_selected:
# NOTE: --block and --set are mutually exclusive
if self.get_args().filter_blocks:
console_error("--block and --set are exclusive options.")
sets_info = parse_sets_yaml(self.__arch)
if set_selected not in set(sets_info.keys()):
console_error(
f"argument --set: invalid choice: '{set_selected}' (choose from {sets_info.keys()})"
)
self.__args.filter_blocks = [
next(iter(metric.keys())) for metric in sets_info[set_selected]["metric"]
]
if not self.get_args().filter_blocks:
# Read all config files if no filter_blocks are specified
for filename in config_filename_dict.values():
with open(filename, "r") as stream:
texts.append(stream.read())
@@ -35,11 +35,8 @@ import re
import selectors
import shutil
import subprocess
import sys
import tempfile
import time
from collections import OrderedDict
from itertools import product
from pathlib import Path as path
from typing import Optional
@@ -1616,3 +1613,25 @@ def format_time(seconds):
if secs > 0 or not parts:
parts.append(f"{secs} second{'s' if secs != 1 else ''}")
return ", ".join(parts[:-1]) + (" and " if len(parts) > 1 else "") + parts[-1]
def parse_sets_yaml(arch):
    """Load the metric-set definitions for one architecture.

    Reads ``<rocprof_compute_home>/rocprof_compute_soc/profile_configs/sets/
    <arch>_sets.yaml`` and indexes the entries under its top-level ``sets``
    key by their ``set_option`` value.

    Args:
        arch: Architecture name used to pick the file (e.g. ``"gfx90a"``).

    Returns:
        dict: Maps each non-empty ``set_option`` to its full set entry.
        Empty when the file is an empty document or defines no sets.

    Raises:
        OSError: If the sets file cannot be opened (e.g. it does not exist).
    """
    filename = (
        config.rocprof_compute_home
        / "rocprof_compute_soc"
        / "profile_configs"
        / "sets"
        / f"{arch}_sets.yaml"
    )

    with open(filename, "r") as file:
        # safe_load returns None for an empty document; treat that as
        # "no sets" instead of failing on None.get below.
        data = yaml.safe_load(file) or {}

    sets_info = {}
    # A bare "sets:" key also loads as None, so normalise it to an empty list.
    for set_item in data.get("sets") or []:
        set_option = set_item.get("set_option", "")
        # Entries without a usable set_option cannot be selected; skip them.
        if set_option:
            sets_info[set_option] = set_item

    return sets_info
@@ -27,13 +27,9 @@
import inspect
import os
import re
import shutil
import subprocess
import sys
import tempfile
from importlib.machinery import SourceFileLoader
from pathlib import Path
from unittest.mock import patch
import pandas as pd
import pytest
@@ -1649,3 +1645,117 @@ def test_comprehensive_error_paths():
assert False, "Should raise exception for None coll_level"
except Exception as e:
assert "coll_level can not be None" in str(e)
@pytest.mark.sets_func
class TestSetsIntegration:
    """End-to-end checks of the ``--set`` and ``--list-sets`` profile options."""

    @staticmethod
    def _run_profile(handler, options, check_success):
        """Profile into a fresh output dir; return ``(workload_dir, returncode)``."""
        workload_dir = test_utils.get_output_dir()
        returncode = handler(
            config,
            workload_dir,
            options,
            check_success=check_success,
            roof=False,
        )
        return workload_dir, returncode

    def test_memory_throughput_set(self, binary_handler_profile_rocprof_compute):
        """mem_thruput yields a single pmc file and logs its memory metric IDs."""
        workload_dir, _ = self._run_profile(
            binary_handler_profile_rocprof_compute,
            ["--set", "mem_thruput"],
            check_success=True,
        )

        assert test_utils.get_num_pmc_file(workload_dir) == 1

        # Read the log once; read_text closes the file handle for us.
        log_text = (Path(workload_dir) / "log.txt").read_text()
        for metric_id in ("16.1.2", "17.1.0"):
            assert (
                metric_id in log_text
            ), f"Expected memory metric {metric_id} not found"

        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_launch_stats_set(self, binary_handler_profile_rocprof_compute):
        """launch_stats yields a single pmc file."""
        workload_dir, _ = self._run_profile(
            binary_handler_profile_rocprof_compute,
            ["--set", "launch_stats"],
            check_success=True,
        )

        assert test_utils.get_num_pmc_file(workload_dir) == 1

        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_compute_thruput_util_set(self, binary_handler_profile_rocprof_compute):
        """compute_thruput_util yields a single pmc file."""
        workload_dir, _ = self._run_profile(
            binary_handler_profile_rocprof_compute,
            ["--set", "compute_thruput_util"],
            check_success=True,
        )

        assert test_utils.get_num_pmc_file(workload_dir) == 1

        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_compute_thruput_flops_set(self, binary_handler_profile_rocprof_compute):
        """compute_thruput_flops yields a single pmc file."""
        workload_dir, _ = self._run_profile(
            binary_handler_profile_rocprof_compute,
            ["--set", "compute_thruput_flops"],
            check_success=True,
        )

        assert test_utils.get_num_pmc_file(workload_dir) == 1

        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_invalid_set_error_handling(self, binary_handler_profile_rocprof_compute):
        """An unknown set name makes the run return code 1."""
        workload_dir, returncode = self._run_profile(
            binary_handler_profile_rocprof_compute,
            ["--set", "nonexistent_set"],
            check_success=False,
        )

        assert returncode == 1

        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_set_and_block_mutual_exclusion(self, binary_handler_profile_rocprof_compute):
        """Passing --set together with --block makes the run return code 1."""
        workload_dir, returncode = self._run_profile(
            binary_handler_profile_rocprof_compute,
            ["--set", "compute_thruput_util", "--block", "12"],
            check_success=False,
        )

        assert returncode == 1

        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_list_sets_functionality(self, binary_handler_profile_rocprof_compute):
        """--list-sets leaves the workload directory empty."""
        workload_dir, _ = self._run_profile(
            binary_handler_profile_rocprof_compute,
            ["--list-sets"],
            check_success=False,
        )

        # workload dir should be empty
        assert not os.listdir(workload_dir)

        test_utils.clean_output_dir(config["cleanup"], workload_dir)
@@ -29,7 +29,6 @@ import logging
logging.trace = lambda *args, **kwargs: None
import builtins
import glob
import inspect
import io
import json
@@ -38,16 +37,15 @@ import logging
import os
import pathlib
import re
import selectors
import shutil
import subprocess
import tempfile
from pathlib import Path
from types import SimpleNamespace
from unittest import mock
import pandas as pd
import pytest
import yaml
import utils.utils as utils
@@ -163,6 +161,16 @@ def check_csv_files(output_dir, num_devices, num_kernels):
return file_dict
def get_num_pmc_file(output_dir):
    """
    Count the pmc perf text files directly inside ``<output_dir>/perfmon``.

    Args:
        output_dir: Workload output directory containing a ``perfmon`` folder.

    Returns:
        int: number of regular files in perfmon dir whose suffix is ".txt"
    """
    perfmon_dir = Path(output_dir).joinpath("perfmon")
    txt_count = 0
    for entry in perfmon_dir.iterdir():
        # Directories that merely end in ".txt" do not count.
        if entry.is_file() and entry.suffix == ".txt":
            txt_count += 1
    return txt_count
# =============================================================================
# VERSION UTILITIES TESTS
# =============================================================================
@@ -5975,11 +5983,10 @@ def test_get_submodules_basic_functionality():
Returns:
None: Asserts function correctly lists submodules from a real package.
"""
from unittest.mock import MagicMock, patch
import utils.utils as utils_mod
mock_package = MagicMock()
mock_package = mock.MagicMock()
mock_package.__path__ = ["/fake/path"]
mock_submodules = [
@@ -5988,8 +5995,8 @@ def test_get_submodules_basic_functionality():
(None, "module_error", False),
]
with patch("importlib.import_module", return_value=mock_package):
with patch("pkgutil.walk_packages", return_value=mock_submodules):
with mock.patch("importlib.import_module", return_value=mock_package):
with mock.patch("pkgutil.walk_packages", return_value=mock_submodules):
result = utils_mod.get_submodules("test_package")
assert isinstance(result, list)
@@ -9490,3 +9497,12 @@ def test_replace_timestamps_no_other_csvs_to_update(
df_sysinfo_original = pd.read_csv(sysinfo_csv_path_str)
assert list(df_sysinfo_original["Start_Timestamp"]) == [5]
assert list(df_sysinfo_original["End_Timestamp"]) == [7]
def test_set_parser():
    """parse_sets_yaml("gfx90a") exposes compute_thruput_util with its title."""
    from utils.utils import parse_sets_yaml

    expected_option = "compute_thruput_util"
    expected_title = "Compute Throughput Utilization"

    sets_by_option = parse_sets_yaml("gfx90a")

    assert expected_option in sets_by_option
    assert sets_by_option[expected_option]["title"] == expected_title
@@ -107,4 +107,10 @@ src/rocprof_compute_soc/analysis_configs/gfx940/2100_pc_sampling.yaml: 4f3af5504
src/rocprof_compute_soc/analysis_configs/gfx941/2100_pc_sampling.yaml: 4f3af55040c40bee5f1fd88d83e2324d06e5dc462c0adc3e6d5b19b3f31af5e7
src/rocprof_compute_soc/analysis_configs/gfx942/2100_pc_sampling.yaml: 4f3af55040c40bee5f1fd88d83e2324d06e5dc462c0adc3e6d5b19b3f31af5e7
src/rocprof_compute_soc/analysis_configs/gfx950/2100_pc_sampling.yaml: 4f3af55040c40bee5f1fd88d83e2324d06e5dc462c0adc3e6d5b19b3f31af5e7
src/rocprof_compute_soc/profile_configs/sets/gfx908_sets.yaml: ee28989e70d0537db8b0f0a4bc5499444b44ff0e73d3e7f2926943be11d0aeda
src/rocprof_compute_soc/profile_configs/sets/gfx90a_sets.yaml: 9c9533174a3f7bd5c8e09ec998743c7bb2642c4ce3f818b546673be9cafc40a8
src/rocprof_compute_soc/profile_configs/sets/gfx940_sets.yaml: 44cd2b32b050cafa73d0ead5703b82836edf25a057c21699046b6b8b8918b242
src/rocprof_compute_soc/profile_configs/sets/gfx941_sets.yaml: 44cd2b32b050cafa73d0ead5703b82836edf25a057c21699046b6b8b8918b242
src/rocprof_compute_soc/profile_configs/sets/gfx942_sets.yaml: 44cd2b32b050cafa73d0ead5703b82836edf25a057c21699046b6b8b8918b242
src/rocprof_compute_soc/profile_configs/sets/gfx950_sets.yaml: 238d9dc8a98cfead3fc904885bfe413e5bcb4f1af31e9820cd640388bcd1e1c2
docs/data/metrics_description.yaml: 819c08a584ae8b418e6983aa51108b95e43eda4f3b7892eab336c61d844b20bf
@@ -17,14 +17,23 @@ import yaml
# Repository root: two levels up from this script.
ROOT_DIR = Path(__file__).parent.parent
# Directory holding the unified YAML sources read by this script.
SOURCE_DIR = ROOT_DIR.joinpath("utils")
# Destination of the generated per-architecture analysis panel configs.
TARGET_DIR = ROOT_DIR.joinpath("src", "rocprof_compute_soc", "analysis_configs")
# Destination of the generated per-architecture "<gfx>_sets.yaml" files.
SETS_TARGET_DIR = ROOT_DIR.joinpath(
"src", "rocprof_compute_soc", "profile_configs", "sets"
)
# Destination of the generated documentation data.
DOC_TARGET_DIR = ROOT_DIR.joinpath("docs", "data")
# Banner written as the first line of generated files.
AUTOGEN_TEXT = "# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_config.yaml. Generated by utils/split_config.py\n"
# YAML file that records a hash for every generated file.
HASH_FILE = ROOT_DIR.joinpath("utils", "autogen_hash.yaml")
# Generated file path (relative to ROOT_DIR) -> SHA-256 hex digest of its contents.
HASH_FILE_MAP = {}
# Architectures for which configs are generated.
GFX_VERSIONS = ["gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx950"]
# Per architecture: metric ID string -> metric name, filled by build_metric_id_mapping.
METRIC_ID_TO_NAME_MAP = {gfx_version: {} for gfx_version in GFX_VERSIONS}
def get_autogen_text(config_file="utils/unified_config.yaml"):
    """Return the banner line written at the top of generated files.

    Args:
        config_file: Path of the source file named in the banner.

    Returns:
        str: A single comment line, terminated by a newline.
    """
    banner_parts = (
        "# AUTOGENERATED FILE. Only edit for testing purposes, not for development.",
        f"Generated from {config_file}.",
        "Generated by utils/split_config.py\n",
    )
    return " ".join(banner_parts)
def update_analysis_config():
global METRIC_ID_TO_NAME_MAP
# Read the unified config file
with open(SOURCE_DIR.joinpath("unified_config.yaml")) as file:
unified_config = yaml.safe_load(file)
@@ -38,6 +47,7 @@ def update_analysis_config():
key: value["plain"]
for key, value in panel_config.get("metrics_description", {}).items()
}
panel_id_int = panel_config["id"]
# Convert int into str with 4 digits
panel_id = str(panel_config["id"]).zfill(4)
# Replace parentehsis, hyphen, slash and space with underscore
@@ -57,19 +67,28 @@ def update_analysis_config():
# Select metrics from current gfx arch
new_panel_config["Panel Config"]["data source"] = []
for data_source_config in panel_config["data source"]:
for data_source_index, data_source_config in enumerate(
panel_config["data source"]
):
data_source_config = copy.deepcopy(data_source_config)
if "metric_table" in data_source_config:
data_source_config["metric_table"]["metric"] = data_source_config[
"metric_table"
]["metric"][gfx_version]
build_metric_id_mapping(
panel_id_int,
data_source_index,
data_source_config["metric_table"]["metric"],
gfx_version,
)
new_panel_config["Panel Config"]["data source"].append(data_source_config)
# Write panel config to file
filename = Path(
TARGET_DIR.joinpath(gfx_version, f"{panel_id}_{panel_title}.yaml")
)
with open(filename, "w") as file:
file.write(AUTOGEN_TEXT)
file.write(get_autogen_text())
yaml.dump(new_panel_config, file, sort_keys=False)
print(f"File write: {filename}")
# Calculate hash of filename
@@ -78,6 +97,56 @@ def update_analysis_config():
).hexdigest()
def build_metric_id_mapping(panel_id, data_source_index, metrics, gfx_version):
    """Record the metric ID of every metric in one metric table.

    IDs have the form ``"<panel_id // 100>.<data_source_index + 1>.<position>"``,
    where position is the zero-based index of the metric within ``metrics``.
    Entries are stored in ``METRIC_ID_TO_NAME_MAP[gfx_version]``.

    Args:
        panel_id: Integer panel id from the unified config.
        data_source_index: Zero-based index of the data source in its panel.
        metrics: Mapping whose keys are the metric names, in table order.
        gfx_version: Architecture whose mapping is updated.
    """
    # The module-level map is only mutated, never rebound, so no `global`
    # declaration is required.
    for position, name in enumerate(metrics.keys()):
        panel_number = panel_id // 100
        table_number = data_source_index + 1
        id_text = f"{panel_number}.{table_number}.{position}"
        METRIC_ID_TO_NAME_MAP[gfx_version][id_text] = name
def update_sets_config():
    """Generate one ``<gfx>_sets.yaml`` per architecture from unified_sets.yaml.

    For every architecture in ``GFX_VERSIONS`` the metric IDs listed for each
    set are paired with their names from ``METRIC_ID_TO_NAME_MAP``, so that
    map must already be populated when this runs. Each generated file is
    written under ``SETS_TARGET_DIR`` and its SHA-256 digest is stored in
    ``HASH_FILE_MAP`` keyed by the path relative to ``ROOT_DIR``.

    Raises:
        KeyError: If a set references a metric ID that is not known for the
            architecture, or lacks an entry for an architecture.
    """
    # Create directory if it doesn't exist. parents=True covers a missing
    # intermediate "profile_configs" directory; exist_ok=True tolerates the
    # directory appearing between the check and the call.
    if not SETS_TARGET_DIR.exists():
        SETS_TARGET_DIR.mkdir(parents=True, exist_ok=True)
        print(f"Created directory: {SETS_TARGET_DIR}")

    # Read the unified sets file
    with open(SOURCE_DIR.joinpath("unified_sets.yaml")) as file:
        unified_sets = yaml.safe_load(file)

    # Create per gfx version file
    for gfx_version in GFX_VERSIONS:
        new_sets = {"sets": []}
        for sets in unified_sets["sets"]:
            # Key order here is the key order in the emitted YAML.
            current_set = {
                "title": sets["title"],
                "set_option": sets["set_option"],
                "description": sets["description"],
                "metric": [],
            }
            for metric_id in sets["metric"][gfx_version]:
                try:
                    metric_name = METRIC_ID_TO_NAME_MAP[gfx_version][str(metric_id)]
                except KeyError:
                    # Same exception type as before, but say which entry is bad.
                    raise KeyError(
                        f"Metric ID {metric_id} in set '{sets['set_option']}' "
                        f"is not defined for {gfx_version}"
                    ) from None
                current_set["metric"].append({metric_id: metric_name})
            new_sets["sets"].append(current_set)

        # Write gfx version sets to file
        filename = SETS_TARGET_DIR.joinpath(f"{gfx_version}_sets.yaml")
        with open(filename, "w") as file:
            file.write(get_autogen_text("utils/unified_sets.yaml"))
            yaml.dump(new_sets, file, sort_keys=False)
        print(f"File write: {filename}")

        # Hash the bytes just written so stale generated files can be detected
        HASH_FILE_MAP[str(filename.relative_to(ROOT_DIR))] = hashlib.sha256(
            filename.read_bytes()
        ).hexdigest()
def update_documentation():
# Documentation sections
section_panel_map = {
@@ -153,7 +222,7 @@ def update_documentation():
# Write documentation metrics description file
filename = Path(DOC_TARGET_DIR.joinpath("metrics_description.yaml"))
with open(filename, "w") as file:
file.write(AUTOGEN_TEXT)
file.write(get_autogen_text())
yaml.dump(section_metric_map, file, sort_keys=False)
print(f"File write: {filename}")
# Calculate hash of filename
@@ -165,12 +234,13 @@ def update_documentation():
def update_hash():
    """Write the collected file hashes to ``HASH_FILE``.

    The file starts with the autogenerated banner, followed by
    ``HASH_FILE_MAP`` dumped as YAML in insertion order.
    """
    # Write hash file
    with open(HASH_FILE, "w") as file:
        # Write the banner exactly once; writing it via both AUTOGEN_TEXT and
        # get_autogen_text() would duplicate the header line.
        file.write(get_autogen_text())
        yaml.dump(HASH_FILE_MAP, file, sort_keys=False)
    print(f"File write: {HASH_FILE}")
# Order matters: update_sets_config() reads METRIC_ID_TO_NAME_MAP, which
# update_analysis_config() fills through build_metric_id_mapping(), and
# update_hash() dumps HASH_FILE_MAP, so it runs last.
if __name__ == "__main__":
    update_analysis_config()
    update_sets_config()
    update_documentation()
    update_hash()
@@ -0,0 +1,176 @@
---
# Pre-defined sets containing a collection of relevant metrics that can be collected in a single pass.
# To profile customized set(s), append to this yaml file.
sets:
- title: Compute Throughput Utilization
set_option: compute_thruput_util
description: Placeholder
metric:
gfx908:
- 11.2.2
- 11.2.3
gfx90a:
- 11.2.3
- 11.2.4
- 11.2.5
- 11.2.6
gfx940:
- 11.2.2
- 11.2.3
- 11.2.4
- 11.2.5
gfx941:
- 11.2.2
- 11.2.3
- 11.2.4
- 11.2.5
gfx942:
- 11.2.2
- 11.2.3
- 11.2.4
- 11.2.5
gfx950:
- 11.2.2
- 11.2.3
- 11.2.5
- 11.2.6
- title: Compute Throughput FLOPS
set_option: compute_thruput_flops
description: Placeholder
metric:
gfx908:
- 2.1.2
- 2.1.3
- 2.1.4
- 2.1.5
- 2.1.6
gfx90a:
- 2.1.2
- 2.1.3
- 2.1.4
- 2.1.5
- 2.1.6
gfx940:
- 2.1.2
- 2.1.3
- 2.1.4
- 2.1.5
- 2.1.6
- 2.1.7
gfx941:
- 2.1.2
- 2.1.3
- 2.1.4
- 2.1.5
- 2.1.6
- 2.1.7
gfx942:
- 2.1.2
- 2.1.3
- 2.1.4
- 2.1.5
- 2.1.6
- 2.1.7
gfx950:
- 2.1.2
- 2.1.3
- 2.1.4
- 2.1.5
- 2.1.6
- 2.1.8
- title: Memory Throughput
set_option: mem_thruput
description: Placeholder
metric:
gfx908:
- 2.1.16
- 2.1.17
- 16.1.2
- 17.1.0
gfx90a:
- 2.1.16
- 2.1.17
- 16.1.2
- 17.1.0
gfx940:
- 2.1.17
- 2.1.18
- 16.1.2
- 17.1.0
gfx941:
- 2.1.17
- 2.1.18
- 16.1.2
- 17.1.0
gfx942:
- 2.1.17
- 2.1.18
- 16.1.2
- 17.1.0
gfx950:
- 2.1.18
- 2.1.19
- 16.1.2
- 17.1.0
- title: Launch Stats
set_option: launch_stats
description: Placeholder
metric:
gfx908:
- 7.1.0
- 7.1.1
- 7.1.2
- 7.1.5
- 7.1.6
- 7.1.7
- 7.1.8
- 7.1.9
gfx90a:
- 7.1.0
- 7.1.1
- 7.1.2
- 7.1.5
- 7.1.6
- 7.1.7
- 7.1.8
- 7.1.9
gfx940:
- 7.1.0
- 7.1.1
- 7.1.2
- 7.1.5
- 7.1.6
- 7.1.7
- 7.1.8
- 7.1.9
gfx941:
- 7.1.0
- 7.1.1
- 7.1.2
- 7.1.5
- 7.1.6
- 7.1.7
- 7.1.8
- 7.1.9
gfx942:
- 7.1.0
- 7.1.1
- 7.1.2
- 7.1.5
- 7.1.6
- 7.1.7
- 7.1.8
- 7.1.9
gfx950:
- 7.1.0
- 7.1.1
- 7.1.2
- 7.1.5
- 7.1.6
- 7.1.7
- 7.1.8
- 7.1.9