Enable single pass counter collection (#833)
[ROCm/rocprofiler-compute commit: 6a77d241ed]
This commit is contained in:
@@ -254,6 +254,14 @@ add_test(
|
||||
${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
add_test(
|
||||
NAME test_profile_sets_func
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m sets_func
|
||||
--junitxml=tests/test_profile_sets_func.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
set_tests_properties(
|
||||
test_profile_kernel_execution test_profile_dispatch test_profile_mem test_profile_join
|
||||
test_profile_sort test_profile_misc PROPERTIES LABELS "profile" RESOURCE_GROUPS
|
||||
|
||||
@@ -41,35 +41,37 @@ addopts = [
|
||||
]
|
||||
|
||||
pythonpath = [
|
||||
".",
|
||||
"src",
|
||||
"src/rocprof_compute_soc",
|
||||
"src/utils",
|
||||
"src/rocprof_compute_analyze/utils",
|
||||
"tests"
|
||||
]
|
||||
".",
|
||||
"src",
|
||||
"src/rocprof_compute_soc",
|
||||
"src/utils",
|
||||
"src/rocprof_compute_analyze/utils",
|
||||
"tests"
|
||||
]
|
||||
|
||||
markers = [
|
||||
"section",
|
||||
"kernel_execution",
|
||||
"misc",
|
||||
"mem",
|
||||
"sort",
|
||||
"join",
|
||||
"verbosity",
|
||||
"dispatch",
|
||||
"list_metrics",
|
||||
"filter_block",
|
||||
"filter_kernel",
|
||||
"dispatch",
|
||||
"normal_unit",
|
||||
"max_stat",
|
||||
"time_unit",
|
||||
"decimal",
|
||||
"col",
|
||||
"kernel_verbose",
|
||||
"serial",
|
||||
"L1_cache",
|
||||
"num_xcds_spec_class",
|
||||
"num_xcds_cli_output",
|
||||
"section",
|
||||
"kernel_execution",
|
||||
"misc",
|
||||
"mem",
|
||||
"sort",
|
||||
"join",
|
||||
"verbosity",
|
||||
"dispatch",
|
||||
"list_metrics",
|
||||
"filter_block",
|
||||
"filter_kernel",
|
||||
"dispatch",
|
||||
"normal_unit",
|
||||
"max_stat",
|
||||
"time_unit",
|
||||
"decimal",
|
||||
"col",
|
||||
"kernel_verbose",
|
||||
"serial",
|
||||
"L1_cache",
|
||||
"num_xcds_spec_class",
|
||||
"num_xcds_cli_output",
|
||||
"sets_func",
|
||||
"sets_perf"
|
||||
]
|
||||
|
||||
@@ -27,12 +27,11 @@
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def print_avail_arch(avail_arch: list):
    """Build the argparse help text that enumerates the available architectures.

    Args:
        avail_arch: Iterable of architecture names (the caller passes the keys
            of the supported-architecture mapping).

    Returns:
        str: A heading line followed by one tab-indented line per architecture.
    """
    heading = "\t\t\tList all available metrics for analysis on specified arch:"
    # One "\n\t\t\t <arch>" entry per architecture, appended in iteration order.
    arch_lines = ["\n\t\t\t {}".format(arch) for arch in avail_arch]
    return heading + "".join(arch_lines)
|
||||
@@ -207,6 +206,19 @@ Examples:
|
||||
choices=[""] + list(supported_archs.keys()), # ["gfx908", "gfx90a"],
|
||||
help=print_avail_arch(supported_archs.keys()),
|
||||
)
|
||||
|
||||
profile_group.add_argument(
|
||||
"--list-sets",
|
||||
action="store_true",
|
||||
help="\t\t\tDisplay available metric sets and their descriptions",
|
||||
)
|
||||
profile_group.add_argument(
|
||||
"--set",
|
||||
default=None,
|
||||
dest="set_selected",
|
||||
help="\t\t\tProfile a set of metrics of topic of interest by collecting counters in a single pass.\n\t\t\tFor available sets, see --list-sets",
|
||||
)
|
||||
|
||||
profile_group.add_argument(
|
||||
"--config-dir",
|
||||
dest="config_dir",
|
||||
|
||||
@@ -27,7 +27,6 @@
|
||||
import argparse
|
||||
import importlib
|
||||
import os
|
||||
import shutil
|
||||
import socket
|
||||
import sys
|
||||
import time
|
||||
@@ -55,6 +54,7 @@ from utils.utils import (
|
||||
get_submodules,
|
||||
get_version,
|
||||
get_version_display,
|
||||
parse_sets_yaml,
|
||||
set_locale_encoding,
|
||||
)
|
||||
|
||||
@@ -243,6 +243,53 @@ class RocProfCompute:
|
||||
else:
|
||||
console_error("Unsupported arch")
|
||||
|
||||
@demarcate
def list_sets(self):
    """Print the metric sets available for the detected GPU arch, then exit.

    Loads the per-architecture sets via ``parse_sets_yaml`` and prints a
    fixed-width table with one row per metric. The set option and title are
    shown only on the first row of each set. Usage examples follow the table,
    and the process then terminates with exit status 0.
    """
    table_width = 115
    sets_info = parse_sets_yaml(self.__mspec.gpu_arch)

    if not sets_info:
        # NOTE(review): console_error presumably aborts the run - confirm.
        console_error("No sets configuration found.")

    print("\nAvailable Sets:")
    print("=" * table_width)

    # Column headings
    print(
        f"{'Set Option':<35} {'Set Title':<35} {'Metric Name':<30} {'Metric ID':<10}"
    )
    print("-" * table_width)

    # One group of rows per set
    for set_option, set_data in sets_info.items():
        title = set_data.get("title", set_option)

        show_set_columns = True
        for metric in set_data.get("metric", []):
            # Each metric entry is expected to be a one-item {id: name} mapping;
            # anything else (or an empty mapping) is skipped.
            if not (isinstance(metric, dict) and metric):
                continue
            metric_id, metric_name = next(iter(metric.items()))

            # Set option/title are printed only on the first row of a set
            if show_set_columns:
                set_display, title_display = set_option, title
            else:
                set_display, title_display = "", ""

            print(
                f"{set_display:<35} {title_display:<35} {metric_name:<30} {metric_id:<10}"
            )
            show_set_columns = False

        # Blank separator line after every set
        print()

    print("Usage Examples:")
    if sets_info:
        first_set = next(iter(sets_info))
        print(f" rocprof-compute profile --set {first_set} # Profile this set")
    print(" rocprof-compute profile --list-sets # Show this help")
    print()

    sys.exit(0)
|
||||
|
||||
@demarcate
|
||||
def run_profiler(self):
|
||||
self.print_graphic()
|
||||
@@ -250,6 +297,8 @@ class RocProfCompute:
|
||||
|
||||
if self.__args.list_metrics is not None:
|
||||
self.list_metrics()
|
||||
elif self.__args.list_sets:
|
||||
self.list_sets()
|
||||
elif self.__args.name is None:
|
||||
sys.exit("Either --list-name or --name is required")
|
||||
|
||||
|
||||
@@ -26,13 +26,12 @@
|
||||
|
||||
import csv
|
||||
import glob
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
import shutil
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
@@ -350,6 +349,10 @@ class RocProfCompute_Base:
|
||||
console_log("Command: " + str(self.__args.remaining))
|
||||
console_log("Kernel Selection: " + str(self.__args.kernel))
|
||||
console_log("Dispatch Selection: " + str(self.__args.dispatch))
|
||||
|
||||
if self.get_args().set_selected:
|
||||
console_log("Set Selection: " + str(self.__args.set_selected))
|
||||
|
||||
if self.get_args().filter_blocks is None:
|
||||
console_log("Report Sections: All")
|
||||
else:
|
||||
|
||||
+37
@@ -0,0 +1,37 @@
|
||||
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
|
||||
sets:
|
||||
- title: Compute Throughput Utilization
|
||||
set_option: compute_thruput_util
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 11.2.2: SALU Utilization
|
||||
- 11.2.3: VALU Utilization
|
||||
- title: Compute Throughput FLOPS
|
||||
set_option: compute_thruput_flops
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.2: MFMA FLOPs (BF16)
|
||||
- 2.1.3: MFMA FLOPs (F16)
|
||||
- 2.1.4: MFMA FLOPs (F32)
|
||||
- 2.1.5: MFMA FLOPs (F64)
|
||||
- 2.1.6: MFMA IOPs (Int8)
|
||||
- title: Memory Throughput
|
||||
set_option: mem_thruput
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.16: Theoretical LDS Bandwidth
|
||||
- 2.1.17: LDS Bank Conflicts/Access
|
||||
- 16.1.2: Utilization
|
||||
- 17.1.0: Utilization
|
||||
- title: Launch Stats
|
||||
set_option: launch_stats
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 7.1.0: Grid Size
|
||||
- 7.1.1: Workgroup Size
|
||||
- 7.1.2: Total Wavefronts
|
||||
- 7.1.5: VGPRs
|
||||
- 7.1.6: AGPRs
|
||||
- 7.1.7: SGPRs
|
||||
- 7.1.8: LDS Allocation
|
||||
- 7.1.9: Scratch Allocation
|
||||
+39
@@ -0,0 +1,39 @@
|
||||
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
|
||||
sets:
|
||||
- title: Compute Throughput Utilization
|
||||
set_option: compute_thruput_util
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 11.2.3: VALU Utilization
|
||||
- 11.2.4: VMEM Utilization
|
||||
- 11.2.5: Branch Utilization
|
||||
- 11.2.6: VALU Active Threads
|
||||
- title: Compute Throughput FLOPS
|
||||
set_option: compute_thruput_flops
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.2: MFMA FLOPs (BF16)
|
||||
- 2.1.3: MFMA FLOPs (F16)
|
||||
- 2.1.4: MFMA FLOPs (F32)
|
||||
- 2.1.5: MFMA FLOPs (F64)
|
||||
- 2.1.6: MFMA IOPs (Int8)
|
||||
- title: Memory Throughput
|
||||
set_option: mem_thruput
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.16: Theoretical LDS Bandwidth
|
||||
- 2.1.17: LDS Bank Conflicts/Access
|
||||
- 16.1.2: Utilization
|
||||
- 17.1.0: Utilization
|
||||
- title: Launch Stats
|
||||
set_option: launch_stats
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 7.1.0: Grid Size
|
||||
- 7.1.1: Workgroup Size
|
||||
- 7.1.2: Total Wavefronts
|
||||
- 7.1.5: VGPRs
|
||||
- 7.1.6: AGPRs
|
||||
- 7.1.7: SGPRs
|
||||
- 7.1.8: LDS Allocation
|
||||
- 7.1.9: Scratch Allocation
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
|
||||
sets:
|
||||
- title: Compute Throughput Utilization
|
||||
set_option: compute_thruput_util
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 11.2.2: SALU Utilization
|
||||
- 11.2.3: VALU Utilization
|
||||
- 11.2.4: VMEM Utilization
|
||||
- 11.2.5: Branch Utilization
|
||||
- title: Compute Throughput FLOPS
|
||||
set_option: compute_thruput_flops
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.2: MFMA FLOPs (F8)
|
||||
- 2.1.3: MFMA FLOPs (BF16)
|
||||
- 2.1.4: MFMA FLOPs (F16)
|
||||
- 2.1.5: MFMA FLOPs (F32)
|
||||
- 2.1.6: MFMA FLOPs (F64)
|
||||
- 2.1.7: MFMA IOPs (Int8)
|
||||
- title: Memory Throughput
|
||||
set_option: mem_thruput
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.17: Theoretical LDS Bandwidth
|
||||
- 2.1.18: LDS Bank Conflicts/Access
|
||||
- 16.1.2: Utilization
|
||||
- 17.1.0: Utilization
|
||||
- title: Launch Stats
|
||||
set_option: launch_stats
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 7.1.0: Grid Size
|
||||
- 7.1.1: Workgroup Size
|
||||
- 7.1.2: Total Wavefronts
|
||||
- 7.1.5: VGPRs
|
||||
- 7.1.6: AGPRs
|
||||
- 7.1.7: SGPRs
|
||||
- 7.1.8: LDS Allocation
|
||||
- 7.1.9: Scratch Allocation
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
|
||||
sets:
|
||||
- title: Compute Throughput Utilization
|
||||
set_option: compute_thruput_util
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 11.2.2: SALU Utilization
|
||||
- 11.2.3: VALU Utilization
|
||||
- 11.2.4: VMEM Utilization
|
||||
- 11.2.5: Branch Utilization
|
||||
- title: Compute Throughput FLOPS
|
||||
set_option: compute_thruput_flops
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.2: MFMA FLOPs (F8)
|
||||
- 2.1.3: MFMA FLOPs (BF16)
|
||||
- 2.1.4: MFMA FLOPs (F16)
|
||||
- 2.1.5: MFMA FLOPs (F32)
|
||||
- 2.1.6: MFMA FLOPs (F64)
|
||||
- 2.1.7: MFMA IOPs (Int8)
|
||||
- title: Memory Throughput
|
||||
set_option: mem_thruput
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.17: Theoretical LDS Bandwidth
|
||||
- 2.1.18: LDS Bank Conflicts/Access
|
||||
- 16.1.2: Utilization
|
||||
- 17.1.0: Utilization
|
||||
- title: Launch Stats
|
||||
set_option: launch_stats
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 7.1.0: Grid Size
|
||||
- 7.1.1: Workgroup Size
|
||||
- 7.1.2: Total Wavefronts
|
||||
- 7.1.5: VGPRs
|
||||
- 7.1.6: AGPRs
|
||||
- 7.1.7: SGPRs
|
||||
- 7.1.8: LDS Allocation
|
||||
- 7.1.9: Scratch Allocation
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
|
||||
sets:
|
||||
- title: Compute Throughput Utilization
|
||||
set_option: compute_thruput_util
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 11.2.2: SALU Utilization
|
||||
- 11.2.3: VALU Utilization
|
||||
- 11.2.4: VMEM Utilization
|
||||
- 11.2.5: Branch Utilization
|
||||
- title: Compute Throughput FLOPS
|
||||
set_option: compute_thruput_flops
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.2: MFMA FLOPs (F8)
|
||||
- 2.1.3: MFMA FLOPs (BF16)
|
||||
- 2.1.4: MFMA FLOPs (F16)
|
||||
- 2.1.5: MFMA FLOPs (F32)
|
||||
- 2.1.6: MFMA FLOPs (F64)
|
||||
- 2.1.7: MFMA IOPs (Int8)
|
||||
- title: Memory Throughput
|
||||
set_option: mem_thruput
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.17: Theoretical LDS Bandwidth
|
||||
- 2.1.18: LDS Bank Conflicts/Access
|
||||
- 16.1.2: Utilization
|
||||
- 17.1.0: Utilization
|
||||
- title: Launch Stats
|
||||
set_option: launch_stats
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 7.1.0: Grid Size
|
||||
- 7.1.1: Workgroup Size
|
||||
- 7.1.2: Total Wavefronts
|
||||
- 7.1.5: VGPRs
|
||||
- 7.1.6: AGPRs
|
||||
- 7.1.7: SGPRs
|
||||
- 7.1.8: LDS Allocation
|
||||
- 7.1.9: Scratch Allocation
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_sets.yaml. Generated by utils/split_config.py
|
||||
sets:
|
||||
- title: Compute Throughput Utilization
|
||||
set_option: compute_thruput_util
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 11.2.2: SALU Utilization
|
||||
- 11.2.3: VALU Utilization
|
||||
- 11.2.5: VMEM Utilization
|
||||
- 11.2.6: Branch Utilization
|
||||
- title: Compute Throughput FLOPS
|
||||
set_option: compute_thruput_flops
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.2: MFMA FLOPs (F8)
|
||||
- 2.1.3: MFMA FLOPs (BF16)
|
||||
- 2.1.4: MFMA FLOPs (F16)
|
||||
- 2.1.5: MFMA FLOPs (F32)
|
||||
- 2.1.6: MFMA FLOPs (F64)
|
||||
- 2.1.8: MFMA IOPs (Int8)
|
||||
- title: Memory Throughput
|
||||
set_option: mem_thruput
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 2.1.18: Theoretical LDS Bandwidth
|
||||
- 2.1.19: LDS Bank Conflicts/Access
|
||||
- 16.1.2: Utilization
|
||||
- 17.1.0: Utilization
|
||||
- title: Launch Stats
|
||||
set_option: launch_stats
|
||||
description: Placeholder
|
||||
metric:
|
||||
- 7.1.0: Grid Size
|
||||
- 7.1.1: Workgroup Size
|
||||
- 7.1.2: Total Wavefronts
|
||||
- 7.1.5: VGPRs
|
||||
- 7.1.6: AGPRs
|
||||
- 7.1.7: SGPRs
|
||||
- 7.1.8: LDS Allocation
|
||||
- 7.1.9: Scratch Allocation
|
||||
@@ -24,7 +24,6 @@
|
||||
##############################################################################
|
||||
|
||||
|
||||
import ctypes
|
||||
import glob
|
||||
import json
|
||||
import math
|
||||
@@ -32,7 +31,6 @@ import os
|
||||
import re
|
||||
import shutil
|
||||
import sys
|
||||
import threading
|
||||
from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
|
||||
@@ -56,6 +54,7 @@ from utils.utils import (
|
||||
detect_rocprof,
|
||||
get_submodules,
|
||||
is_tcc_channel_counter,
|
||||
parse_sets_yaml,
|
||||
using_v3,
|
||||
)
|
||||
|
||||
@@ -277,10 +276,26 @@ class OmniSoC_Base:
|
||||
Path(filename).name.split("_")[0]: filename
|
||||
for filename in glob.glob(f"{config_root_dir}/*.yaml")
|
||||
}
|
||||
|
||||
texts = list()
|
||||
|
||||
set_selected = self.get_args().set_selected
|
||||
|
||||
if set_selected:
|
||||
# NOTE: --blocks and --set are mutually exclusive
|
||||
if self.get_args().filter_blocks:
|
||||
console_error("--block and --set are exclusive options.")
|
||||
|
||||
sets_info = parse_sets_yaml(self.__arch)
|
||||
if set_selected not in set(sets_info.keys()):
|
||||
console_error(
|
||||
f"argument --set: invalid choice: '{set_selected}' (choose from {sets_info.keys()})"
|
||||
)
|
||||
self.__args.filter_blocks = [
|
||||
next(iter(metric.keys())) for metric in sets_info[set_selected]["metric"]
|
||||
]
|
||||
|
||||
if not self.get_args().filter_blocks:
|
||||
# Read all config files if no filter_blocks are specified
|
||||
for filename in config_filename_dict.values():
|
||||
with open(filename, "r") as stream:
|
||||
texts.append(stream.read())
|
||||
|
||||
@@ -35,11 +35,8 @@ import re
|
||||
import selectors
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from collections import OrderedDict
|
||||
from itertools import product
|
||||
from pathlib import Path as path
|
||||
from typing import Optional
|
||||
|
||||
@@ -1616,3 +1613,25 @@ def format_time(seconds):
|
||||
if secs > 0 or not parts:
|
||||
parts.append(f"{secs} second{'s' if secs != 1 else ''}")
|
||||
return ", ".join(parts[:-1]) + (" and " if len(parts) > 1 else "") + parts[-1]
|
||||
|
||||
|
||||
def parse_sets_yaml(arch):
    """Load the single-pass metric sets defined for one GPU architecture.

    Reads ``<rocprof_compute_home>/rocprof_compute_soc/profile_configs/sets/
    <arch>_sets.yaml`` and indexes the entries of its top-level ``sets`` list
    by their ``set_option`` value.

    Args:
        arch: Architecture name used to build the file name (e.g. ``"gfx90a"``).

    Returns:
        dict: Mapping of ``set_option`` -> the full set entry as parsed from
        YAML. Entries without a non-empty ``set_option`` are skipped. An empty
        dict is returned when the file is missing, is empty, or has no ``sets``
        entries, so callers can report "no sets" instead of crashing.
    """
    filename = (
        config.rocprof_compute_home
        / "rocprof_compute_soc"
        / "profile_configs"
        / "sets"
        / f"{arch}_sets.yaml"
    )
    try:
        with open(filename, "r", encoding="utf-8") as file:
            data = yaml.safe_load(file)
    except FileNotFoundError:
        # No sets file shipped for this arch: callers treat an empty result
        # as "no sets configuration found".
        return {}

    # safe_load yields None for an empty document; anything other than a
    # mapping cannot contain a "sets" key.
    if not isinstance(data, dict):
        return {}

    sets_info = {}
    # "sets:" with no entries parses to None, hence the `or []`.
    for set_item in data.get("sets") or []:
        if not isinstance(set_item, dict):
            continue
        set_option = set_item.get("set_option", "")
        if set_option:
            sets_info[set_option] = set_item
    return sets_info
|
||||
|
||||
@@ -27,13 +27,9 @@
|
||||
import inspect
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from importlib.machinery import SourceFileLoader
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
@@ -1649,3 +1645,117 @@ def test_comprehensive_error_paths():
|
||||
assert False, "Should raise exception for None coll_level"
|
||||
except Exception as e:
|
||||
assert "coll_level can not be None" in str(e)
|
||||
|
||||
|
||||
@pytest.mark.sets_func
class TestSetsIntegration:
    """Integration tests for the ``--set`` and ``--list-sets`` profile options.

    Every test gets a fresh output directory from ``test_utils``, runs the
    profiler through the ``binary_handler_profile_rocprof_compute`` fixture
    and cleans the directory up afterwards.
    """

    def test_memory_throughput_set(self, binary_handler_profile_rocprof_compute):
        """``--set mem_thruput`` yields one pmc file and logs its metric ids."""
        options = ["--set", "mem_thruput"]
        workload_dir = test_utils.get_output_dir()

        binary_handler_profile_rocprof_compute(
            config,
            workload_dir,
            options,
            check_success=True,
            roof=False,
        )

        assert test_utils.get_num_pmc_file(workload_dir) == 1

        # Read the log once; read_text() also closes the file handle, which
        # the previous per-metric open(...).read() never did.
        log_text = (Path(workload_dir) / "log.txt").read_text()
        memory_metrics = ["16.1.2", "17.1.0"]
        for metric_id in memory_metrics:
            assert (
                metric_id in log_text
            ), f"Expected memory metric {metric_id} not found"

        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_launch_stats_set(self, binary_handler_profile_rocprof_compute):
        """``--set launch_stats`` is collected with a single pmc file."""
        options = ["--set", "launch_stats"]
        workload_dir = test_utils.get_output_dir()

        binary_handler_profile_rocprof_compute(
            config,
            workload_dir,
            options,
            check_success=True,
            roof=False,
        )

        assert test_utils.get_num_pmc_file(workload_dir) == 1

        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_compute_thruput_util_set(self, binary_handler_profile_rocprof_compute):
        """``--set compute_thruput_util`` is collected with a single pmc file."""
        options = ["--set", "compute_thruput_util"]
        workload_dir = test_utils.get_output_dir()

        binary_handler_profile_rocprof_compute(
            config,
            workload_dir,
            options,
            check_success=True,
            roof=False,
        )

        assert test_utils.get_num_pmc_file(workload_dir) == 1

        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_compute_thruput_flops_set(self, binary_handler_profile_rocprof_compute):
        """``--set compute_thruput_flops`` is collected with a single pmc file."""
        options = ["--set", "compute_thruput_flops"]
        workload_dir = test_utils.get_output_dir()

        binary_handler_profile_rocprof_compute(
            config,
            workload_dir,
            options,
            check_success=True,
            roof=False,
        )

        assert test_utils.get_num_pmc_file(workload_dir) == 1

        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_invalid_set_error_handling(self, binary_handler_profile_rocprof_compute):
        """An unknown set name makes the profiler exit with return code 1."""
        options = ["--set", "nonexistent_set"]
        workload_dir = test_utils.get_output_dir()

        returncode = binary_handler_profile_rocprof_compute(
            config,
            workload_dir,
            options,
            check_success=False,
            roof=False,
        )

        assert returncode == 1
        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_set_and_block_mutual_exclusion(self, binary_handler_profile_rocprof_compute):
        """Passing both ``--set`` and ``--block`` exits with return code 1."""
        options = ["--set", "compute_thruput_util", "--block", "12"]
        workload_dir = test_utils.get_output_dir()

        returncode = binary_handler_profile_rocprof_compute(
            config, workload_dir, options, check_success=False, roof=False
        )

        assert returncode == 1
        test_utils.clean_output_dir(config["cleanup"], workload_dir)

    def test_list_sets_functionality(self, binary_handler_profile_rocprof_compute):
        """``--list-sets`` only prints information and writes no output files."""
        options = ["--list-sets"]
        workload_dir = test_utils.get_output_dir()

        binary_handler_profile_rocprof_compute(
            config,
            workload_dir,
            options,
            check_success=False,
            roof=False,
        )
        # workload dir should be empty
        assert not os.listdir(workload_dir)
        test_utils.clean_output_dir(config["cleanup"], workload_dir)
|
||||
|
||||
@@ -29,7 +29,6 @@ import logging
|
||||
logging.trace = lambda *args, **kwargs: None
|
||||
|
||||
import builtins
|
||||
import glob
|
||||
import inspect
|
||||
import io
|
||||
import json
|
||||
@@ -38,16 +37,15 @@ import logging
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import selectors
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest import mock
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
import utils.utils as utils
|
||||
|
||||
@@ -163,6 +161,16 @@ def check_csv_files(output_dir, num_devices, num_kernels):
|
||||
return file_dict
|
||||
|
||||
|
||||
def get_num_pmc_file(output_dir):
    """
    Count the ``.txt`` files located directly inside ``<output_dir>/perfmon``.

    Sub-directories and files with any other suffix are not counted.

    Returns:
        int: number of pmc perf text files in perfmon dir
    """
    count = 0
    for entry in (Path(output_dir) / "perfmon").iterdir():
        if entry.suffix == ".txt" and entry.is_file():
            count += 1
    return count
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# VERSION UTILITIES TESTS
|
||||
# =============================================================================
|
||||
@@ -5975,11 +5983,10 @@ def test_get_submodules_basic_functionality():
|
||||
Returns:
|
||||
None: Asserts function correctly lists submodules from a real package.
|
||||
"""
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import utils.utils as utils_mod
|
||||
|
||||
mock_package = MagicMock()
|
||||
mock_package = mock.MagicMock()
|
||||
mock_package.__path__ = ["/fake/path"]
|
||||
|
||||
mock_submodules = [
|
||||
@@ -5988,8 +5995,8 @@ def test_get_submodules_basic_functionality():
|
||||
(None, "module_error", False),
|
||||
]
|
||||
|
||||
with patch("importlib.import_module", return_value=mock_package):
|
||||
with patch("pkgutil.walk_packages", return_value=mock_submodules):
|
||||
with mock.patch("importlib.import_module", return_value=mock_package):
|
||||
with mock.patch("pkgutil.walk_packages", return_value=mock_submodules):
|
||||
result = utils_mod.get_submodules("test_package")
|
||||
|
||||
assert isinstance(result, list)
|
||||
@@ -9490,3 +9497,12 @@ def test_replace_timestamps_no_other_csvs_to_update(
|
||||
df_sysinfo_original = pd.read_csv(sysinfo_csv_path_str)
|
||||
assert list(df_sysinfo_original["Start_Timestamp"]) == [5]
|
||||
assert list(df_sysinfo_original["End_Timestamp"]) == [7]
|
||||
|
||||
|
||||
def test_set_parser():
    """parse_sets_yaml("gfx90a") exposes compute_thruput_util with its title."""
    from utils.utils import parse_sets_yaml

    gfx90a_sets = parse_sets_yaml("gfx90a")

    assert "compute_thruput_util" in gfx90a_sets
    util_set = gfx90a_sets["compute_thruput_util"]
    assert util_set["title"] == "Compute Throughput Utilization"
|
||||
|
||||
@@ -107,4 +107,10 @@ src/rocprof_compute_soc/analysis_configs/gfx940/2100_pc_sampling.yaml: 4f3af5504
|
||||
src/rocprof_compute_soc/analysis_configs/gfx941/2100_pc_sampling.yaml: 4f3af55040c40bee5f1fd88d83e2324d06e5dc462c0adc3e6d5b19b3f31af5e7
|
||||
src/rocprof_compute_soc/analysis_configs/gfx942/2100_pc_sampling.yaml: 4f3af55040c40bee5f1fd88d83e2324d06e5dc462c0adc3e6d5b19b3f31af5e7
|
||||
src/rocprof_compute_soc/analysis_configs/gfx950/2100_pc_sampling.yaml: 4f3af55040c40bee5f1fd88d83e2324d06e5dc462c0adc3e6d5b19b3f31af5e7
|
||||
src/rocprof_compute_soc/profile_configs/sets/gfx908_sets.yaml: ee28989e70d0537db8b0f0a4bc5499444b44ff0e73d3e7f2926943be11d0aeda
|
||||
src/rocprof_compute_soc/profile_configs/sets/gfx90a_sets.yaml: 9c9533174a3f7bd5c8e09ec998743c7bb2642c4ce3f818b546673be9cafc40a8
|
||||
src/rocprof_compute_soc/profile_configs/sets/gfx940_sets.yaml: 44cd2b32b050cafa73d0ead5703b82836edf25a057c21699046b6b8b8918b242
|
||||
src/rocprof_compute_soc/profile_configs/sets/gfx941_sets.yaml: 44cd2b32b050cafa73d0ead5703b82836edf25a057c21699046b6b8b8918b242
|
||||
src/rocprof_compute_soc/profile_configs/sets/gfx942_sets.yaml: 44cd2b32b050cafa73d0ead5703b82836edf25a057c21699046b6b8b8918b242
|
||||
src/rocprof_compute_soc/profile_configs/sets/gfx950_sets.yaml: 238d9dc8a98cfead3fc904885bfe413e5bcb4f1af31e9820cd640388bcd1e1c2
|
||||
docs/data/metrics_description.yaml: 819c08a584ae8b418e6983aa51108b95e43eda4f3b7892eab336c61d844b20bf
|
||||
|
||||
@@ -17,14 +17,23 @@ import yaml
|
||||
ROOT_DIR = Path(__file__).parent.parent
|
||||
SOURCE_DIR = ROOT_DIR.joinpath("utils")
|
||||
TARGET_DIR = ROOT_DIR.joinpath("src", "rocprof_compute_soc", "analysis_configs")
|
||||
SETS_TARGET_DIR = ROOT_DIR.joinpath(
|
||||
"src", "rocprof_compute_soc", "profile_configs", "sets"
|
||||
)
|
||||
DOC_TARGET_DIR = ROOT_DIR.joinpath("docs", "data")
|
||||
AUTOGEN_TEXT = "# AUTOGENERATED FILE. Only edit for testing purposes, not for development. Generated from utils/unified_config.yaml. Generated by utils/split_config.py\n"
|
||||
HASH_FILE = ROOT_DIR.joinpath("utils", "autogen_hash.yaml")
|
||||
HASH_FILE_MAP = {}
|
||||
GFX_VERSIONS = ["gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx950"]
|
||||
METRIC_ID_TO_NAME_MAP = {gfx_version: {} for gfx_version in GFX_VERSIONS}
|
||||
|
||||
|
||||
def get_autogen_text(config_file="utils/unified_config.yaml"):
    """Return the one-line "autogenerated" banner written at the top of outputs.

    Args:
        config_file: Path of the unified source file named in the banner.

    Returns:
        str: A ``#`` comment line, terminated by a newline.
    """
    banner_start = (
        "# AUTOGENERATED FILE. Only edit for testing purposes, not for development. "
    )
    banner_end = (
        "Generated from " + str(config_file) + ". Generated by utils/split_config.py\n"
    )
    return banner_start + banner_end
|
||||
|
||||
|
||||
def update_analysis_config():
|
||||
global METRIC_ID_TO_NAME_MAP
|
||||
|
||||
# Read the unified config file
|
||||
with open(SOURCE_DIR.joinpath("unified_config.yaml")) as file:
|
||||
unified_config = yaml.safe_load(file)
|
||||
@@ -38,6 +47,7 @@ def update_analysis_config():
|
||||
key: value["plain"]
|
||||
for key, value in panel_config.get("metrics_description", {}).items()
|
||||
}
|
||||
panel_id_int = panel_config["id"]
|
||||
# Convert int into str with 4 digits
|
||||
panel_id = str(panel_config["id"]).zfill(4)
|
||||
# Replace parentheses, hyphen, slash and space with underscore
|
||||
@@ -57,19 +67,28 @@ def update_analysis_config():
|
||||
|
||||
# Select metrics from current gfx arch
|
||||
new_panel_config["Panel Config"]["data source"] = []
|
||||
for data_source_config in panel_config["data source"]:
|
||||
for data_source_index, data_source_config in enumerate(
|
||||
panel_config["data source"]
|
||||
):
|
||||
data_source_config = copy.deepcopy(data_source_config)
|
||||
if "metric_table" in data_source_config:
|
||||
data_source_config["metric_table"]["metric"] = data_source_config[
|
||||
"metric_table"
|
||||
]["metric"][gfx_version]
|
||||
|
||||
build_metric_id_mapping(
|
||||
panel_id_int,
|
||||
data_source_index,
|
||||
data_source_config["metric_table"]["metric"],
|
||||
gfx_version,
|
||||
)
|
||||
new_panel_config["Panel Config"]["data source"].append(data_source_config)
|
||||
# Write panel config to file
|
||||
filename = Path(
|
||||
TARGET_DIR.joinpath(gfx_version, f"{panel_id}_{panel_title}.yaml")
|
||||
)
|
||||
with open(filename, "w") as file:
|
||||
file.write(AUTOGEN_TEXT)
|
||||
file.write(get_autogen_text())
|
||||
yaml.dump(new_panel_config, file, sort_keys=False)
|
||||
print(f"File write: {filename}")
|
||||
# Calculate hash of filename
|
||||
@@ -78,6 +97,56 @@ def update_analysis_config():
|
||||
).hexdigest()
|
||||
|
||||
|
||||
def build_metric_id_mapping(panel_id, data_source_index, metrics, gfx_version):
    """Record ``metric id -> metric name`` for one metric table of one arch.

    Ids have the form ``<panel_id // 100>.<data_source_index + 1>.<position>``
    where ``position`` is the zero-based position of the metric within
    ``metrics``. Entries are stored in ``METRIC_ID_TO_NAME_MAP[gfx_version]``.

    Args:
        panel_id: Integer panel id.
        data_source_index: Zero-based index of the data source in the panel.
        metrics: Mapping whose keys are the metric names, in table order.
        gfx_version: Architecture key into ``METRIC_ID_TO_NAME_MAP``.
    """
    # The module-level map is only mutated, never rebound, so no `global`
    # declaration is required.
    for position, name in enumerate(metrics.keys()):
        section = panel_id // 100
        table = data_source_index + 1
        METRIC_ID_TO_NAME_MAP[gfx_version][f"{section}.{table}.{position}"] = name
|
||||
|
||||
|
||||
def update_sets_config():
    """Generate one ``<gfx>_sets.yaml`` file per architecture.

    Reads ``unified_sets.yaml`` from ``SOURCE_DIR``. For every entry in
    ``GFX_VERSIONS`` it resolves each set's metric ids to metric names through
    ``METRIC_ID_TO_NAME_MAP``, writes the result to ``SETS_TARGET_DIR`` and
    records the SHA-256 of the written file in ``HASH_FILE_MAP``.

    ``METRIC_ID_TO_NAME_MAP`` must already be populated (it is filled by
    ``build_metric_id_mapping`` while the analysis configs are generated).

    Raises:
        KeyError: If a set references a metric id that is not known for the
            architecture being generated.
    """
    # Create directory if it doesn't exist. parents=True covers a missing
    # intermediate directory; exist_ok=True tolerates it appearing between
    # the check and the call.
    if not SETS_TARGET_DIR.exists():
        SETS_TARGET_DIR.mkdir(parents=True, exist_ok=True)
        print(f"Created directory: {SETS_TARGET_DIR}")

    # Read the unified sets file
    with open(SOURCE_DIR.joinpath("unified_sets.yaml")) as file:
        unified_sets = yaml.safe_load(file)

    # Create per gfx version file
    for gfx_version in GFX_VERSIONS:
        new_sets = {"sets": []}

        for sets in unified_sets["sets"]:
            # Create new set object for each set
            current_set = {
                "title": sets["title"],
                "set_option": sets["set_option"],
                "description": sets["description"],
                "metric": [],
            }

            for metric_id in sets["metric"][gfx_version]:
                try:
                    metric_name = METRIC_ID_TO_NAME_MAP[gfx_version][str(metric_id)]
                except KeyError:
                    # Same exception type as before, but say which set and
                    # arch carry the bad id.
                    raise KeyError(
                        f"set '{sets['set_option']}' references metric id "
                        f"{metric_id!r} which is not defined for {gfx_version}"
                    ) from None
                current_set["metric"].append({metric_id: metric_name})

            new_sets["sets"].append(current_set)

        # Write gfx version sets to file
        filename = Path(SETS_TARGET_DIR.joinpath(f"{gfx_version}_sets.yaml"))
        with open(filename, "w") as file:
            file.write(get_autogen_text("utils/unified_sets.yaml"))
            yaml.dump(new_sets, file, sort_keys=False)
        print(f"File write: {filename}")

        # Calculate hash of filename
        HASH_FILE_MAP[str(filename.relative_to(ROOT_DIR))] = hashlib.sha256(
            filename.read_bytes()
        ).hexdigest()
|
||||
|
||||
|
||||
def update_documentation():
|
||||
# Documentation sections
|
||||
section_panel_map = {
|
||||
@@ -153,7 +222,7 @@ def update_documentation():
|
||||
# Write documentation metrics description file
|
||||
filename = Path(DOC_TARGET_DIR.joinpath("metrics_description.yaml"))
|
||||
with open(filename, "w") as file:
|
||||
file.write(AUTOGEN_TEXT)
|
||||
file.write(get_autogen_text())
|
||||
yaml.dump(section_metric_map, file, sort_keys=False)
|
||||
print(f"File write: {filename}")
|
||||
# Calculate hash of filename
|
||||
@@ -165,12 +234,13 @@ def update_documentation():
|
||||
def update_hash():
|
||||
# Write hash file
|
||||
with open(HASH_FILE, "w") as file:
|
||||
file.write(AUTOGEN_TEXT)
|
||||
file.write(get_autogen_text())
|
||||
yaml.dump(HASH_FILE_MAP, file, sort_keys=False)
|
||||
print(f"File write: {HASH_FILE}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
update_analysis_config()
|
||||
update_sets_config()
|
||||
update_documentation()
|
||||
update_hash()
|
||||
|
||||
@@ -0,0 +1,176 @@
|
||||
---
|
||||
# Pre-defined sets containing a collection of relevant metrics that can be collected in a single pass.
|
||||
# To profile customized set(s), append to this yaml file.
|
||||
|
||||
sets:
|
||||
- title: Compute Throughput Utilization
|
||||
set_option: compute_thruput_util
|
||||
description: Placeholder
|
||||
metric:
|
||||
gfx908:
|
||||
- 11.2.2
|
||||
- 11.2.3
|
||||
gfx90a:
|
||||
- 11.2.3
|
||||
- 11.2.4
|
||||
- 11.2.5
|
||||
- 11.2.6
|
||||
gfx940:
|
||||
- 11.2.2
|
||||
- 11.2.3
|
||||
- 11.2.4
|
||||
- 11.2.5
|
||||
gfx941:
|
||||
- 11.2.2
|
||||
- 11.2.3
|
||||
- 11.2.4
|
||||
- 11.2.5
|
||||
gfx942:
|
||||
- 11.2.2
|
||||
- 11.2.3
|
||||
- 11.2.4
|
||||
- 11.2.5
|
||||
gfx950:
|
||||
- 11.2.2
|
||||
- 11.2.3
|
||||
- 11.2.5
|
||||
- 11.2.6
|
||||
|
||||
- title: Compute Throughput FLOPS
|
||||
set_option: compute_thruput_flops
|
||||
description: Placeholder
|
||||
metric:
|
||||
gfx908:
|
||||
- 2.1.2
|
||||
- 2.1.3
|
||||
- 2.1.4
|
||||
- 2.1.5
|
||||
- 2.1.6
|
||||
gfx90a:
|
||||
- 2.1.2
|
||||
- 2.1.3
|
||||
- 2.1.4
|
||||
- 2.1.5
|
||||
- 2.1.6
|
||||
gfx940:
|
||||
- 2.1.2
|
||||
- 2.1.3
|
||||
- 2.1.4
|
||||
- 2.1.5
|
||||
- 2.1.6
|
||||
- 2.1.7
|
||||
gfx941:
|
||||
- 2.1.2
|
||||
- 2.1.3
|
||||
- 2.1.4
|
||||
- 2.1.5
|
||||
- 2.1.6
|
||||
- 2.1.7
|
||||
gfx942:
|
||||
- 2.1.2
|
||||
- 2.1.3
|
||||
- 2.1.4
|
||||
- 2.1.5
|
||||
- 2.1.6
|
||||
- 2.1.7
|
||||
gfx950:
|
||||
- 2.1.2
|
||||
- 2.1.3
|
||||
- 2.1.4
|
||||
- 2.1.5
|
||||
- 2.1.6
|
||||
- 2.1.8
|
||||
|
||||
- title: Memory Throughput
|
||||
set_option: mem_thruput
|
||||
description: Placeholder
|
||||
metric:
|
||||
gfx908:
|
||||
- 2.1.16
|
||||
- 2.1.17
|
||||
- 16.1.2
|
||||
- 17.1.0
|
||||
gfx90a:
|
||||
- 2.1.16
|
||||
- 2.1.17
|
||||
- 16.1.2
|
||||
- 17.1.0
|
||||
gfx940:
|
||||
- 2.1.17
|
||||
- 2.1.18
|
||||
- 16.1.2
|
||||
- 17.1.0
|
||||
gfx941:
|
||||
- 2.1.17
|
||||
- 2.1.18
|
||||
- 16.1.2
|
||||
- 17.1.0
|
||||
gfx942:
|
||||
- 2.1.17
|
||||
- 2.1.18
|
||||
- 16.1.2
|
||||
- 17.1.0
|
||||
gfx950:
|
||||
- 2.1.18
|
||||
- 2.1.19
|
||||
- 16.1.2
|
||||
- 17.1.0
|
||||
|
||||
- title: Launch Stats
|
||||
set_option: launch_stats
|
||||
description: Placeholder
|
||||
metric:
|
||||
gfx908:
|
||||
- 7.1.0
|
||||
- 7.1.1
|
||||
- 7.1.2
|
||||
- 7.1.5
|
||||
- 7.1.6
|
||||
- 7.1.7
|
||||
- 7.1.8
|
||||
- 7.1.9
|
||||
gfx90a:
|
||||
- 7.1.0
|
||||
- 7.1.1
|
||||
- 7.1.2
|
||||
- 7.1.5
|
||||
- 7.1.6
|
||||
- 7.1.7
|
||||
- 7.1.8
|
||||
- 7.1.9
|
||||
gfx940:
|
||||
- 7.1.0
|
||||
- 7.1.1
|
||||
- 7.1.2
|
||||
- 7.1.5
|
||||
- 7.1.6
|
||||
- 7.1.7
|
||||
- 7.1.8
|
||||
- 7.1.9
|
||||
gfx941:
|
||||
- 7.1.0
|
||||
- 7.1.1
|
||||
- 7.1.2
|
||||
- 7.1.5
|
||||
- 7.1.6
|
||||
- 7.1.7
|
||||
- 7.1.8
|
||||
- 7.1.9
|
||||
gfx942:
|
||||
- 7.1.0
|
||||
- 7.1.1
|
||||
- 7.1.2
|
||||
- 7.1.5
|
||||
- 7.1.6
|
||||
- 7.1.7
|
||||
- 7.1.8
|
||||
- 7.1.9
|
||||
gfx950:
|
||||
- 7.1.0
|
||||
- 7.1.1
|
||||
- 7.1.2
|
||||
- 7.1.5
|
||||
- 7.1.6
|
||||
- 7.1.7
|
||||
- 7.1.8
|
||||
- 7.1.9
|
||||
مرجع در شماره جدید
Block a user