From 85bfa73e2c29104a36b3d7adb37383d9fde21cfd Mon Sep 17 00:00:00 2001 From: xuchen-amd Date: Mon, 28 Apr 2025 11:29:14 -0400 Subject: [PATCH] Add test for gfx942 number of xcds. (#674) * Add test for gfx942 number of xcds. * Improve the structure of mi gpu specs, add num_xcds_spec_class test. * Add to ctest. --------- Signed-off-by: xuchen-amd --- CMakeLists.txt | 30 +- pyproject.toml | 2 + src/rocprof_compute_analyze/analysis_webui.py | 2 +- src/rocprof_compute_base.py | 5 +- src/rocprof_compute_soc/soc_base.py | 11 +- src/utils/mi_gpu_spec.py | 315 ++++++++++-------- src/utils/specs.py | 26 +- src/utils/utils.py | 4 +- tests/test_gpu_specs.py | 150 +++++++++ 9 files changed, 377 insertions(+), 168 deletions(-) create mode 100644 tests/test_gpu_specs.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e7caf003f..a6ebdd30e2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -247,8 +247,9 @@ add_test( add_test( NAME test_profile_section COMMAND - ${Python3_EXECUTABLE} -m pytest -m section --junitxml=tests/test_profile_misc.xml - ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py + ${Python3_EXECUTABLE} -m pytest -m section + --junitxml=tests/test_profile_section.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) set_tests_properties( @@ -292,8 +293,29 @@ add_test( add_test( NAME test_L1_cache_counters COMMAND - ${Python3_EXECUTABLE} -m pytest -m L1_cache --junitxml=tests/test_TCP_counters.xml - ${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_TCP_counters.py + ${Python3_EXECUTABLE} -m pytest -m L1_cache + --junitxml=tests/test_L1_cache_counters.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_TCP_counters.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +# --------------------------- +# Spec tests +# --------------------------- + +add_test( + NAME test_num_xcds_spec_class + COMMAND + ${Python3_EXECUTABLE} -m pytest -m num_xcds_spec_class + --junitxml=tests/test_num_xcds_spec_class.xml 
${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_gpu_specs.py + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +add_test( + NAME test_num_xcds_cli_output + COMMAND + ${Python3_EXECUTABLE} -m pytest -m num_xcds_cli_output + --junitxml=tests/test_num_xcds_cli_output.xml ${COV_OPTION} + ${PROJECT_SOURCE_DIR}/tests/test_gpu_specs.py WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) # --------- diff --git a/pyproject.toml b/pyproject.toml index b954c336d5..696427d52b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,4 +71,6 @@ markers = [ "kernel_verbose", "serial", "L1_cache", + "num_xcds_spec_class", + "num_xcds_cli_output", ] diff --git a/src/rocprof_compute_analyze/analysis_webui.py b/src/rocprof_compute_analyze/analysis_webui.py index b124b0d16f..2f9f567d31 100644 --- a/src/rocprof_compute_analyze/analysis_webui.py +++ b/src/rocprof_compute_analyze/analysis_webui.py @@ -348,7 +348,7 @@ class webui_analysis(OmniAnalyze_Base): debug=False, host="0.0.0.0", port=random.randint(1024, 49151) ) else: - self.app.run_server(debug=False, host="0.0.0.0", port=args.gui) + self.app.run(debug=False, host="0.0.0.0", port=args.gui) @demarcate diff --git a/src/rocprof_compute_base.py b/src/rocprof_compute_base.py index 1a9556e6b5..deeac145d6 100644 --- a/src/rocprof_compute_base.py +++ b/src/rocprof_compute_base.py @@ -47,7 +47,7 @@ from utils.logger import ( setup_file_handler, setup_logging_priority, ) -from utils.mi_gpu_spec import get_gpu_series_dict, parse_mi_gpu_spec +from utils.mi_gpu_spec import mi_gpu_specs from utils.specs import MachineSpecs, generate_machine_specs from utils.utils import ( detect_rocprof, @@ -72,8 +72,7 @@ class RocProfCompute: "ver_pretty": None, } self.__options = {} - parse_mi_gpu_spec() - self.__supported_archs = get_gpu_series_dict() + self.__supported_archs = mi_gpu_specs.get_gpu_series_dict() self.__mspec: MachineSpecs = None # to be initalized in load_soc_specs() setup_console_handler() self.set_version() diff --git 
a/src/rocprof_compute_soc/soc_base.py b/src/rocprof_compute_soc/soc_base.py index 12fe7e03cd..594957fc99 100644 --- a/src/rocprof_compute_soc/soc_base.py +++ b/src/rocprof_compute_soc/soc_base.py @@ -42,7 +42,7 @@ from utils.logger import ( console_warning, demarcate, ) -from utils.mi_gpu_spec import get_gpu_model, get_gpu_series, get_num_xcds +from utils.mi_gpu_spec import mi_gpu_specs from utils.parser import build_in_vars, supported_denom from utils.utils import ( capture_subprocess_output, @@ -104,7 +104,6 @@ class OmniSoC_Base: def get_compatible_profilers(self): return self.__compatible_profilers - @demarcate def populate_mspec(self): from utils.specs import run, search, total_sqc @@ -181,13 +180,15 @@ class OmniSoC_Base: self._mspec.cur_sclk = self._mspec.max_sclk self._mspec.cur_mclk = self._mspec.max_mclk - self._mspec.gpu_series = get_gpu_series(self._mspec.gpu_arch) + self._mspec.gpu_series = mi_gpu_specs.get_gpu_series(self._mspec.gpu_arch) # specify gpu model name for gfx942 hardware - self._mspec.gpu_model = get_gpu_model( + self._mspec.gpu_model = mi_gpu_specs.get_gpu_model( self._mspec.gpu_arch, self._mspec.gpu_chip_id ) self._mspec.num_xcd = str( - get_num_xcds(self._mspec.gpu_model, self._mspec.compute_partition) + mi_gpu_specs.get_num_xcds( + self._mspec.gpu_model, self._mspec.compute_partition + ) ) @demarcate diff --git a/src/utils/mi_gpu_spec.py b/src/utils/mi_gpu_spec.py index eaee6e7bb2..a75a2a7802 100644 --- a/src/utils/mi_gpu_spec.py +++ b/src/utils/mi_gpu_spec.py @@ -1,4 +1,5 @@ import os +from dataclasses import dataclass, field from typing import Any, Dict import yaml @@ -21,175 +22,205 @@ MI_CONSTANS = { MI350: "mi350", } -gpu_series_dict = {} # key: gpu arch -gpu_model_dict = {} # key: gpu_arch -num_xcds_dict = {} # key: gpu model -chip_id_dict = {} # key: chip id (int) - # ---------------------------- -# YAML Parsing and Data Handling +# Data Class handling to preserve the hierarchical gpu information # ---------------------------- 
-def load_yaml(file_path: str) -> Dict[str, Any]: - """ - Loads MI GPU YAML data /util into a Python dictionary. +@dataclass +class MIGPUSpecs: + _instance = None - Args: - file_path (str): The path to the YAML file. + _gpu_series_dict = {} # key: gpu arch + _gpu_model_dict = {} # key: gpu_arch + _num_xcds_dict = {} # key: gpu model + _chip_id_dict = {} # key: chip id (int) - Returns: - Dict[str, Any]: Parsed YAML data as a nested dictionary. - Exit with console error if an error occurs. - """ - console_debug("[load_yaml]") - try: - with open(file_path, "r") as file: - data = yaml.safe_load(file) - return data - except FileNotFoundError: - console_error(f"Error: The file '{file_path}' was not found.") - except yaml.YAMLError as exc: - console_error(f"Error parsing YAML file '{file_path}': {exc}") - except Exception as e: - console_error( - f"An unexpected error occurred while loading YAML file '{file_path}': {e}" - ) + _initialized = False + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._initialize() + return cls._instance -def parse_mi_gpu_spec(): - """ - Parse out mi gpu data from yaml file and store in memory. - MI GPUs - |-- series - |-- architecture (list) - |-- gpu model - |-- chip_ids - |-- partition_mode - """ + @classmethod + def _initialize(cls): + if not cls._initialized: + cls._parse_mi_gpu_spec() + cls._initialized = True - current_dir = os.path.dirname(__file__) - yaml_file_path = os.path.join(current_dir, "mi_gpu_spec.yaml") + # ---------------------------- + # YAML Parsing and Data Handling + # ---------------------------- - # Load the YAML data - yaml_data = load_yaml(yaml_file_path) + @classmethod + def _load_yaml(cls, file_path: str) -> Dict[str, Any]: + """ + Loads MI GPU YAML data /util into a Python dictionary. 
- for series in yaml_data["mi_gpu_spec"]: - curr_gpu_series = series["gpu_series"] - console_debug("[parse_mi_gpu_spec] Processing series: %s" % curr_gpu_series) - for archs in series["gpu_archs"]: - curr_gpu_arch = archs["gpu_arch"] - gpu_series_dict[curr_gpu_arch] = curr_gpu_series - gpu_model_dict[curr_gpu_arch] = [] - for models in archs["models"]: - curr_gpu_model = models["gpu_model"] - gpu_model_dict[curr_gpu_arch].append(curr_gpu_model) - num_xcds_dict[curr_gpu_model] = ( - models.get("partition_mode", {}) - .get("compute_partition_mode", {}) - .get("num_xcds", {}) - ) - if "chip_ids" in models and "physical" in models["chip_ids"]: - chip_id_dict[models["chip_ids"]["physical"]] = curr_gpu_model - if "chip_ids" in models and "virtual" in models["chip_ids"]: - chip_id_dict[models["chip_ids"]["virtual"]] = curr_gpu_model + Args: + file_path (str): The path to the YAML file. + Returns: + Dict[str, Any]: Parsed YAML data as a nested dictionary. + Exit with console error if an error occurs. + """ + console_debug("[load_yaml]") + try: + with open(file_path, "r") as file: + data = yaml.safe_load(file) + return data + except FileNotFoundError: + console_error(f"Error: The file '{file_path}' was not found.") + except yaml.YAMLError as exc: + console_error(f"Error parsing YAML file '{file_path}': {exc}") + except Exception as e: + console_error( + f"An unexpected error occurred while loading YAML file '{file_path}': {e}" + ) -def get_gpu_series_dict(): - if not gpu_series_dict: - console_error( - "gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?" - ) - return None - return gpu_series_dict + @classmethod + def _parse_mi_gpu_spec(cls): + """ + Parse out mi gpu data from yaml file and store in memory. 
+ MI GPUs + |-- series + |-- architecture (list) + |-- gpu model + |-- chip_ids + |-- partition_mode + """ + current_dir = os.path.dirname(__file__) + yaml_file_path = os.path.join(current_dir, "mi_gpu_spec.yaml") -def get_gpu_series(gpu_arch_): - if not gpu_series_dict: - console_error( - "gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?" - ) - return None + # Load the YAML data + yaml_data = cls._load_yaml(yaml_file_path) - # Normalize the key by checking both the raw and lowercase versions - gpu_series = gpu_series_dict.get(gpu_arch_) or gpu_series_dict.get(gpu_arch_.lower()) - if gpu_series: - return gpu_series.upper() + for series in yaml_data["mi_gpu_spec"]: + curr_gpu_series = series["gpu_series"] + console_debug("[parse_mi_gpu_spec] Processing series: %s" % curr_gpu_series) + for archs in series["gpu_archs"]: + curr_gpu_arch = archs["gpu_arch"] + cls._gpu_series_dict[curr_gpu_arch] = curr_gpu_series + cls._gpu_model_dict[curr_gpu_arch] = [] + for models in archs["models"]: + curr_gpu_model = models["gpu_model"] + cls._gpu_model_dict[curr_gpu_arch].append(curr_gpu_model) + cls._num_xcds_dict[curr_gpu_model] = ( + models.get("partition_mode", {}) + .get("compute_partition_mode", {}) + .get("num_xcds", {}) + ) + if "chip_ids" in models and "physical" in models["chip_ids"]: + cls._chip_id_dict[models["chip_ids"]["physical"]] = curr_gpu_model + if "chip_ids" in models and "virtual" in models["chip_ids"]: + cls._chip_id_dict[models["chip_ids"]["virtual"]] = curr_gpu_model - console_warning(f"No matching gpu series found for gpu arch: {gpu_arch_}") - return None + @classmethod + def get_gpu_series_dict(cls): + if not cls._gpu_series_dict: + console_error( + "gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?" + ) + return None + return cls._gpu_series_dict - -def get_gpu_model(gpu_arch_, chip_id_): - # Check that gpu_model_dict is populated first - if not gpu_model_dict: - console_error( - "gpu_model_dict not yet populated. 
Did you run parse_mi_gpu_spec()?" - ) - return None - - gpu_arch_lower = gpu_arch_.lower() - - # Handle gfx942 with chip_id mapping - if gpu_arch_lower not in ("gfx906", "gfx908", "gfx90a"): - if chip_id_ and int(chip_id_) in chip_id_dict: - gpu_model = chip_id_dict.get(int(chip_id_)) - else: - console_warning(f"No gpu model found for chip id: {chip_id_}") + @classmethod + def get_gpu_series(cls, gpu_arch_): + if not cls._gpu_series_dict: + console_error( + "gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?" + ) return None - # Otherwise use gpu_model_dict mapping for other mi architectures - elif gpu_arch_lower in gpu_model_dict: - # NOTE: take the first element works for now - gpu_model = gpu_model_dict[gpu_arch_lower][0] - else: - console_warning(f"No gpu model found for gpu arch: {gpu_arch_lower}") - return None - - if not gpu_model: - console_warning(f"No gpu model found for gpu arch: {gpu_arch_lower}") - return None - - return gpu_model.upper() - - -def get_num_xcds(gpu_model_, compute_partition_): - # Only gpu in and above mi 300 series have more than one XCDs - if gpu_model_.lower() in ("mi50", "mi60", "mi100", "mi210", "mi250", "mi250x"): - return 1 - - if not num_xcds_dict: - console_error( - "mi300_num_xcds_dict not yet populated, did you run parse_mi_gpu_spec()?" + # Normalize the key by checking both the raw and lowercase versions + gpu_series = cls._gpu_series_dict.get(gpu_arch_) or cls._gpu_series_dict.get( + gpu_arch_.lower() ) + if gpu_series: + return gpu_series.upper() + + console_warning(f"No matching gpu series found for gpu arch: {gpu_arch_}") return None - gpu_model_lower = gpu_model_.lower() - partition_lower = compute_partition_.lower() + @classmethod + def get_gpu_model(cls, gpu_arch_, chip_id_): + # Check that gpu_model_dict is populated first + if not cls._gpu_model_dict: + console_error( + "gpu_model_dict not yet populated. Did you run parse_mi_gpu_spec()?" 
+ ) + return None - if gpu_model_lower not in num_xcds_dict: - return None + gpu_arch_lower = gpu_arch_.lower() - model_dict = num_xcds_dict[gpu_model_lower] - if partition_lower not in model_dict: - console_log(f"Unknown compute partition: {compute_partition_}") - return None + # Handle gfx942 with chip_id mapping + if gpu_arch_lower not in ("gfx906", "gfx908", "gfx90a"): + if chip_id_ and int(chip_id_) in cls._chip_id_dict: + gpu_model = cls._chip_id_dict.get(int(chip_id_)) + else: + console_warning(f"No gpu model found for chip id: {chip_id_}") + return None - num_xcds = model_dict[partition_lower] - if not num_xcds: - console_warning( - "Unknown compute partition found for %s / %s", compute_partition_, gpu_model_ - ) - return None + # Otherwise use gpu_model_dict mapping for other mi architectures + elif gpu_arch_lower in cls._gpu_model_dict: + # NOTE: take the first element works for now + gpu_model = cls._gpu_model_dict[gpu_arch_lower][0] + else: + console_warning(f"No gpu model found for gpu arch: {gpu_arch_lower}") + return None - return num_xcds + if not gpu_model: + console_warning(f"No gpu model found for gpu arch: {gpu_arch_lower}") + return None + + return gpu_model.upper() + + @classmethod + def get_num_xcds(cls, gpu_model_, compute_partition_): + # Only gpu in and above mi 300 series have more than one XCDs + if gpu_model_.lower() in ("mi50", "mi60", "mi100", "mi210", "mi250", "mi250x"): + return 1 + + if not cls._num_xcds_dict: + console_error( + "mi300_num_xcds_dict not yet populated, did you run parse_mi_gpu_spec()?" 
+ ) + return None + + gpu_model_lower = gpu_model_.lower() + partition_lower = compute_partition_.lower() + + if gpu_model_lower not in cls._num_xcds_dict: + return None + + model_dict = cls._num_xcds_dict[gpu_model_lower] + if partition_lower not in model_dict: + console_log(f"Unknown compute partition: {compute_partition_}") + return None + + num_xcds = model_dict[partition_lower] + if not num_xcds: + console_warning( + "Unknown compute partition found for %s / %s", + compute_partition_, + gpu_model_, + ) + return None + + return num_xcds + + @classmethod + def get_chip_id_dict(cls): + if cls._chip_id_dict: + return cls._chip_id_dict + else: + console_error("mi300_chip_id_dict not yet populated, did you run parse_mi_gpu_spec()?") -def get_chip_id_dict(): - if chip_id_dict: - return chip_id_dict - else: - console_error( - "mi300_chip_id_dict not yet populated, did you run parse_mi_gpu_spec()?" - ) +# pre-initialize the instance when module loads + +mi_gpu_specs = MIGPUSpecs() diff --git a/src/utils/specs.py b/src/utils/specs.py index 2b9cb2b1dd..7bb65b32dc 100644 --- a/src/utils/specs.py +++ b/src/utils/specs.py @@ -38,8 +38,14 @@ from pathlib import Path as path import pandas as pd import config -from utils.logger import console_debug, console_error, console_log, console_warning -from utils.mi_gpu_spec import get_chip_id_dict, get_gpu_series_dict, get_num_xcds +from utils.logger import ( + console_debug, + console_error, + console_log, + console_warning, + demarcate, +) +from utils.mi_gpu_spec import mi_gpu_specs from utils.tty import get_table_string from utils.utils import get_version @@ -59,12 +65,12 @@ def detect_arch(_rocminfo): for idx1, linetext in enumerate(_rocminfo): # NOTE: currently supported socs are gfx archs only gpu_arch = search(r"^\s*Name\s*:\s* ([Gg][Ff][Xx][a-zA-Z0-9]+).*\s*$", linetext) - if gpu_arch in get_gpu_series_dict().keys(): + if gpu_arch in mi_gpu_specs.get_gpu_series_dict().keys(): break - if str(gpu_arch) in get_gpu_series_dict().keys(): + if str(gpu_arch) in mi_gpu_specs.get_gpu_series_dict().keys(): 
gpu_arch = str(gpu_arch) break - if not gpu_arch in get_gpu_series_dict().keys(): + if not gpu_arch in mi_gpu_specs.get_gpu_series_dict().keys(): console_error("Cannot find a supported arch in rocminfo: " + str(gpu_arch)) else: return (gpu_arch, idx1) @@ -83,8 +89,8 @@ def detect_gpu_chip_id(_rocminfo): if not gpu_chip_id: console_warning("No Chip ID detected: " + str(gpu_chip_id)) elif ( - gpu_chip_id not in get_chip_id_dict().keys() - and int(gpu_chip_id) not in get_chip_id_dict().keys() + gpu_chip_id not in mi_gpu_specs.get_chip_id_dict().keys() + and int(gpu_chip_id) not in mi_gpu_specs.get_chip_id_dict().keys() ): console_warning("Unknown Chip ID detected: " + str(gpu_chip_id)) return gpu_chip_id @@ -534,9 +540,7 @@ class MachineSpecs: def get_hbm_channels(self): if self.memory_partition.lower().startswith("nps"): hbmchannels = 128 - if self.memory_partition.lower() == "nps2": - hbmchannels /= 2 - elif self.memory_partition.lower() == "nps4": + if self.memory_partition.lower() == "nps4": hbmchannels /= 4 elif self.memory_partition.lower() == "nps8": hbmchannels /= 8 @@ -670,7 +674,7 @@ def total_sqc(archname, numCUs, numSEs): def total_l2_banks(archname, L2Banks, compute_partition): - xcds = get_num_xcds(archname, compute_partition) + xcds = mi_gpu_specs.get_num_xcds(archname, compute_partition) totalL2Banks = L2Banks * xcds return totalL2Banks diff --git a/src/utils/utils.py b/src/utils/utils.py index dcab0bf152..fa0629fc63 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -49,7 +49,7 @@ from utils.logger import ( console_warning, demarcate, ) -from utils.mi_gpu_spec import get_num_xcds +from utils.mi_gpu_spec import mi_gpu_specs rocprof_cmd = "" rocprof_args = "" @@ -675,7 +675,7 @@ def run_prof( if new_env and not using_v3() and not using_v1(): # flatten tcc for applicable mi300 input f = path(workload_dir + "/out/pmc_1/results_" + fbase + ".csv") - xcds = get_num_xcds(mspec.gpu_model, mspec.compute_partition) + xcds = 
mi_gpu_specs.get_num_xcds(mspec.gpu_model, mspec.compute_partition) df = flatten_tcc_info_across_xcds(f, xcds, int(mspec._l2_banks)) df.to_csv(f, index=False) diff --git a/tests/test_gpu_specs.py b/tests/test_gpu_specs.py new file mode 100644 index 0000000000..eb7109f1a1 --- /dev/null +++ b/tests/test_gpu_specs.py @@ -0,0 +1,150 @@ +import re +import subprocess +import sys +from importlib.machinery import SourceFileLoader +from unittest.mock import patch + +import pandas as pd +import pytest + +from src.utils.specs import generate_machine_specs + +rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module() + + +# NOTE: Only testing gfx942 for now. +GFX942_CHIP_IDS = ["29856", "29876", "29857", "29877", "29858", "29878", "29861", "29881"] + +# Expected result +GFX942_NUM_XCDS = {"spx": 8, "dpx": 4, "qpx": 2, "cpx": 1} + + +# helper to strip ANSI color codes if your app uses them +ANSI_ESCAPE = re.compile(r"\x1B[@-_][0-?]*[ -/]*[@-~]") + + +def strip_ansi(s: str) -> str: + return ANSI_ESCAPE.sub("", s) + + +def parse_table_dict(output: str) -> dict: + """ + Parse an ASCII table into a dict mapping Spec -> Value. 
+ """ + lines = [l for l in output.splitlines() if l.startswith("│")] + # locate header row (the one containing 'Spec' and 'Value') + header_idx = next( + (i for i, ln in enumerate(lines) if "Spec" in ln and "Value" in ln), None + ) + if header_idx is None: + raise ValueError("Header row with Spec and Value not found") + + header_cells = [c.strip() for c in lines[header_idx].strip("│").split("│")] + + spec_i = header_cells.index("Spec") + value_i = header_cells.index("Value") + + result = {} + for ln in lines[header_idx + 2 :]: + if ln.startswith("├") or ln.startswith("╘"): + continue + cells = [c.strip() for c in ln.strip("│").split("│")] + if len(cells) <= max(spec_i, value_i): + continue + spec = cells[spec_i] + value = cells[value_i] + if spec: + result[spec] = value + return result + + +def run(cmd): + p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if cmd[0] == "amd-smi" and p.returncode == 8: + print("ERROR: No GPU detected. Unable to load amd-smi") + assert 0 + return p.stdout.decode("utf-8") + + +def gpu_arch(): + gpu_arch = None + + ## 1) Parse arch details from rocminfo + rocminfo = str( + # decode with utf-8 to account for rocm-smi changes in latest rocm + subprocess.run( + ["rocminfo"], stdout=subprocess.PIPE, stderr=subprocess.PIPE + ).stdout.decode("utf-8") + ) + rocminfo = rocminfo.split("\n") + soc_regex = re.compile(r"^\s*Name\s*:\s+ ([a-zA-Z0-9]+)\s*$", re.MULTILINE) + devices = list(filter(soc_regex.match, rocminfo)) + gpu_arch = devices[0].split()[1] + + if not gpu_arch: + ## 2) Try parse arch details from chip id. + chip_id = re.compile(r"^\s*Chip ID:\s+ ([a-zA-Z0-9]+)\s*", re.MULTILINE) + ids = list(filter(chip_id.match, rocminfo)) + for id in ids: + chip_id = re.match(r"^[^()]+", id.split()[2]).group(0) + + if chip_id in GFX942_CHIP_IDS: + gpu_arch = "gfx942" + + return gpu_arch + + +@pytest.mark.num_xcds_spec_class +def test_num_xcds_spec_class(monkeypatch): + arch = gpu_arch() + + # 1. 
Check if gfx942 soc + if not arch or "gfx942" not in arch.lower(): + pytest.skip("Skipping num xcds test for non-gfx942 socs.") + + # 2. load machine specs + machine_spec = generate_machine_specs(None) + + # 3. check results are expected + assert machine_spec.compute_partition is not None + assert int(machine_spec.num_xcd) == GFX942_NUM_XCDS.get( + machine_spec.compute_partition.lower(), -1 + ) + + +@pytest.mark.num_xcds_cli_output +def test_num_xcds_cli_output(): + arch = gpu_arch() + + # 1. Check if gfx942 soc + if not arch or "gfx942" not in arch.lower(): + pytest.skip("Skipping num xcds test for non-gfx942 socs.") + + # 2. Run rocprof-compute -s and grab rocprof-compute num_xcd + proc = subprocess.run( + ["rocprof-compute", "-s"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + assert ( + proc.returncode == 0 + ), f"Non-zero exit ({proc.returncode}), stderr:\n{proc.stderr}" + + # 3. strip ANSI, parse table + clean = strip_ansi(proc.stdout) + return_dict = parse_table_dict(clean) + + # 4. check results are expected + assert ( + "Compute Partition" in return_dict + ), "Spec 'Compute Partition' not found in table" + assert "Num XCDs" in return_dict, "Spec 'Num XCDs' not found in table" + + compute_partition_actual = return_dict["Compute Partition"] + num_xcd_actual = return_dict["Num XCDs"] + + assert compute_partition_actual is not None + assert int(num_xcd_actual) == GFX942_NUM_XCDS.get( + compute_partition_actual.lower(), -1 + )