Add test for gfx942 number of xcds. (#674)
* Add test for gfx942 number of xcds. * Improve the structure of mi gpu specs, add num_xcds_spec_class test. * Add to ctest. --------- Signed-off-by: xuchen-amd <xuchen@amd.com>
Dieser Commit ist enthalten in:
+26
-4
@@ -247,8 +247,9 @@ add_test(
|
||||
add_test(
|
||||
NAME test_profile_section
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m section --junitxml=tests/test_profile_misc.xml
|
||||
${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
${Python3_EXECUTABLE} -m pytest -m section
|
||||
--junitxml=tests/test_profile_section.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_profile_general.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
set_tests_properties(
|
||||
@@ -292,8 +293,29 @@ add_test(
|
||||
add_test(
|
||||
NAME test_L1_cache_counters
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m L1_cache --junitxml=tests/test_TCP_counters.xml
|
||||
${COV_OPTION} ${PROJECT_SOURCE_DIR}/tests/test_TCP_counters.py
|
||||
${Python3_EXECUTABLE} -m pytest -m L1_cache
|
||||
--junitxml=tests/test_L1_cache_counters.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_TCP_counters.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
# ---------------------------
|
||||
# Spec tests
|
||||
# ---------------------------
|
||||
|
||||
add_test(
|
||||
NAME test_num_xcds_spec_class
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m num_xcds_spec_class
|
||||
--junitxml=tests/test_num_xcds_spec_class.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_gpu_specs.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
add_test(
|
||||
NAME test_num_xcds_cli_output
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} -m pytest -m num_xcds_cli_output
|
||||
--junitxml=tests/test_num_xcds_cli_output.xml ${COV_OPTION}
|
||||
${PROJECT_SOURCE_DIR}/tests/test_gpu_specs.py
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
# ---------
|
||||
|
||||
@@ -71,4 +71,6 @@ markers = [
|
||||
"kernel_verbose",
|
||||
"serial",
|
||||
"L1_cache",
|
||||
"num_xcds_spec_class",
|
||||
"num_xcds_cli_output",
|
||||
]
|
||||
|
||||
@@ -348,7 +348,7 @@ class webui_analysis(OmniAnalyze_Base):
|
||||
debug=False, host="0.0.0.0", port=random.randint(1024, 49151)
|
||||
)
|
||||
else:
|
||||
self.app.run_server(debug=False, host="0.0.0.0", port=args.gui)
|
||||
self.app.run(debug=False, host="0.0.0.0", port=args.gui)
|
||||
|
||||
|
||||
@demarcate
|
||||
|
||||
@@ -47,7 +47,7 @@ from utils.logger import (
|
||||
setup_file_handler,
|
||||
setup_logging_priority,
|
||||
)
|
||||
from utils.mi_gpu_spec import get_gpu_series_dict, parse_mi_gpu_spec
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.specs import MachineSpecs, generate_machine_specs
|
||||
from utils.utils import (
|
||||
detect_rocprof,
|
||||
@@ -72,8 +72,7 @@ class RocProfCompute:
|
||||
"ver_pretty": None,
|
||||
}
|
||||
self.__options = {}
|
||||
parse_mi_gpu_spec()
|
||||
self.__supported_archs = get_gpu_series_dict()
|
||||
self.__supported_archs = mi_gpu_specs.get_gpu_series_dict()
|
||||
self.__mspec: MachineSpecs = None # to be initalized in load_soc_specs()
|
||||
setup_console_handler()
|
||||
self.set_version()
|
||||
|
||||
@@ -42,7 +42,7 @@ from utils.logger import (
|
||||
console_warning,
|
||||
demarcate,
|
||||
)
|
||||
from utils.mi_gpu_spec import get_gpu_model, get_gpu_series, get_num_xcds
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.parser import build_in_vars, supported_denom
|
||||
from utils.utils import (
|
||||
capture_subprocess_output,
|
||||
@@ -104,7 +104,6 @@ class OmniSoC_Base:
|
||||
def get_compatible_profilers(self):
|
||||
return self.__compatible_profilers
|
||||
|
||||
@demarcate
|
||||
def populate_mspec(self):
|
||||
from utils.specs import run, search, total_sqc
|
||||
|
||||
@@ -181,13 +180,15 @@ class OmniSoC_Base:
|
||||
self._mspec.cur_sclk = self._mspec.max_sclk
|
||||
self._mspec.cur_mclk = self._mspec.max_mclk
|
||||
|
||||
self._mspec.gpu_series = get_gpu_series(self._mspec.gpu_arch)
|
||||
self._mspec.gpu_series = mi_gpu_specs.get_gpu_series(self._mspec.gpu_arch)
|
||||
# specify gpu model name for gfx942 hardware
|
||||
self._mspec.gpu_model = get_gpu_model(
|
||||
self._mspec.gpu_model = mi_gpu_specs.get_gpu_model(
|
||||
self._mspec.gpu_arch, self._mspec.gpu_chip_id
|
||||
)
|
||||
self._mspec.num_xcd = str(
|
||||
get_num_xcds(self._mspec.gpu_model, self._mspec.compute_partition)
|
||||
mi_gpu_specs.get_num_xcds(
|
||||
self._mspec.gpu_model, self._mspec.compute_partition
|
||||
)
|
||||
)
|
||||
|
||||
@demarcate
|
||||
|
||||
+173
-142
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict
|
||||
|
||||
import yaml
|
||||
@@ -21,175 +22,205 @@ MI_CONSTANS = {
|
||||
MI350: "mi350",
|
||||
}
|
||||
|
||||
gpu_series_dict = {} # key: gpu arch
|
||||
gpu_model_dict = {} # key: gpu_arch
|
||||
num_xcds_dict = {} # key: gpu model
|
||||
chip_id_dict = {} # key: chip id (int)
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# YAML Parsing and Data Handling
|
||||
# Data Class handling to preserve the hierarchical gpu information
|
||||
# ----------------------------
|
||||
|
||||
|
||||
def load_yaml(file_path: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Loads MI GPU YAML data /util into a Python dictionary.
|
||||
@dataclass
|
||||
class MIGPUSpecs:
|
||||
_instance = None
|
||||
|
||||
Args:
|
||||
file_path (str): The path to the YAML file.
|
||||
_gpu_series_dict = {} # key: gpu arch
|
||||
_gpu_model_dict = {} # key: gpu_arch
|
||||
_num_xcds_dict = {} # key: gpu model
|
||||
_chip_id_dict = {} # key: chip id (int)
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Parsed YAML data as a nested dictionary.
|
||||
Exit with console error if an error occurs.
|
||||
"""
|
||||
console_debug("[load_yaml]")
|
||||
try:
|
||||
with open(file_path, "r") as file:
|
||||
data = yaml.safe_load(file)
|
||||
return data
|
||||
except FileNotFoundError:
|
||||
console_error(f"Error: The file '{file_path}' was not found.")
|
||||
except yaml.YAMLError as exc:
|
||||
console_error(f"Error parsing YAML file '{file_path}': {exc}")
|
||||
except Exception as e:
|
||||
console_error(
|
||||
f"An unexpected error occurred while loading YAML file '{file_path}': {e}"
|
||||
)
|
||||
_initialized = False
|
||||
|
||||
def __new__(cls):
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
cls._initialize()
|
||||
return cls._instance
|
||||
|
||||
def parse_mi_gpu_spec():
|
||||
"""
|
||||
Parse out mi gpu data from yaml file and store in memory.
|
||||
MI GPUs
|
||||
|-- series
|
||||
|-- architecture (list)
|
||||
|-- gpu model
|
||||
|-- chip_ids
|
||||
|-- partition_mode
|
||||
"""
|
||||
@classmethod
|
||||
def _initialize(cls):
|
||||
if not cls._initialized:
|
||||
cls._parse_mi_gpu_spec()
|
||||
cls._initialized = True
|
||||
|
||||
current_dir = os.path.dirname(__file__)
|
||||
yaml_file_path = os.path.join(current_dir, "mi_gpu_spec.yaml")
|
||||
# ----------------------------
|
||||
# YAML Parsing and Data Handling
|
||||
# ----------------------------
|
||||
|
||||
# Load the YAML data
|
||||
yaml_data = load_yaml(yaml_file_path)
|
||||
@classmethod
|
||||
def _load_yaml(cls, file_path: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Loads MI GPU YAML data /util into a Python dictionary.
|
||||
|
||||
for series in yaml_data["mi_gpu_spec"]:
|
||||
curr_gpu_series = series["gpu_series"]
|
||||
console_debug("[parse_mi_gpu_spec] Processing series: %s" % curr_gpu_series)
|
||||
for archs in series["gpu_archs"]:
|
||||
curr_gpu_arch = archs["gpu_arch"]
|
||||
gpu_series_dict[curr_gpu_arch] = curr_gpu_series
|
||||
gpu_model_dict[curr_gpu_arch] = []
|
||||
for models in archs["models"]:
|
||||
curr_gpu_model = models["gpu_model"]
|
||||
gpu_model_dict[curr_gpu_arch].append(curr_gpu_model)
|
||||
num_xcds_dict[curr_gpu_model] = (
|
||||
models.get("partition_mode", {})
|
||||
.get("compute_partition_mode", {})
|
||||
.get("num_xcds", {})
|
||||
)
|
||||
if "chip_ids" in models and "physical" in models["chip_ids"]:
|
||||
chip_id_dict[models["chip_ids"]["physical"]] = curr_gpu_model
|
||||
if "chip_ids" in models and "virtual" in models["chip_ids"]:
|
||||
chip_id_dict[models["chip_ids"]["virtual"]] = curr_gpu_model
|
||||
Args:
|
||||
file_path (str): The path to the YAML file.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Parsed YAML data as a nested dictionary.
|
||||
Exit with console error if an error occurs.
|
||||
"""
|
||||
console_debug("[load_yaml]")
|
||||
try:
|
||||
with open(file_path, "r") as file:
|
||||
data = yaml.safe_load(file)
|
||||
return data
|
||||
except FileNotFoundError:
|
||||
console_error(f"Error: The file '{file_path}' was not found.")
|
||||
except yaml.YAMLError as exc:
|
||||
console_error(f"Error parsing YAML file '{file_path}': {exc}")
|
||||
except Exception as e:
|
||||
console_error(
|
||||
f"An unexpected error occurred while loading YAML file '{file_path}': {e}"
|
||||
)
|
||||
|
||||
def get_gpu_series_dict():
|
||||
if not gpu_series_dict:
|
||||
console_error(
|
||||
"gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?"
|
||||
)
|
||||
return None
|
||||
return gpu_series_dict
|
||||
@classmethod
|
||||
def _parse_mi_gpu_spec(cls):
|
||||
"""
|
||||
Parse out mi gpu data from yaml file and store in memory.
|
||||
MI GPUs
|
||||
|-- series
|
||||
|-- architecture (list)
|
||||
|-- gpu model
|
||||
|-- chip_ids
|
||||
|-- partition_mode
|
||||
"""
|
||||
|
||||
current_dir = os.path.dirname(__file__)
|
||||
yaml_file_path = os.path.join(current_dir, "mi_gpu_spec.yaml")
|
||||
|
||||
def get_gpu_series(gpu_arch_):
|
||||
if not gpu_series_dict:
|
||||
console_error(
|
||||
"gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?"
|
||||
)
|
||||
return None
|
||||
# Load the YAML data
|
||||
yaml_data = cls._load_yaml(yaml_file_path)
|
||||
|
||||
# Normalize the key by checking both the raw and lowercase versions
|
||||
gpu_series = gpu_series_dict.get(gpu_arch_) or gpu_series_dict.get(gpu_arch_.lower())
|
||||
if gpu_series:
|
||||
return gpu_series.upper()
|
||||
for series in yaml_data["mi_gpu_spec"]:
|
||||
curr_gpu_series = series["gpu_series"]
|
||||
console_debug("[parse_mi_gpu_spec] Processing series: %s" % curr_gpu_series)
|
||||
for archs in series["gpu_archs"]:
|
||||
curr_gpu_arch = archs["gpu_arch"]
|
||||
cls._gpu_series_dict[curr_gpu_arch] = curr_gpu_series
|
||||
cls._gpu_model_dict[curr_gpu_arch] = []
|
||||
for models in archs["models"]:
|
||||
curr_gpu_model = models["gpu_model"]
|
||||
cls._gpu_model_dict[curr_gpu_arch].append(curr_gpu_model)
|
||||
cls._num_xcds_dict[curr_gpu_model] = (
|
||||
models.get("partition_mode", {})
|
||||
.get("compute_partition_mode", {})
|
||||
.get("num_xcds", {})
|
||||
)
|
||||
if "chip_ids" in models and "physical" in models["chip_ids"]:
|
||||
cls._chip_id_dict[models["chip_ids"]["physical"]] = curr_gpu_model
|
||||
if "chip_ids" in models and "virtual" in models["chip_ids"]:
|
||||
cls._chip_id_dict[models["chip_ids"]["virtual"]] = curr_gpu_model
|
||||
|
||||
console_warning(f"No matching gpu series found for gpu arch: {gpu_arch_}")
|
||||
return None
|
||||
@classmethod
|
||||
def get_gpu_series_dict(cls):
|
||||
if not cls._gpu_series_dict:
|
||||
console_error(
|
||||
"gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?"
|
||||
)
|
||||
return None
|
||||
return cls._gpu_series_dict
|
||||
|
||||
|
||||
def get_gpu_model(gpu_arch_, chip_id_):
|
||||
# Check that gpu_model_dict is populated first
|
||||
if not gpu_model_dict:
|
||||
console_error(
|
||||
"gpu_model_dict not yet populated. Did you run parse_mi_gpu_spec()?"
|
||||
)
|
||||
return None
|
||||
|
||||
gpu_arch_lower = gpu_arch_.lower()
|
||||
|
||||
# Handle gfx942 with chip_id mapping
|
||||
if gpu_arch_lower not in ("gfx906", "gfx908", "gfx90a"):
|
||||
if chip_id_ and int(chip_id_) in chip_id_dict:
|
||||
gpu_model = chip_id_dict.get(int(chip_id_))
|
||||
else:
|
||||
console_warning(f"No gpu model found for chip id: {chip_id_}")
|
||||
@classmethod
|
||||
def get_gpu_series(cls, gpu_arch_):
|
||||
if not cls._gpu_series_dict:
|
||||
console_error(
|
||||
"gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?"
|
||||
)
|
||||
return None
|
||||
|
||||
# Otherwise use gpu_model_dict mapping for other mi architectures
|
||||
elif gpu_arch_lower in gpu_model_dict:
|
||||
# NOTE: take the first element works for now
|
||||
gpu_model = gpu_model_dict[gpu_arch_lower][0]
|
||||
else:
|
||||
console_warning(f"No gpu model found for gpu arch: {gpu_arch_lower}")
|
||||
return None
|
||||
|
||||
if not gpu_model:
|
||||
console_warning(f"No gpu model found for gpu arch: {gpu_arch_lower}")
|
||||
return None
|
||||
|
||||
return gpu_model.upper()
|
||||
|
||||
|
||||
def get_num_xcds(gpu_model_, compute_partition_):
|
||||
# Only gpu in and above mi 300 series have more than one XCDs
|
||||
if gpu_model_.lower() in ("mi50", "mi60", "mi100", "mi210", "mi250", "mi250x"):
|
||||
return 1
|
||||
|
||||
if not num_xcds_dict:
|
||||
console_error(
|
||||
"mi300_num_xcds_dict not yet populated, did you run parse_mi_gpu_spec()?"
|
||||
# Normalize the key by checking both the raw and lowercase versions
|
||||
gpu_series = cls._gpu_series_dict.get(gpu_arch_) or cls._gpu_series_dict.get(
|
||||
gpu_arch_.lower()
|
||||
)
|
||||
if gpu_series:
|
||||
return gpu_series.upper()
|
||||
|
||||
console_warning(f"No matching gpu series found for gpu arch: {gpu_arch_}")
|
||||
return None
|
||||
|
||||
gpu_model_lower = gpu_model_.lower()
|
||||
partition_lower = compute_partition_.lower()
|
||||
@classmethod
|
||||
def get_gpu_model(cls, gpu_arch_, chip_id_):
|
||||
# Check that gpu_model_dict is populated first
|
||||
if not cls._gpu_model_dict:
|
||||
console_error(
|
||||
"gpu_model_dict not yet populated. Did you run parse_mi_gpu_spec()?"
|
||||
)
|
||||
return None
|
||||
|
||||
if gpu_model_lower not in num_xcds_dict:
|
||||
return None
|
||||
gpu_arch_lower = gpu_arch_.lower()
|
||||
|
||||
model_dict = num_xcds_dict[gpu_model_lower]
|
||||
if partition_lower not in model_dict:
|
||||
console_log(f"Unknown compute partition: {compute_partition_}")
|
||||
return None
|
||||
# Handle gfx942 with chip_id mapping
|
||||
if gpu_arch_lower not in ("gfx906", "gfx908", "gfx90a"):
|
||||
if chip_id_ and int(chip_id_) in cls._chip_id_dict:
|
||||
gpu_model = cls._chip_id_dict.get(int(chip_id_))
|
||||
else:
|
||||
console_warning(f"No gpu model found for chip id: {chip_id_}")
|
||||
return None
|
||||
|
||||
num_xcds = model_dict[partition_lower]
|
||||
if not num_xcds:
|
||||
console_warning(
|
||||
"Unknown compute partition found for %s / %s", compute_partition_, gpu_model_
|
||||
)
|
||||
return None
|
||||
# Otherwise use gpu_model_dict mapping for other mi architectures
|
||||
elif gpu_arch_lower in cls._gpu_model_dict:
|
||||
# NOTE: take the first element works for now
|
||||
gpu_model = cls._gpu_model_dict[gpu_arch_lower][0]
|
||||
else:
|
||||
console_warning(f"No gpu model found for gpu arch: {gpu_arch_lower}")
|
||||
return None
|
||||
|
||||
return num_xcds
|
||||
if not gpu_model:
|
||||
console_warning(f"No gpu model found for gpu arch: {gpu_arch_lower}")
|
||||
return None
|
||||
|
||||
return gpu_model.upper()
|
||||
|
||||
@classmethod
|
||||
def get_num_xcds(cls, gpu_model_, compute_partition_):
|
||||
# Only gpu in and above mi 300 series have more than one XCDs
|
||||
if gpu_model_.lower() in ("mi50", "mi60", "mi100", "mi210", "mi250", "mi250x"):
|
||||
return 1
|
||||
|
||||
if not cls._num_xcds_dict:
|
||||
console_error(
|
||||
"mi300_num_xcds_dict not yet populated, did you run parse_mi_gpu_spec()?"
|
||||
)
|
||||
return None
|
||||
|
||||
gpu_model_lower = gpu_model_.lower()
|
||||
partition_lower = compute_partition_.lower()
|
||||
|
||||
if gpu_model_lower not in cls._num_xcds_dict:
|
||||
return None
|
||||
|
||||
model_dict = cls._num_xcds_dict[gpu_model_lower]
|
||||
if partition_lower not in model_dict:
|
||||
console_log(f"Unknown compute partition: {compute_partition_}")
|
||||
return None
|
||||
|
||||
num_xcds = model_dict[partition_lower]
|
||||
if not num_xcds:
|
||||
console_warning(
|
||||
"Unknown compute partition found for %s / %s",
|
||||
compute_partition_,
|
||||
gpu_model_,
|
||||
)
|
||||
return None
|
||||
|
||||
return num_xcds
|
||||
|
||||
@classmethod
|
||||
def get_chip_id_dict(cls):
|
||||
if cls._chip_id_dict:
|
||||
return cls._chip_id_dict
|
||||
else:
|
||||
console_error()
|
||||
|
||||
|
||||
def get_chip_id_dict():
|
||||
if chip_id_dict:
|
||||
return chip_id_dict
|
||||
else:
|
||||
console_error(
|
||||
"mi300_chip_id_dict not yet populated, did you run parse_mi_gpu_spec()?"
|
||||
)
|
||||
# pre-initialize the instance when module loads
|
||||
|
||||
mi_gpu_specs = MIGPUSpecs()
|
||||
|
||||
+15
-11
@@ -38,8 +38,14 @@ from pathlib import Path as path
|
||||
import pandas as pd
|
||||
|
||||
import config
|
||||
from utils.logger import console_debug, console_error, console_log, console_warning
|
||||
from utils.mi_gpu_spec import get_chip_id_dict, get_gpu_series_dict, get_num_xcds
|
||||
from utils.logger import (
|
||||
console_debug,
|
||||
console_error,
|
||||
console_log,
|
||||
console_warning,
|
||||
demarcate,
|
||||
)
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
from utils.tty import get_table_string
|
||||
from utils.utils import get_version
|
||||
|
||||
@@ -59,12 +65,12 @@ def detect_arch(_rocminfo):
|
||||
for idx1, linetext in enumerate(_rocminfo):
|
||||
# NOTE: currently supported socs are gfx archs only
|
||||
gpu_arch = search(r"^\s*Name\s*:\s* ([Gg][Ff][Xx][a-zA-Z0-9]+).*\s*$", linetext)
|
||||
if gpu_arch in get_gpu_series_dict().keys():
|
||||
if gpu_arch in mi_gpu_specs.get_gpu_series_dict().keys():
|
||||
break
|
||||
if str(gpu_arch) in get_gpu_series_dict().keys():
|
||||
if str(gpu_arch) in mi_gpu_specs.get_gpu_series_dict().keys():
|
||||
gpu_arch = str(gpu_arch)
|
||||
break
|
||||
if not gpu_arch in get_gpu_series_dict().keys():
|
||||
if not gpu_arch in mi_gpu_specs.get_gpu_series_dict().keys():
|
||||
console_error("Cannot find a supported arch in rocminfo: " + str(gpu_arch))
|
||||
else:
|
||||
return (gpu_arch, idx1)
|
||||
@@ -83,8 +89,8 @@ def detect_gpu_chip_id(_rocminfo):
|
||||
if not gpu_chip_id:
|
||||
console_warning("No Chip ID detected: " + str(gpu_chip_id))
|
||||
elif (
|
||||
gpu_chip_id not in get_chip_id_dict().keys()
|
||||
and int(gpu_chip_id) not in get_chip_id_dict().keys()
|
||||
gpu_chip_id not in mi_gpu_specs.get_chip_id_dict().keys()
|
||||
and int(gpu_chip_id) not in mi_gpu_specs.get_chip_id_dict().keys()
|
||||
):
|
||||
console_warning("Unknown Chip ID detected: " + str(gpu_chip_id))
|
||||
return gpu_chip_id
|
||||
@@ -534,9 +540,7 @@ class MachineSpecs:
|
||||
def get_hbm_channels(self):
|
||||
if self.memory_partition.lower().startswith("nps"):
|
||||
hbmchannels = 128
|
||||
if self.memory_partition.lower() == "nps2":
|
||||
hbmchannels /= 2
|
||||
elif self.memory_partition.lower() == "nps4":
|
||||
if self.memory_partition.lower() == "nps4":
|
||||
hbmchannels /= 4
|
||||
elif self.memory_partition.lower() == "nps8":
|
||||
hbmchannels /= 8
|
||||
@@ -670,7 +674,7 @@ def total_sqc(archname, numCUs, numSEs):
|
||||
|
||||
|
||||
def total_l2_banks(archname, L2Banks, compute_partition):
|
||||
xcds = get_num_xcds(archname, compute_partition)
|
||||
xcds = mi_gpu_specs.get_num_xcds(archname, compute_partition)
|
||||
totalL2Banks = L2Banks * xcds
|
||||
return totalL2Banks
|
||||
|
||||
|
||||
@@ -49,7 +49,7 @@ from utils.logger import (
|
||||
console_warning,
|
||||
demarcate,
|
||||
)
|
||||
from utils.mi_gpu_spec import get_num_xcds
|
||||
from utils.mi_gpu_spec import mi_gpu_specs
|
||||
|
||||
rocprof_cmd = ""
|
||||
rocprof_args = ""
|
||||
@@ -675,7 +675,7 @@ def run_prof(
|
||||
if new_env and not using_v3() and not using_v1():
|
||||
# flatten tcc for applicable mi300 input
|
||||
f = path(workload_dir + "/out/pmc_1/results_" + fbase + ".csv")
|
||||
xcds = get_num_xcds(mspec.gpu_model, mspec.compute_partition)
|
||||
xcds = mi_gpu_specs.get_num_xcds(mspec.gpu_model, mspec.compute_partition)
|
||||
df = flatten_tcc_info_across_xcds(f, xcds, int(mspec._l2_banks))
|
||||
df.to_csv(f, index=False)
|
||||
|
||||
|
||||
@@ -0,0 +1,150 @@
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from importlib.machinery import SourceFileLoader
|
||||
from unittest.mock import patch
|
||||
|
||||
import pandas as pd
|
||||
import pytest
|
||||
|
||||
from src.utils.specs import generate_machine_specs
|
||||
|
||||
rocprof_compute = SourceFileLoader("rocprof-compute", "src/rocprof-compute").load_module()
|
||||
|
||||
|
||||
# NOTE: Only testing gfx942 for now.
|
||||
GFX942_CHIP_IDS = ["29856", "29876", "29857", "29877", "29858", "29878", "29861", "29881"]
|
||||
|
||||
# Expected result
|
||||
GFX942_NUM_XCDS = {"spx": 8, "dpx": 4, "qpx": 2, "cpx": 1}
|
||||
|
||||
|
||||
# Pattern for ANSI escape sequences (for example terminal color codes), used
# to clean captured CLI output before it is parsed.
ANSI_ESCAPE = re.compile(r"\x1B[@-_][0-?]*[ -/]*[@-~]")


def strip_ansi(s: str) -> str:
    """Return ``s`` with every substring matched by ``ANSI_ESCAPE`` removed."""
    cleaned = ANSI_ESCAPE.sub("", s)
    return cleaned
|
||||
|
||||
|
||||
def parse_table_dict(output: str) -> dict:
    """
    Parse an ASCII table into a dict mapping Spec -> Value.

    Only lines that start with the vertical cell border "│" are treated as
    table rows; every other line (top/bottom borders, row separators, and
    any surrounding text) is discarded before the header is located.

    Args:
        output (str): Text containing the rendered table.

    Returns:
        dict: Maps each non-empty "Spec" cell to the "Value" cell of the
        same row. Later rows overwrite earlier rows with the same Spec.

    Raises:
        ValueError: If no row contains both "Spec" and "Value", or if either
            one is not an exact (whitespace-stripped) header cell.
    """
    rows = [ln for ln in output.splitlines() if ln.startswith("│")]

    # locate header row (the one containing 'Spec' and 'Value')
    header_idx = next(
        (i for i, ln in enumerate(rows) if "Spec" in ln and "Value" in ln), None
    )
    if header_idx is None:
        raise ValueError("Header row with Spec and Value not found")

    header_cells = [c.strip() for c in rows[header_idx].strip("│").split("│")]
    spec_i = header_cells.index("Spec")
    value_i = header_cells.index("Value")
    last_needed = max(spec_i, value_i)

    result = {}
    # Separator lines were already filtered out above, so the first data row
    # directly follows the header; skipping an extra line would drop it.
    for ln in rows[header_idx + 1 :]:
        cells = [c.strip() for c in ln.strip("│").split("│")]
        if len(cells) <= last_needed:
            # Row is too short to hold both columns of interest.
            continue
        spec = cells[spec_i]
        if spec:
            result[spec] = cells[value_i]
    return result
|
||||
|
||||
|
||||
def run(cmd):
    """
    Execute ``cmd`` and return its captured stdout decoded as UTF-8.

    Args:
        cmd: Argument list handed to ``subprocess.run``; ``cmd[0]`` is
            compared against "amd-smi".

    Returns:
        str: The decoded standard output of the command.

    When the command is "amd-smi" and it exits with return code 8, an error
    message is printed and the call fails with an assertion.
    """
    completed = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    no_gpu_detected = cmd[0] == "amd-smi" and completed.returncode == 8
    if no_gpu_detected:
        print("ERROR: No GPU detected. Unable to load amd-smi")
        assert 0
    return completed.stdout.decode("utf-8")
|
||||
|
||||
|
||||
def gpu_arch():
    """
    Detect the GPU architecture name from ``rocminfo`` output.

    Detection is attempted in two steps:
      1) Use the first line matching ``Name: <alphanumeric token>``.
      2) If no such line exists, look at ``Chip ID:`` lines and report
         "gfx942" when any chip id is listed in ``GFX942_CHIP_IDS``.

    Returns:
        str | None: The detected architecture string, or None when
        ``rocminfo`` is not installed or nothing could be detected. Callers
        in this file skip their test when the result is falsy.
    """
    try:
        completed = subprocess.run(
            ["rocminfo"], stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except FileNotFoundError:
        # rocminfo is not available on this machine: nothing to detect.
        return None

    # decode with utf-8 to account for rocm-smi changes in latest rocm
    rocminfo = completed.stdout.decode("utf-8").split("\n")

    ## 1) Parse arch details from rocminfo
    soc_regex = re.compile(r"^\s*Name\s*:\s+ ([a-zA-Z0-9]+)\s*$", re.MULTILINE)
    for line in rocminfo:
        name_match = soc_regex.match(line)
        if name_match:
            # First matching device wins.
            return name_match.group(1)

    ## 2) Try parse arch details from chip id.
    # Reached only when no Name line matched; previously indexing the empty
    # match list raised IndexError before this fallback could run.
    chip_id_regex = re.compile(r"^\s*Chip ID:\s+ ([a-zA-Z0-9]+)\s*", re.MULTILINE)
    for line in rocminfo:
        chip_match = chip_id_regex.match(line)
        # group(1) is the alphanumeric id that precedes any "(0x...)" suffix.
        if chip_match and chip_match.group(1) in GFX942_CHIP_IDS:
            return "gfx942"

    return None
|
||||
|
||||
|
||||
@pytest.mark.num_xcds_spec_class
def test_num_xcds_spec_class(monkeypatch):
    """
    Check that the generated machine specs report the expected XCD count.

    The test is skipped unless the detected architecture contains "gfx942".
    The expected count is looked up in ``GFX942_NUM_XCDS`` by the lowercased
    compute partition; an unknown partition maps to -1 and fails the check.
    """
    detected_arch = gpu_arch()

    # 1. Only gfx942 socs are covered by this test
    is_gfx942 = bool(detected_arch) and "gfx942" in detected_arch.lower()
    if not is_gfx942:
        pytest.skip("Skipping num xcds test for non-gfx942 socs.")

    # 2. Build the machine specs
    specs = generate_machine_specs(None)

    # 3. Compare against the expected values
    assert specs.compute_partition is not None
    actual_xcds = int(specs.num_xcd)
    expected_xcds = GFX942_NUM_XCDS.get(specs.compute_partition.lower(), -1)
    assert actual_xcds == expected_xcds
|
||||
|
||||
|
||||
@pytest.mark.num_xcds_cli_output
def test_num_xcds_cli_output():
    """
    Check the XCD count printed by ``rocprof-compute -s``.

    The test is skipped unless the detected architecture contains "gfx942".
    The CLI output is stripped of ANSI sequences, parsed as a Spec/Value
    table, and the "Num XCDs" entry is compared with the value expected for
    the reported "Compute Partition" (unknown partitions map to -1).
    """
    detected_arch = gpu_arch()

    # 1. Only gfx942 socs are covered by this test
    is_gfx942 = bool(detected_arch) and "gfx942" in detected_arch.lower()
    if not is_gfx942:
        pytest.skip("Skipping num xcds test for non-gfx942 socs.")

    # 2. Invoke the CLI and capture its output as text
    cli_cmd = ["rocprof-compute", "-s"]
    proc = subprocess.run(
        cli_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )
    assert (
        proc.returncode == 0
    ), f"Non-zero exit ({proc.returncode}), stderr:\n{proc.stderr}"

    # 3. Remove ANSI sequences, then parse the table
    spec_table = parse_table_dict(strip_ansi(proc.stdout))

    # 4. Both specs must be present and consistent with each other
    assert (
        "Compute Partition" in spec_table
    ), "Spec 'Compute Partition' not found in table"
    assert "Num XCDs" in spec_table, "Spec 'Num XCDs' not found in table"

    partition_reported = spec_table["Compute Partition"]
    xcds_reported = spec_table["Num XCDs"]

    assert partition_reported is not None
    actual_xcds = int(xcds_reported)
    expected_xcds = GFX942_NUM_XCDS.get(partition_reported.lower(), -1)
    assert actual_xcds == expected_xcds
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren