* Clean up unused functions.

* Fix number of XCDs for MI300X CPX (core partition).

* Add support for memory partition mode.

* Modify total_xcd to adapt to all gpu models.

* Run black and isort.

* Make gpu_arch regex more generic.

* Add error checking for compute partition mode num xcds.

* Set gpu_chip_id as optional.

* Fix get_gpu_model.

---------

Signed-off-by: xuchen-amd <xuchen@amd.com>
Этот коммит содержится в:
xuchen-amd
2025-03-21 02:02:58 -04:00
коммит произвёл GitHub
родитель b596098d14
Коммит 2e7f82aa13
6 изменённых файлов: 594 добавлений и 74 удалений
+6 -17
Просмотреть файл
@@ -24,7 +24,6 @@
import argparse
import importlib
import logging
import os
import shutil
import socket
@@ -43,6 +42,10 @@ from utils.logger import (
setup_file_handler,
setup_logging_priority,
)
from utils.mi_gpu_spec import (
get_gpu_series_dict,
parse_mi_gpu_spec,
)
from utils.specs import MachineSpecs, generate_machine_specs
from utils.utils import (
console_debug,
@@ -57,21 +60,6 @@ from utils.utils import (
set_locale_encoding,
)
SUPPORTED_ARCHS = {
"gfx906": {"mi50": ["MI50", "MI60"]},
"gfx908": {"mi100": ["MI100"]},
"gfx90a": {"mi200": ["MI210", "MI250", "MI250X"]},
"gfx940": {"mi300": ["MI300A_A0"]},
"gfx941": {"mi300": ["MI300X_A0"]},
"gfx942": {"mi300": ["MI300A_A1", "MI300X_A1"]},
}
MI300_CHIP_IDS = {
"29856": "MI300A_A1",
"29857": "MI300X_A1",
"29858": "MI308X",
}
class RocProfCompute:
def __init__(self):
@@ -87,7 +75,8 @@ class RocProfCompute:
"ver_pretty": None,
}
self.__options = {}
self.__supported_archs = SUPPORTED_ARCHS
parse_mi_gpu_spec()
self.__supported_archs = get_gpu_series_dict()
self.__mspec: MachineSpecs = None # to be initalized in load_soc_specs()
setup_console_handler()
self.set_version()
+13 -20
Просмотреть файл
@@ -27,6 +27,7 @@ import math
import os
import re
import shutil
import sys
import threading
from abc import ABC, abstractmethod
from collections import OrderedDict
@@ -36,7 +37,7 @@ import numpy as np
import pandas as pd
import yaml
from rocprof_compute_base import MI300_CHIP_IDS, SUPPORTED_ARCHS
from utils.mi_gpu_spec import get_gpu_model, get_gpu_series
from utils.parser import build_in_vars, supported_denom
from utils.utils import (
console_debug,
@@ -45,6 +46,7 @@ from utils.utils import (
convert_metric_id_to_panel_idx,
demarcate,
get_default_accumulate_counter_file_ymal,
total_xcds,
using_v3,
)
@@ -53,6 +55,7 @@ class OmniSoC_Base:
def __init__(
self, args, mspec
): # new info field will contain rocminfo or sysinfo to populate properties
console_debug("[omnisoc init]")
self.__args = args
self.__arch = None
self._mspec = mspec
@@ -102,7 +105,8 @@ class OmniSoC_Base:
@demarcate
def populate_mspec(self):
from utils.specs import run, search, total_sqc, total_xcds
console_debug("[populate_mspec]")
from utils.specs import run, search, total_sqc
if not hasattr(self._mspec, "_rocminfo") or self._mspec._rocminfo is None:
return
@@ -151,11 +155,6 @@ class OmniSoC_Base:
self._mspec.workgroup_max_size = key
continue
key = search(r"^\s*Chip ID:\s+ ([a-zA-Z0-9]+)\s*", linetext)
if key != None:
self._mspec.chip_id = key
continue
key = search(r"^\s*Max Waves Per CU:\s+ ([a-zA-Z0-9]+)\s*", linetext)
if key != None:
self._mspec.max_waves_per_cu = key
@@ -182,18 +181,11 @@ class OmniSoC_Base:
self._mspec.cur_sclk = self._mspec.max_sclk
self._mspec.cur_mclk = self._mspec.max_mclk
self._mspec.gpu_series = list(SUPPORTED_ARCHS[self._mspec.gpu_arch].keys())[
0
].upper()
# specify gpu name for gfx942 hardware
self._mspec.gpu_model = list(SUPPORTED_ARCHS[self._mspec.gpu_arch].keys())[
0
].upper()
if self._mspec.gpu_model == "MI300":
# Use Chip ID to distinguish MI300 gpu model using the built-in dictionary
if self._mspec.chip_id in MI300_CHIP_IDS:
self._mspec.gpu_model = MI300_CHIP_IDS[self._mspec.chip_id]
self._mspec.gpu_series = get_gpu_series(self._mspec.gpu_arch).upper()
# specify gpu model name for gfx942 hardware
self._mspec.gpu_model = get_gpu_model(
self._mspec.gpu_arch, self._mspec.gpu_chip_id
).upper()
self._mspec.num_xcd = str(
total_xcds(self._mspec.gpu_model, self._mspec.compute_partition)
)
@@ -593,7 +585,8 @@ def perfmon_coalesce(
# TODO: more error checking
if len(spatial_multiplexing) != 3:
console_error(
"profiling", "multiplexing need provide node_idx node_count and gpu_count"
"profiling",
"multiplexing need provide node_idx node_count and gpu_count",
)
node_idx = int(spatial_multiplexing[0])
+338
Просмотреть файл
@@ -0,0 +1,338 @@
import logging
import os
import sys
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union
import yaml
# Constants for MI series
# NOTE: Currently supports MI50, MI100, MI200, MI300
# parse_mi_gpu_spec() uses these ids as positional indexes into the series
# list of mi_gpu_spec.yaml, so they have to follow the order of that list.
MI50 = 0
MI100 = 1
MI200 = 2
MI300 = 3
# Series id -> lowercase series name.
# NOTE(review): the identifier is misspelled ("CONSTANS"); renaming it would
# need every user of the name to change as well.
MI_CONSTANS = {MI50: "mi50", MI100: "mi100", MI200: "mi200", MI300: "mi300"}
# Module-level lookup tables; all start empty and are filled in by
# parse_mi_gpu_spec().
gpu_series_dict = {}  # key: gpu arch, value: series name
gpu_model_dict = {}  # key: gpu_arch, value: list of gpu model names
mi300_archs_dict = {}  # key: gpu model, value: mi300 architecture name
mi300_num_xcds_dict = {}  # key: gpu model, value: the model's num_xcds entry
mi300_nps_dict = {}  # key: gpu model (NOTE: key can also be architecture)
mi300_chip_id_dict = {}  # key: chip id (int), value: gpu model
# ----------------------------
# Data Class handling to preserve the hierarchical gpu information
# ----------------------------
@dataclass
class ComputePartitionMode:
    """
    Represents the compute partition mode.

    Attributes:
        num_xcds: XCD information for the compute partition mode, or None
            when it is unknown or not applicable.
    """

    # Declared as a real dataclass field so that the generated __init__,
    # __repr__ and __eq__ take the value into account. With a hand-written
    # __init__ and no fields, every instance compared equal to every other.
    num_xcds: Any = None

    def get_num_xcds(self):
        """Return the stored XCD information (None if none was provided)."""
        return self.num_xcds
class Singleton(object):
    """
    Base class that hands out at most one instance per (sub)class.

    The instances are kept in a class-level registry keyed by the concrete
    class, so each subclass gets its own single instance. __init__ of the
    subclass still runs on every construction call.
    """

    _instances = {}

    def __new__(class_, *args, **kwargs):
        if class_ not in class_._instances:
            # Constructor arguments are deliberately NOT forwarded:
            # object.__new__() raises TypeError when it receives extra
            # arguments and __new__ is overridden. They are consumed by the
            # subclass __init__ instead.
            class_._instances[class_] = super(Singleton, class_).__new__(class_)
        return class_._instances[class_]
@dataclass
class MIGPU(Singleton):
    """
    Singleton class representing the detected MI GPU of current system.
    Ensures only one instance exists.

    NOTE: this class defines its own __init__, so @dataclass does not generate
    one and therefore never calls __post_init__ by itself; __init__ invokes it
    explicitly. No dataclass fields are declared on the class. Because the
    instance is shared, every construction call re-runs __init__ and
    overwrites the attributes of that single instance.
    """

    _instance = None  # Class variable to hold the single instance

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super(MIGPU, cls).__new__(cls)
            cls._instance.mi_gpu_spec = []  # Initialize the instance attribute
        return cls._instance

    def __init__(
        self,
        gpu_series,
        gpu_arch,
        gpu_model,
        chip_id=None,
        mi300_arch=None,
        num_xcds=None,
    ):
        """
        gpu series, gpu_arch and gpu_model information must be available for a given MI GPU.

        gpu_series (str): The GPU series name (e.g., 'mi50', 'mi100', 'mi200', 'mi300')
        gpu_arch (str)
        gpu_model (str)
        chip_id: optional chip id of the GPU
        mi300_arch: optional mi300 architecture; a non-None value marks the
            GPU as an mi300 GPU
        num_xcds: optional XCD information, wrapped in ComputePartitionMode
        """
        self.gpu_series = gpu_series
        self.gpu_arch = gpu_arch
        self.gpu_model = gpu_model
        self.chip_id = chip_id

        self.mi300_arch = mi300_arch
        self.compute_partition = ComputePartitionMode(num_xcds)
        self.is_mi300 = self.mi300_arch is not None

        # Not triggered automatically (see class docstring), so run the
        # validation by hand.
        self.__post_init__()

    def __post_init__(self):
        """Warn when an mi300 GPU carries no XCD information."""
        # NOTE: currently, all mi300 series gpus shall have compute partition information.
        # compute_partition is always a ComputePartitionMode object, so the
        # wrapped value has to be inspected rather than the wrapper itself.
        if self.is_mi300 and self.compute_partition.get_num_xcds() is None:
            logging.warning(
                "[MIGPU post init] mi300 gpu detected, but no num_xcd/compute partition data detected!!!"
            )

    def set_chip_id(self, chip_id):
        """Store the chip id of the GPU."""
        self.chip_id = chip_id

    def set_mi300_arch(self, mi300_arch, num_xcds):
        """
        Set the mi300 architecture and its XCD information.

        All mi300 series gpus shall have compute partition information, so a
        warning is logged when num_xcds is None.
        """
        if num_xcds is None:
            logging.warning(
                "[MIGPU set_mi300_arch] mi300 gpu detected, but no num_xcd/compute partition data detected!!!"
            )
        self.mi300_arch = mi300_arch
        self.compute_partition = ComputePartitionMode(num_xcds)
        # Keep the derived flag in sync with the architecture just assigned.
        self.is_mi300 = self.mi300_arch is not None

    def get_gpu_series(self):
        """Return the GPU series name."""
        return self.gpu_series

    def get_gpu_arch(self):
        """Return the GPU architecture."""
        return self.gpu_arch

    def get_gpu_model(self):
        """Return the GPU model."""
        return self.gpu_model

    def get_chip_id(self):
        """Return the chip id (None if it was never set)."""
        return self.chip_id

    def get_mi300_arch(self):
        """Return the mi300 architecture (None if it was never set)."""
        return self.mi300_arch

    def get_compute_partition(self):
        """Return the ComputePartitionMode object of this GPU."""
        return self.compute_partition
# ----------------------------
# YAML Parsing and Data Handling
# ----------------------------
def load_yaml(file_path: str) -> Optional[Dict[str, Any]]:
    """
    Load the MI GPU YAML data into a Python dictionary.

    Args:
        file_path (str): The path to the YAML file.

    Returns:
        Optional[Dict[str, Any]]: Parsed YAML data as a nested dictionary, or
        None when the file is missing, cannot be read or is not valid YAML.
        Failures are reported through logging.error; the function itself
        neither raises nor exits, so callers must check for None.
    """
    logging.debug("[load_yaml]")

    try:
        # Read as UTF-8 explicitly instead of relying on the locale's
        # preferred encoding.
        with open(file_path, "r", encoding="utf-8") as file:
            return yaml.safe_load(file)
    except FileNotFoundError:
        logging.error(f"Error: The file '{file_path}' was not found.")
    except yaml.YAMLError as exc:
        logging.error(f"Error parsing YAML file '{file_path}': {exc}")
    except Exception as e:
        logging.error(
            f"An unexpected error occurred while loading YAML file '{file_path}': {e}"
        )
    return None
def parse_mi_gpu_spec():
    """
    Parse out mi gpu data from yaml file and store in memory.

    Reads mi_gpu_spec.yaml from the directory of this module and fills the
    module-level tables gpu_series_dict, gpu_model_dict, mi300_archs_dict,
    mi300_num_xcds_dict, mi300_nps_dict and mi300_chip_id_dict.

    MI GPUs
    |-- series
        |-- architecture (list)
            |-- models
                |-- chip_ids
                |-- mi300_arch
                    |-- partition_mode

    Raises:
        RuntimeError: if the YAML file could not be loaded into a mapping.
    """
    current_dir = os.path.dirname(__file__)
    yaml_file_path = os.path.join(current_dir, "mi_gpu_spec.yaml")

    # Load the YAML data
    yaml_data = load_yaml(yaml_file_path)
    if not isinstance(yaml_data, dict):
        # load_yaml() reports failures by returning None; fail with a clear
        # message here instead of an AttributeError on None further down.
        raise RuntimeError(
            f"Unable to load MI GPU spec from '{yaml_file_path}'"
        )

    mi300_models_dict = {}
    mi300_series = MI_CONSTANS[MI300]

    for mi_index, mi_series in MI_CONSTANS.items():
        if mi_series != mi300_series:
            logging.debug("[parse_mi_gpu_spec] Processing series: %s", mi_series)
            for series_list in yaml_data.values():
                # parse out gpu series and gpu model information for mi50, 100, 200
                # (only the first architecture of the series is read)
                first_arch = series_list[mi_index]["gpu_archs"][0]
                curr_gpu_arch = first_arch["gpu_arch"]
                gpu_series_dict[curr_gpu_arch] = mi_series
                gpu_model_dict[curr_gpu_arch] = [
                    model["gpu_model"] for model in first_arch["models"]
                ]
            continue

        # MI300 requires specific processing
        for series_list in yaml_data.values():
            # NOTE: only MI300 have multiple architectures
            for arch_entry in series_list[MI300]["gpu_archs"]:
                curr_gpu_arch = arch_entry["gpu_arch"]
                gpu_series_dict[curr_gpu_arch] = mi_series
                mi300_models_dict[curr_gpu_arch] = []

                for model in arch_entry["models"]:
                    gpu_model = model["gpu_model"]
                    mi300_spec = model["mi300_arch"]
                    partition_mode = mi300_spec["partition_mode"]

                    # NOTE: mi300 architecture is available for all mi300 gpu models
                    mi300_archs_dict[gpu_model] = mi300_spec["architecture"]
                    # NOTE: compute partition mode num xcds is available for all mi300 gpu models
                    mi300_num_xcds_dict[gpu_model] = partition_mode[
                        "compute_partition_mode"
                    ]["num_xcds"]
                    # NOTE: memory partition mode nps is available for all mi300 gpu models
                    mi300_nps_dict[gpu_model] = partition_mode["memory_partition_mode"]

                    # save chip_id, gpu_model pair if chip id is available
                    # NOTE: chip id is available for all gfx942 machines
                    chip_id = model["chip_ids"]["local"]
                    if chip_id is not None:
                        mi300_chip_id_dict[chip_id] = gpu_model

                    mi300_models_dict[curr_gpu_arch].append(gpu_model)

    gpu_model_dict.update(mi300_models_dict)
def get_gpu_series_dict():
    """
    Return the module-level gpu arch -> gpu series table.

    Logs an error and returns None while the table is still empty.
    """
    if gpu_series_dict:
        return gpu_series_dict

    logging.error(
        "gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?"
    )
    return None
def get_gpu_series(gpu_arch_):
    """
    Look up the gpu series for a gpu architecture name.

    Returns the series stored in gpu_series_dict, or None (after logging)
    when the table is empty or holds no entry for the architecture.
    """
    if gpu_series_dict:
        # Try the name exactly as given first, then its lowercase form.
        series = gpu_series_dict.get(gpu_arch_)
        if not series:
            series = gpu_series_dict.get(gpu_arch_.lower())
        if series:
            return series
        logging.warning(f"No matching gpu series found for gpu arch: {gpu_arch_}")
    else:
        logging.error(
            "gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?"
        )
    return None
def get_gpu_model(gpu_arch_, chip_id_):
    """
    Resolve the gpu model name for a gpu architecture.

    Args:
        gpu_arch_: gpu architecture name; compared case-insensitively.
        chip_id_: chip id of the gpu. Only consulted for gfx942, where it is
            converted to int and looked up in mi300_chip_id_dict.

    Returns:
        The gpu model name, or None (after logging) when the lookup tables are
        empty, the chip id is missing / not numeric / unknown, or the
        architecture has no known model.
    """
    # Check that gpu_model_dict is populated first
    if not gpu_model_dict:
        logging.error(
            "gpu_model_dict not yet populated. Did you run parse_mi_gpu_spec()?"
        )
        return None

    gpu_arch_lower = str(gpu_arch_).lower()

    # Handle gfx942 with chip_id mapping
    if gpu_arch_lower == "gfx942":
        try:
            chip_id = int(chip_id_)
        except (TypeError, ValueError, OverflowError):
            # Missing or non-numeric chip id: treat it as unknown instead of
            # letting the conversion error escape.
            chip_id = None
        gpu_model = mi300_chip_id_dict.get(chip_id)
        if not gpu_model:
            logging.warning(f"No gpu model found for chip id: {chip_id_}")
            return None
        return gpu_model

    # Otherwise use gpu_model_dict mapping for other mi architectures
    models = gpu_model_dict.get(gpu_arch_lower)
    # NOTE: take the first element works for now
    gpu_model = models[0] if models else None
    if not gpu_model:
        logging.warning(f"No gpu model found for gpu arch: {gpu_arch_lower}")
        return None
    return gpu_model
def get_mi300_archs_dict():
    """
    Return the module-level gpu model -> mi300 architecture table.

    Logs an error and returns None while the table is still empty.
    """
    if mi300_archs_dict:
        return mi300_archs_dict

    logging.error(
        "mi300_archs_dict not yet populated, did you run parse_mi_gpu_spec()?"
    )
    return None
def get_mi300_num_xcds(gpu_model_, compute_partition_):
    """
    Look up the number of xcds for a gpu model / compute partition pair.

    Both arguments are lowercased before the lookup in mi300_num_xcds_dict.
    Returns the stored value, or None (after logging) when the table is
    empty, the model or the partition is not in the table, or the stored
    value is empty.
    """
    if not mi300_num_xcds_dict:
        logging.error(
            "mi300_num_xcds_dict not yet populated, did you run parse_mi_gpu_spec()?"
        )
        return None

    model_key, partition_key = gpu_model_.lower(), compute_partition_.lower()

    if model_key in mi300_num_xcds_dict:
        xcds_by_partition = mi300_num_xcds_dict[model_key]
        if partition_key in xcds_by_partition:
            xcd_count = xcds_by_partition[partition_key]
            if xcd_count:
                return xcd_count
            # The partition is listed for this model but has no xcd count.
            logging.warning(
                "Unknown compute partition found for %s / %s",
                compute_partition_,
                gpu_model_,
            )
        else:
            logging.info(f"Unknown compute partition: {compute_partition_}")
    else:
        logging.info(f"Current system is not a mi300 system: {gpu_model_}")

    return None
def get_mi300_chip_id_dict():
    """
    Return the module-level chip id -> gpu model table.

    Logs an error and returns None while the table is still empty.
    """
    if not mi300_chip_id_dict:
        logging.error(
            "mi300_chip_id_dict not yet populated, did you run parse_mi_gpu_spec()?"
        )
        return None

    return mi300_chip_id_dict
+164
Просмотреть файл
@@ -0,0 +1,164 @@
# --------------------------------------------------------------------------------
#
# This yaml file tracks MI gpu spec in a tree structure.
#
# *It is important to note that this file only tracks the common specs for MI GPU series.*
# *For example, the CU #s are based on information retrieved from other tools.*
# **
#
# MI GPUs
# |-- series: the specific MI series; mi50, mi100, mi200, mi300
# |-- architecture: currently, only mi300 gpus hold different architectures
# |-- models
# |-- chip_ids: chip id is specific to the environment the gpu is being used on
# |-- mi300_arch: mi300 specific architectures; mi300a, mi300x
# |-- partition_mode: currently, only mi300 gpus hold partition mode information.
#     There are two types: compute partition mode and memory partition mode.
#     Currently, only mi300 gpus contain compute partition mode information on the number of xcds.
#
# --------------------------------------------------------------------------------
mi_gpu_spec:
- gpu_series: mi50
gpu_archs:
- gpu_arch: gfx906
models:
- gpu_model: mi50
mi300_arch:
architecture: null
partition_mode: null
chip_ids:
local: null
- gpu_model: mi60
mi300_arch:
architecture: null
partition_mode: null
chip_ids:
local: null
- gpu_series: mi100
gpu_archs:
- gpu_arch: gfx908
models:
- gpu_model: mi100
mi300_arch:
architecture: null
partition_mode: null
chip_ids:
local: null
- gpu_series: mi200
gpu_archs:
- gpu_arch: gfx90a
models:
- gpu_model: mi210
mi300_arch:
architecture: null
partition_mode: null
chip_ids:
local: null
- gpu_model: mi250
mi300_arch:
architecture: null
partition_mode: null
chip_ids:
local: null
- gpu_model: mi250x
mi300_arch:
architecture: null
partition_mode: null
chip_ids:
local: null
- gpu_series: mi300
gpu_archs:
- gpu_arch: gfx940
models:
- gpu_model: mi300a_a0
mi300_arch:
architecture: mi300a
partition_mode:
compute_partition_mode:
num_xcds:
spx: 6
dpx: null
tpx: 2
qpx: null
cpx: null
memory_partition_mode:
nps4: {tpx}
nps1: {spx, tpx}
chip_ids:
local: null
- gpu_arch: gfx941
models:
- gpu_model: mi300x_a0
mi300_arch:
architecture: mi300x
partition_mode:
compute_partition_mode:
num_xcds:
spx: 8
dpx: 4
tpx: null
qpx: 2
cpx: 1
memory_partition_mode:
nps4: {qpx, cpx}
nps1: {spx, qpx, cpx}
chip_ids:
local: null
- gpu_arch: gfx942
models:
- gpu_model: mi300a_a1
mi300_arch:
architecture: mi300a
partition_mode:
compute_partition_mode:
num_xcds:
spx: 6
dpx: null
tpx: 2
qpx: null
cpx: null
memory_partition_mode:
nps4: {tpx}
nps1: {spx, tpx}
chip_ids:
local: 29856
- gpu_model: mi300x_a1
mi300_arch:
architecture: mi300x
partition_mode:
compute_partition_mode:
num_xcds:
spx: 8
dpx: 4
tpx: null
qpx: 2
cpx: 1
memory_partition_mode:
nps4: {qpx, cpx}
nps1: {spx, qpx, cpx}
chip_ids:
local: 29857
- gpu_model: mi308x
mi300_arch:
architecture: mi308x
partition_mode:
compute_partition_mode:
num_xcds:
spx: 4
dpx: 2
tpx: null
qpx: null
cpx: 1
memory_partition_mode:
nps4: {cpx}
nps1: {spx, dpx, cpx}
chip_ids:
local: 29858
+41 -18
Просмотреть файл
@@ -29,6 +29,7 @@ import os
import re
import socket
import subprocess
import sys
from dataclasses import dataclass, field, fields
from datetime import datetime
from math import ceil
@@ -37,6 +38,7 @@ from pathlib import Path as path
import pandas as pd
import config
from utils.mi_gpu_spec import get_gpu_series_dict, get_mi300_chip_id_dict
from utils.tty import get_table_string
from utils.utils import (
console_debug,
@@ -60,21 +62,40 @@ VERSION_LOC = [
def detect_arch(_rocminfo):
from rocprof_compute_base import SUPPORTED_ARCHS
for idx1, linetext in enumerate(_rocminfo):
gpu_arch = search(r"^\s*Name\s*:\s+ ([a-zA-Z0-9]+)\s*$", linetext)
if gpu_arch in SUPPORTED_ARCHS.keys():
# NOTE: currently supported socs are gfx archs only
gpu_arch = search(r"^\s*Name\s*:\s* ([Gg][Ff][Xx][a-zA-Z0-9]+).*\s*$", linetext)
if gpu_arch in get_gpu_series_dict().keys():
break
if str(gpu_arch) in SUPPORTED_ARCHS.keys():
if str(gpu_arch) in get_gpu_series_dict().keys():
gpu_arch = str(gpu_arch)
break
if not gpu_arch in SUPPORTED_ARCHS.keys():
console_error("Cannot find a supported arch in rocminfo")
if not gpu_arch in get_gpu_series_dict().keys():
console_error("Cannot find a supported arch in rocminfo: " + str(gpu_arch))
else:
return (gpu_arch, idx1)
def detect_gpu_chip_id(_rocminfo):
    """
    Find the Chip ID in the given rocminfo lines.

    The first line carrying a numeric "Chip ID" entry wins. A warning is
    emitted when no Chip ID is found, or when the detected id is not listed
    in the table returned by get_mi300_chip_id_dict().

    Args:
        _rocminfo: iterable of rocminfo text lines.

    Returns:
        The detected Chip ID as a string, or None when none was found.
    """
    # The getter can return None while the table is not populated.
    known_chip_ids = get_mi300_chip_id_dict() or {}

    # Stays None for empty input or when no line matches.
    gpu_chip_id = None
    for linetext in _rocminfo:
        # NOTE: current supported socs only have numbers in Chip ID
        # presumably search() returns the captured group or None -- TODO confirm
        found = search(r"^\s*Chip ID\s*:\s* ([0-9]+).*\s*$", linetext)
        if found:
            # Stop at the first Chip ID so that later, non-matching lines
            # cannot overwrite it.
            gpu_chip_id = str(found)
            break

    if not gpu_chip_id:
        console_warning("No Chip ID detected: " + str(gpu_chip_id))
    elif (
        gpu_chip_id not in known_chip_ids
        and int(gpu_chip_id) not in known_chip_ids
    ):
        console_warning("Unknown Chip ID detected: " + str(gpu_chip_id))
    return gpu_chip_id
# Custom decorator to mimic the behavior of kw_only found in Python 3.10
def kw_only(cls):
def __init__(self, *args, **kwargs):
@@ -163,6 +184,7 @@ def generate_machine_specs(args, sysinfo: dict = None):
_rocminfo = rocminfo_full.split("\n")
gpu_arch, idx = detect_arch(_rocminfo)
_rocminfo = _rocminfo[idx + 1 :] # update rocminfo for target section
gpu_chip_id = detect_gpu_chip_id(_rocminfo)
specs = MachineSpecs(
version=specs_version,
timestamp=timestamp,
@@ -180,7 +202,9 @@ def generate_machine_specs(args, sysinfo: dict = None):
compute_partition=compute_partition,
memory_partition=memory_partition,
gpu_arch=gpu_arch,
gpu_chip_id=gpu_chip_id,
)
# Load above SoC specs via module import
try:
soc_module = importlib.import_module("rocprof_compute_soc.soc_" + specs.gpu_arch)
@@ -367,6 +391,14 @@ class MachineSpecs:
"name": "GPU Arch",
},
)
gpu_chip_id: str = field(
default=None,
metadata={
"doc": "The Chip ID of the accelerators/GPUs in the system.",
"name": "Chip ID",
"optional": True,
},
)
gpu_l1: str = field(
default=None,
metadata={
@@ -420,13 +452,6 @@ class MachineSpecs:
"name": "Workgroup Max Size",
},
)
chip_id: str = field(
default=None,
metadata={
"doc": "The Chip ID of the accelerators/GPUs in the system.",
"name": "Chip ID",
},
)
max_waves_per_cu: str = field(
default=None,
metadata={
@@ -661,11 +686,9 @@ def total_sqc(archname, numCUs, numSEs):
def total_l2_banks(archname, L2Banks, compute_partition):
# Fixme: support all supported partitioning mode
# Fixme: "name" is a bad name!
totalL2Banks = L2Banks
xcds = total_xcds(archname, compute_partition)
return L2Banks * xcds
totalL2Banks = L2Banks * xcds
return totalL2Banks
if __name__ == "__main__":
+32 -19
Просмотреть файл
@@ -42,6 +42,7 @@ from pathlib import Path as path
import pandas as pd
import config
from utils.mi_gpu_spec import get_mi300_num_xcds
rocprof_cmd = ""
rocprof_args = ""
@@ -686,7 +687,7 @@ def run_prof(
if new_env and not using_v3():
# flatten tcc for applicable mi300 input
f = path(workload_dir + "/out/pmc_1/results_" + fbase + ".csv")
xcds = total_xcds(mspec.gpu_model, mspec.compute_partition)
xcds = get_mi300_num_xcds(mspec.gpu_model, mspec.compute_partition)
df = flatten_tcc_info_across_xcds(f, xcds, int(mspec._l2_banks))
df.to_csv(f, index=False)
@@ -835,6 +836,7 @@ def replace_timestamps(workload_dir):
def gen_sysinfo(
workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof_only, mspec, soc
):
console_debug("[gen_sysinfo]")
df = mspec.get_class_members()
# Append workload information to machine specs
@@ -1051,47 +1053,58 @@ def flatten_tcc_info_across_xcds(file, xcds, tcc_channel_per_xcd):
return df
def total_xcds(archname, compute_partition):
def total_xcds(gpu_model, compute_partition):
"""
Returns the number of xcds for a gpu model and compute_partition pair.
"""
# For mi300 chips, return result from mi_gpu_spec
result = get_mi300_num_xcds(gpu_model, compute_partition)
if result:
return result
# For other systems, use manual check
# check MI300 has a valid compute partition
mi300a_archs = ["mi300a_a0", "mi300a_a1"]
mi300x_archs = ["mi300x_a0", "mi300x_a1"]
mi308x_archs = ["mi308x"]
mi300a_model = ["mi300a_a0", "mi300a_a1"]
mi300x_model = ["mi300x_a0", "mi300x_a1"]
mi308x_model = ["mi308x"]
if (
archname.lower() in mi300a_archs + mi300x_archs + mi308x_archs
gpu_model.lower() in mi300a_model + mi300x_model + mi308x_model
and compute_partition == "NA"
):
console_error("Invalid compute partition found for {}".format(archname))
if archname.lower() not in mi300a_archs + mi300x_archs + mi308x_archs:
console_error("Invalid compute partition found for {}".format(gpu_model))
if gpu_model.lower() not in mi300a_model + mi300x_model + mi308x_model:
return 1
# from the whitepaper
# https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf
if compute_partition.lower() == "spx":
if archname.lower() in mi300a_archs:
if gpu_model.lower() in mi300a_model:
return 6
if archname.lower() in mi300x_archs:
if gpu_model.lower() in mi300x_model:
return 8
if archname.lower() in mi308x_archs:
if gpu_model.lower() in mi308x_model:
return 4
if compute_partition.lower() == "tpx":
if archname.lower() in mi300a_archs:
if gpu_model.lower() in mi300a_model:
return 2
if compute_partition.lower() == "dpx":
if archname.lower() in mi300x_archs:
if gpu_model.lower() in mi300x_model:
return 4
if archname.lower() in mi308x_archs:
if gpu_model.lower() in mi308x_model:
return 2
if compute_partition.lower() == "qpx":
if archname.lower() in mi300x_archs:
if gpu_model.lower() in mi300x_model:
return 2
if compute_partition.lower() == "cpx":
if archname.lower() in mi300x_archs:
return 2
if archname.lower() in mi308x_archs:
if gpu_model.lower() in mi300x_model:
return 1
if gpu_model.lower() in mi308x_model:
return 1
# TODO implement other archs here as needed
console_error(
"Unknown compute partition / arch found for {} / {}".format(
compute_partition, archname
compute_partition, gpu_model
)
)