Add the ability to determine GPU model from Chip ID (#423)
* Add the ability to determine the GPU model from the Chip ID, using a built-in dictionary to distinguish MI300 systems.
  Signed-off-by: xuchen-amd <xuchen@amd.com>
* Add support for MI300X_A1.
  Signed-off-by: xuchen-amd <xuchen@amd.com>
* Remove MI308X identification using num CUs, and format Python using black.
  Signed-off-by: xuchen-amd <xuchen@amd.com>
* Add Read the Docs.
  Signed-off-by: xuchen-amd <xuchen@amd.com>
* Add sphinx requirement.
  Signed-off-by: xuchen-amd <xuchen@amd.com>
* Remove gpu_model identification using gpu_arch.
  Signed-off-by: xuchen-amd <xuchen@amd.com>
* Remove OMNIPERF_ARCH_OVERRIDE and its usage. The MI300 GPU model is now determined solely from the Chip ID.
  Signed-off-by: xuchen-amd <xuchen@amd.com>
* Fix Python formatting using black.
  Signed-off-by: xuchen-amd <xuchen@amd.com>
---------
Signed-off-by: xuchen-amd <xuchen@amd.com>
이 커밋은 다음에 포함됨:
@@ -55,6 +55,12 @@ SUPPORTED_ARCHS = {
|
||||
"gfx942": {"mi300": ["MI300A_A1", "MI300X_A1"]},
|
||||
}
|
||||
|
||||
MI300_CHIP_IDS = {
|
||||
"29856": "MI300A_A1",
|
||||
"29857": "MI300X_A1",
|
||||
"29858": "MI308X",
|
||||
}
|
||||
|
||||
|
||||
class Omniperf:
|
||||
def __init__(self):
|
||||
|
||||
@@ -34,6 +34,7 @@ from pathlib import Path
|
||||
from collections import OrderedDict
|
||||
|
||||
from omniperf_base import SUPPORTED_ARCHS
|
||||
from omniperf_base import MI300_CHIP_IDS
|
||||
|
||||
|
||||
class OmniSoC_Base:
|
||||
@@ -100,11 +101,6 @@ class OmniSoC_Base:
|
||||
# assume no SoC specific options and return empty list by default
|
||||
return []
|
||||
|
||||
def check_arch_override(self):
|
||||
if "OMNIPERF_ARCH_OVERRIDE" in os.environ.keys():
|
||||
return os.environ["OMNIPERF_ARCH_OVERRIDE"]
|
||||
return ""
|
||||
|
||||
@demarcate
|
||||
def populate_mspec(self):
|
||||
from utils.specs import search, run, total_sqc, total_xcds
|
||||
@@ -156,6 +152,11 @@ class OmniSoC_Base:
|
||||
self._mspec.workgroup_max_size = key
|
||||
continue
|
||||
|
||||
key = search(r"^\s*Chip ID:\s+ ([a-zA-Z0-9]+)\s*", linetext)
|
||||
if key != None:
|
||||
self._mspec.chip_id = key
|
||||
continue
|
||||
|
||||
key = search(r"^\s*Max Waves Per CU:\s+ ([a-zA-Z0-9]+)\s*", linetext)
|
||||
if key != None:
|
||||
self._mspec.max_waves_per_cu = key
|
||||
@@ -181,28 +182,9 @@ class OmniSoC_Base:
|
||||
0
|
||||
].upper()
|
||||
if self._mspec.gpu_model == "MI300":
|
||||
self._mspec.gpu_model = list(SUPPORTED_ARCHS[self._mspec.gpu_arch].values())[
|
||||
0
|
||||
][0]
|
||||
if self._mspec.gpu_arch == "gfx942":
|
||||
if (
|
||||
"MI300A" in "\n".join(self._mspec._rocminfo)
|
||||
or "MI300A" in self.check_arch_override()
|
||||
):
|
||||
self._mspec.gpu_model = "MI300A_A1"
|
||||
elif (
|
||||
"MI300X" in "\n".join(self._mspec._rocminfo)
|
||||
or "MI300X" in self.check_arch_override()
|
||||
):
|
||||
self._mspec.gpu_model = "MI300X_A1"
|
||||
# We need to distinguish MI308X by peeking reported num CUs
|
||||
elif self._mspec.cu_per_gpu == "80" or "MI308X" in self.check_arch_override():
|
||||
self._mspec.gpu_model = "MI308X"
|
||||
else:
|
||||
console_error(
|
||||
"Cannot parse MI300 details from rocminfo. Please verify output or set the arch using (e.g.,) "
|
||||
'export OMNIPERF_ARCH_OVERRIDE="MI300A"'
|
||||
)
|
||||
# Use the Chip ID captured from the "Chip ID:" line to pick the specific
# MI300 variant from the built-in MI300_CHIP_IDS table. The result must be
# stored in gpu_model (not chip_id): the total_xcds() call that follows is
# keyed on gpu_model, and chip_id should keep the raw identifier that was
# parsed. Chip IDs missing from the table leave gpu_model unchanged.
if self._mspec.chip_id in MI300_CHIP_IDS:
    self._mspec.gpu_model = MI300_CHIP_IDS[self._mspec.chip_id]
|
||||
|
||||
self._mspec.num_xcd = str(
|
||||
total_xcds(self._mspec.gpu_model, self._mspec.compute_partition)
|
||||
|
||||
@@ -403,6 +403,13 @@ class MachineSpecs:
|
||||
"name": "Workgroup Max Size",
|
||||
},
|
||||
)
|
||||
chip_id: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
"doc": "<>",
|
||||
"name": "Chip ID",
|
||||
},
|
||||
)
|
||||
max_waves_per_cu: str = field(
|
||||
default=None,
|
||||
metadata={
|
||||
|
||||
새 이슈에서 참조
사용자 차단