From 2e7f82aa13aa0584fee7a129ea2130e38ba07fd6 Mon Sep 17 00:00:00 2001
From: xuchen-amd <xuchen@amd.com>
Date: Fri, 21 Mar 2025 02:02:58 -0400
Subject: [PATCH] Improve chip info logic. (#581)

* Clean up unused functions.

* Fix number of XCDs for MI300X CPX (core partition).

* Add support for memory partition mode.

* Modify total_xcd to adapt to all gpu models.

* Run black and isort.

* Make gpu_arch regex more generic.

* Add error checking for compute partition mode num xcds.

* Set gpu_chip_id as optional.

* Fix get_gpu_model.

---------

Signed-off-by: xuchen-amd <xuchen@amd.com>
---
 src/rocprof_compute_base.py         |  23 +-
 src/rocprof_compute_soc/soc_base.py |  33 ++-
 src/utils/mi_gpu_spec.py            | 338 ++++++++++++++++++++++++++++
 src/utils/mi_gpu_spec.yaml          | 164 ++++++++++++++
 src/utils/specs.py                  |  59 +++--
 src/utils/utils.py                  |  51 +++--
 6 files changed, 594 insertions(+), 74 deletions(-)
 create mode 100644 src/utils/mi_gpu_spec.py
 create mode 100644 src/utils/mi_gpu_spec.yaml

diff --git a/src/rocprof_compute_base.py b/src/rocprof_compute_base.py
index cd97676fde..e81615db1d 100644
--- a/src/rocprof_compute_base.py
+++ b/src/rocprof_compute_base.py
@@ -24,7 +24,6 @@
 
 import argparse
 import importlib
-import logging
 import os
 import shutil
 import socket
@@ -43,6 +42,10 @@ from utils.logger import (
     setup_file_handler,
     setup_logging_priority,
 )
+from utils.mi_gpu_spec import (
+    get_gpu_series_dict,
+    parse_mi_gpu_spec,
+)
 from utils.specs import MachineSpecs, generate_machine_specs
 from utils.utils import (
     console_debug,
@@ -57,21 +60,6 @@ from utils.utils import (
     set_locale_encoding,
 )
 
-SUPPORTED_ARCHS = {
-    "gfx906": {"mi50": ["MI50", "MI60"]},
-    "gfx908": {"mi100": ["MI100"]},
-    "gfx90a": {"mi200": ["MI210", "MI250", "MI250X"]},
-    "gfx940": {"mi300": ["MI300A_A0"]},
-    "gfx941": {"mi300": ["MI300X_A0"]},
-    "gfx942": {"mi300": ["MI300A_A1", "MI300X_A1"]},
-}
-
-MI300_CHIP_IDS = {
-    "29856": "MI300A_A1",
-    "29857": "MI300X_A1",
-    "29858": "MI308X",
-}
-
 
 class RocProfCompute:
     def __init__(self):
@@ -87,7 +75,8 @@ class RocProfCompute:
             "ver_pretty": None,
         }
         self.__options = {}
-        self.__supported_archs = SUPPORTED_ARCHS
+        parse_mi_gpu_spec()
+        self.__supported_archs = get_gpu_series_dict()
         self.__mspec: MachineSpecs = None  # to be initalized in load_soc_specs()
         setup_console_handler()
         self.set_version()
diff --git a/src/rocprof_compute_soc/soc_base.py b/src/rocprof_compute_soc/soc_base.py
index 817f1c94b6..fea2901111 100644
--- a/src/rocprof_compute_soc/soc_base.py
+++ b/src/rocprof_compute_soc/soc_base.py
@@ -27,6 +27,7 @@ import math
 import os
 import re
 import shutil
+import sys
 import threading
 from abc import ABC, abstractmethod
 from collections import OrderedDict
@@ -36,7 +37,7 @@ import numpy as np
 import pandas as pd
 import yaml
 
-from rocprof_compute_base import MI300_CHIP_IDS, SUPPORTED_ARCHS
+from utils.mi_gpu_spec import get_gpu_model, get_gpu_series
 from utils.parser import build_in_vars, supported_denom
 from utils.utils import (
     console_debug,
@@ -45,6 +46,7 @@ from utils.utils import (
     convert_metric_id_to_panel_idx,
     demarcate,
     get_default_accumulate_counter_file_ymal,
+    total_xcds,
     using_v3,
 )
 
@@ -53,6 +55,7 @@ class OmniSoC_Base:
     def __init__(
         self, args, mspec
     ):  # new info field will contain rocminfo or sysinfo to populate properties
+        console_debug("[omnisoc init]")
         self.__args = args
         self.__arch = None
         self._mspec = mspec
@@ -102,7 +105,8 @@ class OmniSoC_Base:
 
     @demarcate
     def populate_mspec(self):
-        from utils.specs import run, search, total_sqc, total_xcds
+        console_debug("[populate_mspec]")
+        from utils.specs import run, search, total_sqc
 
         if not hasattr(self._mspec, "_rocminfo") or self._mspec._rocminfo is None:
             return
@@ -151,11 +155,6 @@ class OmniSoC_Base:
                 self._mspec.workgroup_max_size = key
                 continue
 
-            key = search(r"^\s*Chip ID:\s+ ([a-zA-Z0-9]+)\s*", linetext)
-            if key != None:
-                self._mspec.chip_id = key
-                continue
-
             key = search(r"^\s*Max Waves Per CU:\s+ ([a-zA-Z0-9]+)\s*", linetext)
             if key != None:
                 self._mspec.max_waves_per_cu = key
@@ -182,18 +181,11 @@ class OmniSoC_Base:
         self._mspec.cur_sclk = self._mspec.max_sclk
         self._mspec.cur_mclk = self._mspec.max_mclk
 
-        self._mspec.gpu_series = list(SUPPORTED_ARCHS[self._mspec.gpu_arch].keys())[
-            0
-        ].upper()
-        # specify gpu name for gfx942 hardware
-        self._mspec.gpu_model = list(SUPPORTED_ARCHS[self._mspec.gpu_arch].keys())[
-            0
-        ].upper()
-        if self._mspec.gpu_model == "MI300":
-            # Use Chip ID to distinguish MI300 gpu model using the built-in dictionary
-            if self._mspec.chip_id in MI300_CHIP_IDS:
-                self._mspec.gpu_model = MI300_CHIP_IDS[self._mspec.chip_id]
-
+        self._mspec.gpu_series = get_gpu_series(self._mspec.gpu_arch).upper()
+        # specify gpu model name for gfx942 hardware; an unknown chip id yields
+        # None, so fall back to the series name as the replaced code did
+        model = get_gpu_model(self._mspec.gpu_arch, self._mspec.gpu_chip_id)
+        self._mspec.gpu_model = (model or self._mspec.gpu_series).upper()
         self._mspec.num_xcd = str(
             total_xcds(self._mspec.gpu_model, self._mspec.compute_partition)
         )
@@ -593,7 +585,8 @@ def perfmon_coalesce(
         # TODO: more error checking
         if len(spatial_multiplexing) != 3:
             console_error(
-                "profiling", "multiplexing need provide node_idx node_count and gpu_count"
+                "profiling",
+                "multiplexing need provide node_idx node_count and gpu_count",
             )
 
         node_idx = int(spatial_multiplexing[0])
diff --git a/src/utils/mi_gpu_spec.py b/src/utils/mi_gpu_spec.py
new file mode 100644
index 0000000000..2682a585c8
--- /dev/null
+++ b/src/utils/mi_gpu_spec.py
@@ -0,0 +1,338 @@
+import logging
+import os
+import sys
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Union
+
+import yaml
+
+# Constants for MI series; each value is the position of that series' entry
+# in mi_gpu_spec.yaml. NOTE: Currently supports MI50, MI100, MI200, MI300
+MI50 = 0
+MI100 = 1
+MI200 = 2
+MI300 = 3
+
+MI_CONSTANS = {MI50: "mi50", MI100: "mi100", MI200: "mi200", MI300: "mi300"}
+
+gpu_series_dict = {}  # key: gpu arch, value: gpu series name
+gpu_model_dict = {}  # key: gpu arch, value: list of gpu model names
+mi300_archs_dict = {}  # key: gpu model, value: mi300 architecture
+mi300_num_xcds_dict = {}  # key: gpu model, value: num_xcds per compute partition
+mi300_nps_dict = {}  # key: gpu model (NOTE: key can also be architecture)
+mi300_chip_id_dict = {}  # key: chip id (int), value: gpu model
+
+
+# ----------------------------
+# Data Class handling to preserve the hierarchical gpu information
+# ----------------------------
+
+
+@dataclass
+class ComputePartitionMode:
+    """
+    Represents the compute partition mode.
+    """
+
+    # A declared field, so the generated __eq__/__repr__ actually reflect it.
+    num_xcds: Optional[Any] = None
+
+    def get_num_xcds(self):
+        return self.num_xcds
+
+
+class Singleton(object):
+    """Hand out one shared instance per subclass; ctor args go to __init__ only."""
+    _instances = {}
+
+    def __new__(class_, *args, **kwargs):
+        if class_ not in class_._instances:
+            # object.__new__() raises TypeError on extra args, so forward none.
+            class_._instances[class_] = super(Singleton, class_).__new__(class_)
+        return class_._instances[class_]
+
+
+@dataclass
+class MIGPU(Singleton):
+    """
+    Singleton class representing the detected MI GPU of current system.
+    Ensures only one instance exists; re-constructing it re-runs __init__.
+    """
+
+    _instance = None  # Class variable to hold the single instance
+
+    def __new__(cls, *args, **kwargs):
+        if cls._instance is None:
+            cls._instance = super(MIGPU, cls).__new__(cls)
+            cls._instance.mi_gpu_spec = []  # Initialize the instance attribute
+        return cls._instance
+
+    def __init__(
+        self,
+        gpu_series,
+        gpu_arch,
+        gpu_model,
+        chip_id=None,
+        mi300_arch=None,
+        num_xcds=None,
+    ):
+        """
+        gpu series, gpu_arch and gpu_model information must be available for a given MI GPU.
+        gpu series (str)
+        gpu_arch (str)
+        gpu_model (str)
+        """
+        # gpu_series (str): The GPU series name (e.g., 'mi50', 'mi100', 'mi200', 'mi300')
+        self.gpu_series = gpu_series
+        self.gpu_arch = gpu_arch
+        self.gpu_model = gpu_model
+        self.chip_id = chip_id
+        self.mi300_arch = mi300_arch
+        self.compute_partition = ComputePartitionMode(num_xcds)
+
+        self.is_mi300 = self.mi300_arch is not None
+        # A hand-written __init__ is never followed by an automatic __post_init__.
+        self.__post_init__()
+
+    def __post_init__(self):
+        # NOTE: currently, all mi300 series gpus shall have compute partition information
+        if self.is_mi300 and self.compute_partition.get_num_xcds() is None:
+            logging.warning(
+                "[MIGPU post init] mi300 gpu detected, but no num_xcd/compute partition data detected!!!"
+            )
+
+    def set_chip_id(self, chip_id):
+        self.chip_id = chip_id
+
+    def set_mi300_arch(self, mi300_arch, num_xcds):
+        """All mi300 series gpus shall have compute partition information."""
+        if num_xcds is None:
+            logging.warning(
+                "[MIGPU post init] mi300 gpu detected, but no num_xcd/compute partition data detected!!!"
+            )
+
+        self.mi300_arch = mi300_arch
+        self.is_mi300 = mi300_arch is not None  # keep the flag in sync
+        self.compute_partition = ComputePartitionMode(num_xcds)
+
+    def get_gpu_series(self):
+        return self.gpu_series
+
+    def get_gpu_arch(self):
+        return self.gpu_arch
+
+    def get_gpu_model(self):
+        return self.gpu_model
+
+    def get_chip_id(self):
+        return self.chip_id
+
+    def get_mi300_arch(self):
+        return self.mi300_arch
+
+    def get_compute_partition(self):
+        return self.compute_partition
+
+
+# ----------------------------
+# YAML Parsing and Data Handling
+# ----------------------------
+
+
+def load_yaml(file_path: str) -> Dict[str, Any]:
+    """
+    Load the MI GPU spec YAML file into a Python dictionary.
+
+    Args:
+        file_path (str): The path to the YAML file.
+
+    Returns:
+        Dict[str, Any]: Parsed YAML data; exits with status 1 on any load error.
+    """
+    logging.debug("[load_yaml]")
+    try:
+        with open(file_path, "r") as file:
+            return yaml.safe_load(file)
+    except FileNotFoundError:
+        logging.error(f"Error: The file '{file_path}' was not found.")
+    except yaml.YAMLError as exc:
+        logging.error(f"Error parsing YAML file '{file_path}': {exc}")
+    except Exception as e:
+        logging.error(
+            f"An unexpected error occurred while loading YAML file '{file_path}': {e}"
+        )
+    # Only failures reach this point; returning None would just crash the caller.
+    sys.exit(1)
+
+
+def parse_mi_gpu_spec():
+    """
+    Parse mi_gpu_spec.yaml (next to this module) into the module-level dicts.
+    MI GPUs
+      |-- series
+          |-- architecture (list)
+              |-- models
+                  |-- chip_ids
+                  |-- mi300_arch
+                  |-- partition_mode
+    """
+
+    current_dir = os.path.dirname(__file__)
+    yaml_file_path = os.path.join(current_dir, "mi_gpu_spec.yaml")
+
+    # Load the YAML data
+    yaml_data = load_yaml(yaml_file_path)
+    mi300_models_dict = {}  # key: gpu arch, value: list of gpu models
+
+    for mi_index, mi_series in MI_CONSTANS.items():
+        if mi_series != MI_CONSTANS[MI300]:
+            logging.debug("[parse_mi_gpu_spec] Processing series: %s" % mi_series)
+            for key, value in yaml_data.items():
+                # series entry is picked by position (mi_index); only its first gpu arch is read
+                curr_gpu_arch = value[mi_index]["gpu_archs"][0]["gpu_arch"]
+                gpu_series_dict[curr_gpu_arch] = mi_series
+                gpu_model_dict[curr_gpu_arch] = []
+                for models in value[mi_index]["gpu_archs"][0]["models"]:
+                    gpu_model_dict[curr_gpu_arch].append(models["gpu_model"])
+        elif mi_series == MI_CONSTANS[MI300]:
+            # MI300 requires specific processing
+            for key, value in yaml_data.items():
+                mi300_gpu_archs_list = []
+                # NOTE: only MI300 have multiple architectures
+                for archs in value[MI300]["gpu_archs"]:
+                    curr_gpu_arch = archs["gpu_arch"]
+                    mi300_gpu_archs_list.append(curr_gpu_arch)
+                    gpu_series_dict[curr_gpu_arch] = mi_series
+
+                for idx, arch in enumerate(mi300_gpu_archs_list):
+                    mi300_models_dict[arch] = []
+                    for models in value[MI300]["gpu_archs"][idx]["models"]:
+                        gpu_model = models["gpu_model"]
+
+                        # NOTE: mi300 architecture is available for all mi300 gpu models
+                        mi300_archs_dict[gpu_model] = models["mi300_arch"]["architecture"]
+
+                        # NOTE: compute partition mode num xcds is available for all mi300 gpu models
+                        mi300_num_xcds_dict[gpu_model] = models["mi300_arch"][
+                            "partition_mode"
+                        ]["compute_partition_mode"]["num_xcds"]
+
+                        # NOTE: memory partition mode nps is available for all mi300 gpu models
+                        mi300_nps_dict[gpu_model] = models["mi300_arch"][
+                            "partition_mode"
+                        ]["memory_partition_mode"]
+
+                        if not models["chip_ids"]["local"] is None:
+                            # save chip_id, gpu_model pair if chip id is available
+                            # NOTE: chip id is available for all gfx942 machines
+                            mi300_chip_id_dict[models["chip_ids"]["local"]] = models[
+                                "gpu_model"
+                            ]
+                        mi300_models_dict[arch].append(gpu_model)
+
+    gpu_model_dict.update(mi300_models_dict)  # merge the mi300 arch -> models entries
+
+
+def get_gpu_series_dict():
+    """Return the arch -> series dict; empty (never None) if not yet parsed."""
+    if not gpu_series_dict:
+        logging.error(
+            "gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?"
+        )
+    return gpu_series_dict
+
+
+def get_gpu_series(gpu_arch_):
+    """Return the gpu series for gpu_arch_, or None (after logging) if unknown."""
+    if not gpu_series_dict:
+        logging.error(
+            "gpu_series_dict not yet populated, did you run parse_mi_gpu_spec()?"
+        )
+        return None
+
+    # Look the arch up as given first, then by its lowercase form.
+    series = gpu_series_dict.get(gpu_arch_) or gpu_series_dict.get(gpu_arch_.lower())
+    if not series:
+        logging.warning(f"No matching gpu series found for gpu arch: {gpu_arch_}")
+        return None
+    return series
+
+
+def get_gpu_model(gpu_arch_, chip_id_):
+    """
+    Return the gpu model name for gpu_arch_, or None (after logging) if unknown.
+    gfx942 is resolved through chip_id_; other archs use their first listed model.
+    """
+    if not gpu_model_dict:
+        logging.error(
+            "gpu_model_dict not yet populated. Did you run parse_mi_gpu_spec()?"
+        )
+        return None
+
+    gpu_arch_lower = gpu_arch_.lower()
+    # Handle gfx942 with chip_id mapping
+    if gpu_arch_lower == "gfx942":
+        try:
+            # chip ids are stored as ints; a non-numeric/missing id is "unknown"
+            gpu_model = mi300_chip_id_dict.get(int(chip_id_))
+        except (TypeError, ValueError):
+            gpu_model = None
+        if not gpu_model:
+            logging.warning(f"No gpu model found for chip id: {chip_id_}")
+            return None
+        return gpu_model
+
+    # Otherwise use gpu_model_dict mapping for other mi architectures
+    # NOTE: take the first element works for now
+    gpu_model = next(iter(gpu_model_dict.get(gpu_arch_lower) or []), None)
+    if not gpu_model:
+        logging.warning(f"No gpu model found for gpu arch: {gpu_arch_lower}")
+        return None
+    return gpu_model
+
+
+def get_mi300_archs_dict():
+    """Return the gpu model -> mi300 architecture dict, or None if unparsed."""
+    if mi300_archs_dict:
+        return mi300_archs_dict
+    msg = "mi300_archs_dict not yet populated, did you run parse_mi_gpu_spec()?"
+    logging.error(msg)
+    return None
+
+
+def get_mi300_num_xcds(gpu_model_, compute_partition_):
+    """Return the XCD count for an MI300 model/partition pair, else None."""
+    if not mi300_num_xcds_dict:
+        logging.error(
+            "mi300_num_xcds_dict not yet populated, did you run parse_mi_gpu_spec()?"
+        )
+        return None
+
+    # Both lookups below use the lowercased inputs.
+    model_key = gpu_model_.lower()
+    partition_key = compute_partition_.lower()
+    try:
+        xcds_by_partition = mi300_num_xcds_dict[model_key]
+    except KeyError:
+        logging.info(f"Current system is not a mi300 system: {gpu_model_}")
+        return None
+    if partition_key not in xcds_by_partition:
+        logging.info(f"Unknown compute partition: {compute_partition_}")
+        return None
+
+    xcd_count = xcds_by_partition[partition_key]
+    if xcd_count:
+        return xcd_count
+    logging.warning(
+        "Unknown compute partition found for %s / %s", compute_partition_, gpu_model_
+    )
+    return None
+
+
+def get_mi300_chip_id_dict():
+    """Return the chip id -> gpu model dict; empty (never None) if unparsed."""
+    if not mi300_chip_id_dict:
+        logging.error(
+            "mi300_chip_id_dict not yet populated, did you run parse_mi_gpu_spec()?"
+        )
+    return mi300_chip_id_dict
diff --git a/src/utils/mi_gpu_spec.yaml b/src/utils/mi_gpu_spec.yaml
new file mode 100644
index 0000000000..c481ffd963
--- /dev/null
+++ b/src/utils/mi_gpu_spec.yaml
@@ -0,0 +1,164 @@
+# --------------------------------------------------------------------------------
+#
+# This yaml file tracks MI gpu spec in a tree structure.
+#
+# *It is important to note that this file only tracks the common specs for MI GPU series.*
+# *For example, the CU #s are based on information retrieved from other tools.*
+# **
+#
+# MI GPUs
+#   |-- series: the specific MI series; mi50, mi100, mi200, mi300
+#       |-- architecture: currently, only mi300 gpus hold different architectures
+#           |-- models
+#               |-- chip_ids: chip id is specific to the environment the gpu is being used on
+#               |-- mi300_arch: mi300 specific architectures; mi300a, mi300x
+#                   |-- partition_mode: currently, only mi300 gpus hold partition mode information.
+#                                       Two types: compute partition mode and memory partition mode.
+#                                       The compute partition mode records the number of xcds per partition.
+#
+# --------------------------------------------------------------------------------
+
+mi_gpu_spec:
+  - gpu_series: mi50
+    gpu_archs:
+      - gpu_arch: gfx906
+        models:
+          - gpu_model: mi50
+            mi300_arch:
+              architecture: null
+              partition_mode: null
+            chip_ids:
+              local: null
+          - gpu_model: mi60
+            mi300_arch:
+              architecture: null
+              partition_mode: null
+            chip_ids:
+              local: null
+
+  - gpu_series: mi100
+    gpu_archs:
+      - gpu_arch: gfx908
+        models:
+          - gpu_model: mi100
+            mi300_arch:
+              architecture: null
+              partition_mode: null
+            chip_ids:
+              local: null
+
+  - gpu_series: mi200
+    gpu_archs:
+      - gpu_arch: gfx90a
+        models:
+          - gpu_model: mi210
+            mi300_arch:
+              architecture: null
+              partition_mode: null
+            chip_ids:
+              local: null
+          - gpu_model: mi250
+            mi300_arch:
+              architecture: null
+              partition_mode: null
+            chip_ids:
+              local: null
+          - gpu_model: mi250x
+            mi300_arch:
+              architecture: null
+              partition_mode: null
+            chip_ids:
+              local: null
+
+  - gpu_series: mi300
+    gpu_archs:
+      - gpu_arch: gfx940
+        models:
+          - gpu_model: mi300a_a0
+            mi300_arch:
+              architecture: mi300a
+              partition_mode:
+                compute_partition_mode:
+                  num_xcds:
+                    spx: 6
+                    dpx: null
+                    tpx: 2
+                    qpx: null
+                    cpx: null
+                memory_partition_mode:
+                  nps4: {tpx}
+                  nps1: {spx, tpx}
+            chip_ids:
+              local: null
+
+      - gpu_arch: gfx941
+        models:
+          - gpu_model: mi300x_a0
+            mi300_arch:
+              architecture: mi300x
+              partition_mode:
+                compute_partition_mode:
+                  num_xcds:
+                    spx: 8
+                    dpx: 4
+                    tpx: null
+                    qpx: 2
+                    cpx: 1
+                memory_partition_mode:
+                  nps4: {qpx, cpx}
+                  nps1: {spx, qpx, cpx}
+            chip_ids:
+              local: null
+
+      - gpu_arch: gfx942
+        models:
+          - gpu_model: mi300a_a1
+            mi300_arch:
+              architecture: mi300a
+              partition_mode:
+                compute_partition_mode:
+                  num_xcds:
+                    spx: 6
+                    dpx: null
+                    tpx: 2
+                    qpx: null
+                    cpx: null
+                memory_partition_mode:
+                  nps4: {tpx}
+                  nps1: {spx, tpx}
+            chip_ids:
+              local: 29856
+
+          - gpu_model: mi300x_a1
+            mi300_arch:
+              architecture: mi300x
+              partition_mode:
+                compute_partition_mode:
+                  num_xcds:
+                    spx: 8
+                    dpx: 4
+                    tpx: null
+                    qpx: 2
+                    cpx: 1
+                memory_partition_mode:
+                  nps4: {qpx, cpx}
+                  nps1: {spx, qpx, cpx}
+            chip_ids:
+              local: 29857
+
+          - gpu_model: mi308x
+            mi300_arch:
+              architecture: mi308x
+              partition_mode:
+                compute_partition_mode:
+                  num_xcds:
+                    spx: 4
+                    dpx: 2
+                    tpx: null
+                    qpx: null
+                    cpx: 1
+                memory_partition_mode:
+                  nps4: {cpx}
+                  nps1: {spx, dpx, cpx}
+            chip_ids:
+              local: 29858
diff --git a/src/utils/specs.py b/src/utils/specs.py
index 1a457e0aa0..f29e724f5a 100644
--- a/src/utils/specs.py
+++ b/src/utils/specs.py
@@ -29,6 +29,7 @@ import os
 import re
 import socket
 import subprocess
+import sys
 from dataclasses import dataclass, field, fields
 from datetime import datetime
 from math import ceil
@@ -37,6 +38,7 @@ from pathlib import Path as path
 import pandas as pd
 
 import config
+from utils.mi_gpu_spec import get_gpu_series_dict, get_mi300_chip_id_dict
 from utils.tty import get_table_string
 from utils.utils import (
     console_debug,
@@ -60,21 +62,40 @@ VERSION_LOC = [
 
 
 def detect_arch(_rocminfo):
-    from rocprof_compute_base import SUPPORTED_ARCHS
-
     for idx1, linetext in enumerate(_rocminfo):
-        gpu_arch = search(r"^\s*Name\s*:\s+ ([a-zA-Z0-9]+)\s*$", linetext)
-        if gpu_arch in SUPPORTED_ARCHS.keys():
+        # NOTE: currently supported socs are gfx archs only
+        gpu_arch = search(r"^\s*Name\s*:\s* ([Gg][Ff][Xx][a-zA-Z0-9]+).*\s*$", linetext)
+        if gpu_arch in get_gpu_series_dict().keys():
             break
-        if str(gpu_arch) in SUPPORTED_ARCHS.keys():
+        if str(gpu_arch) in get_gpu_series_dict().keys():
             gpu_arch = str(gpu_arch)
             break
-    if not gpu_arch in SUPPORTED_ARCHS.keys():
-        console_error("Cannot find a supported arch in rocminfo")
+    if not gpu_arch in get_gpu_series_dict().keys():
+        console_error("Cannot find a supported arch in rocminfo: " + str(gpu_arch))
     else:
         return (gpu_arch, idx1)
 
 
+def detect_gpu_chip_id(_rocminfo):
+    """
+    Return the first Chip ID listed in _rocminfo as a string, or None if absent.
+    A warning is emitted when no Chip ID is found or when it is not a known one.
+    """
+    known_chip_ids = get_mi300_chip_id_dict() or {}
+    gpu_chip_id = None  # stays None for empty input or when no line matches
+    for linetext in _rocminfo:
+        # NOTE: current supported socs only have numbers in Chip ID
+        gpu_chip_id = search(r"^\s*Chip ID\s*:\s* ([0-9]+).*\s*$", linetext)
+        if gpu_chip_id:
+            gpu_chip_id = str(gpu_chip_id)
+            break  # first hit wins; later lines must not overwrite it with None
+    if not gpu_chip_id:
+        console_warning("No Chip ID detected: " + str(gpu_chip_id))
+    elif int(gpu_chip_id) not in known_chip_ids:
+        console_warning("Unknown Chip ID detected: " + str(gpu_chip_id))
+    return gpu_chip_id
+
+
 # Custom decorator to mimic the behavior of kw_only found in Python 3.10
 def kw_only(cls):
     def __init__(self, *args, **kwargs):
@@ -163,6 +184,7 @@ def generate_machine_specs(args, sysinfo: dict = None):
     _rocminfo = rocminfo_full.split("\n")
     gpu_arch, idx = detect_arch(_rocminfo)
     _rocminfo = _rocminfo[idx + 1 :]  # update rocminfo for target section
+    gpu_chip_id = detect_gpu_chip_id(_rocminfo)
     specs = MachineSpecs(
         version=specs_version,
         timestamp=timestamp,
@@ -180,7 +202,9 @@ def generate_machine_specs(args, sysinfo: dict = None):
         compute_partition=compute_partition,
         memory_partition=memory_partition,
         gpu_arch=gpu_arch,
+        gpu_chip_id=gpu_chip_id,
     )
+
     # Load above SoC specs via module import
     try:
         soc_module = importlib.import_module("rocprof_compute_soc.soc_" + specs.gpu_arch)
@@ -367,6 +391,14 @@ class MachineSpecs:
             "name": "GPU Arch",
         },
     )
+    gpu_chip_id: str = field(
+        default=None,
+        metadata={
+            "doc": "The Chip ID of the accelerators/GPUs in the system.",
+            "name": "Chip ID",
+            "optional": True,
+        },
+    )
     gpu_l1: str = field(
         default=None,
         metadata={
@@ -420,13 +452,6 @@ class MachineSpecs:
             "name": "Workgroup Max Size",
         },
     )
-    chip_id: str = field(
-        default=None,
-        metadata={
-            "doc": "The Chip ID of the accelerators/GPUs in the system.",
-            "name": "Chip ID",
-        },
-    )
     max_waves_per_cu: str = field(
         default=None,
         metadata={
@@ -661,11 +686,9 @@ def total_sqc(archname, numCUs, numSEs):
 
 
 def total_l2_banks(archname, L2Banks, compute_partition):
-    # Fixme: support all supported partitioning mode
-    # Fixme: "name" is a bad name!
-    totalL2Banks = L2Banks
     xcds = total_xcds(archname, compute_partition)
-    return L2Banks * xcds
+    totalL2Banks = L2Banks * xcds
+    return totalL2Banks
 
 
 if __name__ == "__main__":
diff --git a/src/utils/utils.py b/src/utils/utils.py
index 2cbe942cf9..c50c532a82 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -42,6 +42,7 @@ from pathlib import Path as path
 import pandas as pd
 
 import config
+from utils.mi_gpu_spec import get_mi300_num_xcds
 
 rocprof_cmd = ""
 rocprof_args = ""
@@ -686,7 +687,7 @@ def run_prof(
     if new_env and not using_v3():
         # flatten tcc for applicable mi300 input
         f = path(workload_dir + "/out/pmc_1/results_" + fbase + ".csv")
-        xcds = total_xcds(mspec.gpu_model, mspec.compute_partition)
+        xcds = get_mi300_num_xcds(mspec.gpu_model, mspec.compute_partition)
         df = flatten_tcc_info_across_xcds(f, xcds, int(mspec._l2_banks))
         df.to_csv(f, index=False)
 
@@ -835,6 +836,7 @@ def replace_timestamps(workload_dir):
 def gen_sysinfo(
     workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof_only, mspec, soc
 ):
+    console_debug("[gen_sysinfo]")
     df = mspec.get_class_members()
 
     # Append workload information to machine specs
@@ -1051,47 +1053,58 @@ def flatten_tcc_info_across_xcds(file, xcds, tcc_channel_per_xcd):
     return df
 
 
-def total_xcds(archname, compute_partition):
+def total_xcds(gpu_model, compute_partition):
+    """
+    Returns the number of xcds for a gpu model and compute_partition pair.
+    """
+
+    # For mi300 chips, return result from mi_gpu_spec
+    result = get_mi300_num_xcds(gpu_model, compute_partition)
+    if result:
+        return result
+
+    # For other systems, use manual check
     # check MI300 has a valid compute partition
-    mi300a_archs = ["mi300a_a0", "mi300a_a1"]
-    mi300x_archs = ["mi300x_a0", "mi300x_a1"]
-    mi308x_archs = ["mi308x"]
+    mi300a_model = ["mi300a_a0", "mi300a_a1"]
+    mi300x_model = ["mi300x_a0", "mi300x_a1"]
+    mi308x_model = ["mi308x"]
     if (
-        archname.lower() in mi300a_archs + mi300x_archs + mi308x_archs
+        gpu_model.lower() in mi300a_model + mi300x_model + mi308x_model
         and compute_partition == "NA"
     ):
-        console_error("Invalid compute partition found for {}".format(archname))
-    if archname.lower() not in mi300a_archs + mi300x_archs + mi308x_archs:
+        console_error("Invalid compute partition found for {}".format(gpu_model))
+
+    if gpu_model.lower() not in mi300a_model + mi300x_model + mi308x_model:
         return 1
     # from the whitepaper
     # https://www.amd.com/content/dam/amd/en/documents/instinct-tech-docs/white-papers/amd-cdna-3-white-paper.pdf
     if compute_partition.lower() == "spx":
-        if archname.lower() in mi300a_archs:
+        if gpu_model.lower() in mi300a_model:
             return 6
-        if archname.lower() in mi300x_archs:
+        if gpu_model.lower() in mi300x_model:
             return 8
-        if archname.lower() in mi308x_archs:
+        if gpu_model.lower() in mi308x_model:
             return 4
     if compute_partition.lower() == "tpx":
-        if archname.lower() in mi300a_archs:
+        if gpu_model.lower() in mi300a_model:
             return 2
     if compute_partition.lower() == "dpx":
-        if archname.lower() in mi300x_archs:
+        if gpu_model.lower() in mi300x_model:
             return 4
-        if archname.lower() in mi308x_archs:
+        if gpu_model.lower() in mi308x_model:
             return 2
     if compute_partition.lower() == "qpx":
-        if archname.lower() in mi300x_archs:
+        if gpu_model.lower() in mi300x_model:
             return 2
     if compute_partition.lower() == "cpx":
-        if archname.lower() in mi300x_archs:
-            return 2
-        if archname.lower() in mi308x_archs:
+        if gpu_model.lower() in mi300x_model:
+            return 1
+        if gpu_model.lower() in mi308x_model:
             return 1
     # TODO implement other archs here as needed
     console_error(
         "Unknown compute partition / arch found for {} / {}".format(
-            compute_partition, archname
+            compute_partition, gpu_model
         )
     )