diff --git a/src/omniperf_soc/soc_base.py b/src/omniperf_soc/soc_base.py index 7c3b76a906..38b08b4dd1 100644 --- a/src/omniperf_soc/soc_base.py +++ b/src/omniperf_soc/soc_base.py @@ -90,7 +90,7 @@ class OmniSoC_Base(): @demarcate def populate_mspec(self): - from utils.specs import search, run + from utils.specs import search, run, total_sqc if not hasattr(self._mspec, "_rocminfo"): return @@ -144,6 +144,13 @@ class OmniSoC_Base(): self._mspec.max_waves_per_cu = key break + self._mspec.sqc_per_gpu = str( + total_sqc( + self._mspec.gpu_arch, + self._mspec.cu_per_gpu, + self._mspec.se_per_gpu + )) + # we get the max mclk from rocm-smi --showmclkrange rocm_smi_mclk = run(["rocm-smi", "--showmclkrange"], exit_on_error=True) self._mspec.max_mclk = search(r'(\d+)Mhz\s*$', rocm_smi_mclk) diff --git a/src/omniperf_soc/soc_gfx906.py b/src/omniperf_soc/soc_gfx906.py index 0bd98430b3..a42a5b4ba2 100644 --- a/src/omniperf_soc/soc_gfx906.py +++ b/src/omniperf_soc/soc_gfx906.py @@ -53,7 +53,6 @@ class gfx906_soc (OmniSoC_Base): # Set arch specific specs self._mspec.L2Banks = 16 self._mspec.lds_banks_per_cu = 32 - self._mspec.sqc_per_gpu = 15 self._mspec.pipes_per_gpu = 4 # ----------------------- diff --git a/src/omniperf_soc/soc_gfx908.py b/src/omniperf_soc/soc_gfx908.py index fd2171f925..4833cebf9f 100644 --- a/src/omniperf_soc/soc_gfx908.py +++ b/src/omniperf_soc/soc_gfx908.py @@ -53,7 +53,6 @@ class gfx908_soc (OmniSoC_Base): # Set arch specific specs self._mspec.L2Banks = 32 self._mspec.lds_banks_per_cu = 32 - self._mspec.sqc_per_gpu = 30 self._mspec.pipes_per_gpu = 4 # --showmclkrange is broken in Mi100, hardcode freq self._mspec.max_mclk = 1200 diff --git a/src/omniperf_soc/soc_gfx90a.py b/src/omniperf_soc/soc_gfx90a.py index eafc19c55f..f1dd567189 100644 --- a/src/omniperf_soc/soc_gfx90a.py +++ b/src/omniperf_soc/soc_gfx90a.py @@ -59,7 +59,6 @@ class gfx90a_soc (OmniSoC_Base): # Set arch specific specs self._mspec.L2Banks = 32 self._mspec.lds_banks_per_cu = 32 - 
# Architectures for which three CUs (rather than two) are counted per SQC.
# Both the product names and the matching gfx target names are listed, because
# callers may pass either spelling (e.g. a `gpu_arch` value such as "gfx908").
_THREE_CU_PER_SQC_ARCHS = frozenset({"mi50", "mi100", "gfx906", "gfx908"})


def total_sqc(archname, numCUs, numSEs):
    """Return the total SQC count derived from the CU and SE counts.

    The CUs are split evenly across the shader engines, the per-SE CU count
    is divided by the number of CUs counted per SQC (three for MI50/MI100,
    i.e. gfx906/gfx908; two otherwise), the quotient is rounded up, and the
    result is multiplied by the number of shader engines.

    Args:
        archname: Architecture identifier, matched case-insensitively.
            Either a product name ("mi50", "mi100") or a gfx target name
            ("gfx906", "gfx908") selects the three-CU grouping.
        numCUs: Total CU count; anything accepted by ``float()``.
        numSEs: Total shader-engine count; anything accepted by both
            ``float()`` and ``int()``.

    Returns:
        int: ``ceil(CUs-per-SE / CUs-per-SQC) * numSEs``.

    Raises:
        ZeroDivisionError: If ``numSEs`` converts to zero.
        ValueError: If a count cannot be converted to a number.
    """
    cu_per_se = float(numCUs) / float(numSEs)
    # Pick the grouping once instead of computing a default and overwriting it.
    cus_per_sqc = 3 if archname.lower() in _THREE_CU_PER_SQC_ARCHS else 2
    # Round up so a partially populated group still counts as one SQC.
    sqc_per_se = ceil(cu_per_se / cus_per_sqc)
    return int(sqc_per_se) * int(numSEs)