Carry over fixed numSQCs calc from #247

Co-authored-by: Nick Curtis <nicholas.curtis@amd.com>
Signed-off-by: colramos-amd <colramos@amd.com>
Этот коммит содержится в:
colramos-amd
2024-02-21 17:00:39 -06:00
коммит произвёл Cole Ramos
родитель 3ce140d04b
Коммит 2001908fd8
8 изменённых файлов: 17 добавлений и 7 удалений
+8 -1
Просмотреть файл
@@ -90,7 +90,7 @@ class OmniSoC_Base():
@demarcate
def populate_mspec(self):
from utils.specs import search, run
from utils.specs import search, run, total_sqc
if not hasattr(self._mspec, "_rocminfo"):
return
@@ -144,6 +144,13 @@ class OmniSoC_Base():
self._mspec.max_waves_per_cu = key
break
self._mspec.sqc_per_gpu = str(
total_sqc(
self._mspec.gpu_arch,
self._mspec.cu_per_gpu,
self._mspec.se_per_gpu
))
# we get the max mclk from rocm-smi --showmclkrange
rocm_smi_mclk = run(["rocm-smi", "--showmclkrange"], exit_on_error=True)
self._mspec.max_mclk = search(r'(\d+)Mhz\s*$', rocm_smi_mclk)
-1
Просмотреть файл
@@ -53,7 +53,6 @@ class gfx906_soc (OmniSoC_Base):
# Set arch specific specs
self._mspec.L2Banks = 16
self._mspec.lds_banks_per_cu = 32
self._mspec.sqc_per_gpu = 15
self._mspec.pipes_per_gpu = 4
# -----------------------
-1
Просмотреть файл
@@ -53,7 +53,6 @@ class gfx908_soc (OmniSoC_Base):
# Set arch specific specs
self._mspec.L2Banks = 32
self._mspec.lds_banks_per_cu = 32
self._mspec.sqc_per_gpu = 30
self._mspec.pipes_per_gpu = 4
# --showmclkrange is broken in Mi100, hardcode freq
self._mspec.max_mclk = 1200
-1
Просмотреть файл
@@ -59,7 +59,6 @@ class gfx90a_soc (OmniSoC_Base):
# Set arch specific specs
self._mspec.L2Banks = 32
self._mspec.lds_banks_per_cu = 32
self._mspec.sqc_per_gpu = 56
self._mspec.pipes_per_gpu = 4
# -----------------------
-1
Просмотреть файл
@@ -64,7 +64,6 @@ class gfx940_soc (OmniSoC_Base):
# Set arch specific specs
self._mspec.L2Banks = 16
self._mspec.lds_banks_per_cu = 32
self._mspec.sqc_per_gpu = 56
self._mspec.pipes_per_gpu = 4
# -----------------------
-1
Просмотреть файл
@@ -64,7 +64,6 @@ class gfx941_soc (OmniSoC_Base):
# Set arch specific specs
self._mspec.L2Banks = 16
self._mspec.lds_banks_per_cu = 32
self._mspec.sqc_per_gpu = 56
self._mspec.pipes_per_gpu = 4
# -----------------------
-1
Просмотреть файл
@@ -64,7 +64,6 @@ class gfx942_soc (OmniSoC_Base):
# Set arch specific specs
self._mspec.L2Banks = 16
self._mspec.lds_banks_per_cu = 32
self._mspec.sqc_per_gpu = 56
self._mspec.pipes_per_gpu = 4
# -----------------------
+9
Просмотреть файл
@@ -34,6 +34,7 @@ import logging
import pandas as pd
from datetime import datetime
from math import ceil
from dataclasses import dataclass
from pathlib import Path as path
from textwrap import dedent
@@ -290,6 +291,14 @@ def total_l2_banks(archname, L2Banks, memory_partition):
archname, memory_partition)
return str(totalL2Banks)
def total_sqc(archname, numCUs, numSEs):
    """Return the total number of SQCs on the GPU.

    The count is derived per shader engine (SE): the CUs in one SE are
    divided by the number of CUs that share a single SQC, the result is
    rounded up, and that per-SE count is multiplied by the number of SEs.

    Args:
        archname: Architecture identifier, compared case-insensitively.
            Both marketing names ("MI50", "MI100") and the matching gfx
            names ("gfx906", "gfx908") select the 3-CUs-per-SQC layout;
            every other name uses 2 CUs per SQC.
        numCUs: Total compute units on the GPU (int, or a numeric string).
        numSEs: Total shader engines on the GPU (int, or a numeric string).

    Returns:
        int: ``ceil((numCUs / numSEs) / cus_per_sqc) * numSEs``.

    Raises:
        ZeroDivisionError: If ``numSEs`` is zero.
        ValueError: If ``numCUs`` / ``numSEs`` cannot be parsed as numbers.
    """
    # Callers may hand over either the marketing name or the gfx name of
    # the device, so both spellings must map to the same layout; otherwise
    # a gfx-style name would silently fall through to the 2-CU default.
    # NOTE(review): gfx906 is treated as the MI50 layout and gfx908 as the
    # MI100 layout -- confirm against the hardware documentation.
    three_cu_archs = ("mi50", "mi100", "gfx906", "gfx908")

    cu_per_se = float(numCUs) / float(numSEs)
    cus_per_sqc = 3 if archname.lower() in three_cu_archs else 2

    # Round up: a partially populated group of CUs still has its own SQC.
    sq_per_se = ceil(cu_per_se / cus_per_sqc)
    return int(sq_per_se) * int(numSEs)
if __name__ == "__main__":
print(MachineSpecs())