Carry over fixed numSQCs calc from #247
Co-authored-by: Nick Curtis <nicholas.curtis@amd.com>
Signed-off-by: colramos-amd <colramos@amd.com>
Этот коммит содержится в:
коммит произвёл
Cole Ramos
родитель
3ce140d04b
Коммит
2001908fd8
@@ -90,7 +90,7 @@ class OmniSoC_Base():
|
||||
|
||||
@demarcate
|
||||
def populate_mspec(self):
|
||||
from utils.specs import search, run
|
||||
from utils.specs import search, run, total_sqc
|
||||
|
||||
if not hasattr(self._mspec, "_rocminfo"):
|
||||
return
|
||||
@@ -144,6 +144,13 @@ class OmniSoC_Base():
|
||||
self._mspec.max_waves_per_cu = key
|
||||
break
|
||||
|
||||
self._mspec.sqc_per_gpu = str(
|
||||
total_sqc(
|
||||
self._mspec.gpu_arch,
|
||||
self._mspec.cu_per_gpu,
|
||||
self._mspec.se_per_gpu
|
||||
))
|
||||
|
||||
# we get the max mclk from rocm-smi --showmclkrange
|
||||
rocm_smi_mclk = run(["rocm-smi", "--showmclkrange"], exit_on_error=True)
|
||||
self._mspec.max_mclk = search(r'(\d+)Mhz\s*$', rocm_smi_mclk)
|
||||
|
||||
@@ -53,7 +53,6 @@ class gfx906_soc (OmniSoC_Base):
|
||||
# Set arch specific specs
|
||||
self._mspec.L2Banks = 16
|
||||
self._mspec.lds_banks_per_cu = 32
|
||||
self._mspec.sqc_per_gpu = 15
|
||||
self._mspec.pipes_per_gpu = 4
|
||||
|
||||
# -----------------------
|
||||
|
||||
@@ -53,7 +53,6 @@ class gfx908_soc (OmniSoC_Base):
|
||||
# Set arch specific specs
|
||||
self._mspec.L2Banks = 32
|
||||
self._mspec.lds_banks_per_cu = 32
|
||||
self._mspec.sqc_per_gpu = 30
|
||||
self._mspec.pipes_per_gpu = 4
|
||||
# --showmclkrange is broken on MI100, so hardcode the frequency
|
||||
self._mspec.max_mclk = 1200
|
||||
|
||||
@@ -59,7 +59,6 @@ class gfx90a_soc (OmniSoC_Base):
|
||||
# Set arch specific specs
|
||||
self._mspec.L2Banks = 32
|
||||
self._mspec.lds_banks_per_cu = 32
|
||||
self._mspec.sqc_per_gpu = 56
|
||||
self._mspec.pipes_per_gpu = 4
|
||||
|
||||
# -----------------------
|
||||
|
||||
@@ -64,7 +64,6 @@ class gfx940_soc (OmniSoC_Base):
|
||||
# Set arch specific specs
|
||||
self._mspec.L2Banks = 16
|
||||
self._mspec.lds_banks_per_cu = 32
|
||||
self._mspec.sqc_per_gpu = 56
|
||||
self._mspec.pipes_per_gpu = 4
|
||||
|
||||
# -----------------------
|
||||
|
||||
@@ -64,7 +64,6 @@ class gfx941_soc (OmniSoC_Base):
|
||||
# Set arch specific specs
|
||||
self._mspec.L2Banks = 16
|
||||
self._mspec.lds_banks_per_cu = 32
|
||||
self._mspec.sqc_per_gpu = 56
|
||||
self._mspec.pipes_per_gpu = 4
|
||||
|
||||
# -----------------------
|
||||
|
||||
@@ -64,7 +64,6 @@ class gfx942_soc (OmniSoC_Base):
|
||||
# Set arch specific specs
|
||||
self._mspec.L2Banks = 16
|
||||
self._mspec.lds_banks_per_cu = 32
|
||||
self._mspec.sqc_per_gpu = 56
|
||||
self._mspec.pipes_per_gpu = 4
|
||||
|
||||
# -----------------------
|
||||
|
||||
@@ -34,6 +34,7 @@ import logging
|
||||
import pandas as pd
|
||||
|
||||
from datetime import datetime
|
||||
from math import ceil
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path as path
|
||||
from textwrap import dedent
|
||||
@@ -290,6 +291,14 @@ def total_l2_banks(archname, L2Banks, memory_partition):
|
||||
archname, memory_partition)
|
||||
return str(totalL2Banks)
|
||||
|
||||
def total_sqc(archname, numCUs, numSEs):
    """Return the total SQC count for a GPU as an ``int``.

    The CUs are spread evenly over the shader engines (SEs); each SE gets
    ``ceil(cu_per_se / cus_per_sqc)`` SQCs, and that per-SE count is then
    multiplied by the number of SEs.

    Args:
        archname: Architecture identifier, compared case-insensitively.
            Both the product names (``mi50``, ``mi100``) and their gfx IDs
            (``gfx906``, ``gfx908``) select the three-CUs-per-SQC ratio;
            every other value uses two CUs per SQC.
        numCUs: Total CU count; anything ``float()`` accepts.
        numSEs: Total SE count; must be accepted by both ``float()`` and
            ``int()``.

    Returns:
        int: SQCs per SE (rounded up) times the number of SEs.

    Raises:
        ZeroDivisionError: if ``numSEs`` is zero.
    """
    # The caller hands over the machine spec's ``gpu_arch`` field, which
    # may carry a gfx ID rather than a product name, so both spellings of
    # the same parts are accepted here.
    # NOTE(review): gfx906 <-> MI50 and gfx908 <-> MI100 aliasing assumed;
    # confirm against the values the machine spec actually stores.
    three_cu_archs = ("mi50", "mi100", "gfx906", "gfx908")

    cu_per_se = float(numCUs) / float(numSEs)
    cus_per_sqc = 3 if archname.lower() in three_cu_archs else 2

    # A partially populated SQC still counts as one, hence the round-up.
    sq_per_se = ceil(cu_per_se / cus_per_sqc)
    return int(sq_per_se) * int(numSEs)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(MachineSpecs())
|
||||
|
||||
Ссылка в новой задаче
Block a user