Responding to Karl's review

Signed-off-by: colramos-amd <colramos@amd.com>
Cette révision appartient à :
colramos-amd
2024-01-24 12:44:42 -06:00
révisé par Cole Ramos
Parent bd600bce49
révision e5ac5ccf7d
10 fichiers modifiés avec 194 ajouts et 211 suppressions
+14 -12
Voir le fichier
@@ -29,32 +29,34 @@ import os
from pathlib import Path
import shutil
from utils.specs import get_machine_specs
from utils.utils import demarcate, trace_logger, get_version, get_version_display, detect_rocprof, error
from utils.utils import demarcate, trace_logger, get_version, get_version_display, detect_rocprof, error, get_submodules
from argparser import omniarg_parser
import config
import pandas as pd
import importlib
# Table of supported GPU architectures, keyed by gfx architecture id.
# Each value is a single-entry dict mapping a lowercase name (e.g. "mi200")
# to a list of upper-case name strings -- presumably the product models in
# that family; confirm against callers.
SUPPORTED_ARCHS = {
"gfx906": {"mi50": ["MI50", "MI60"]},
"gfx908": {"mi100": ["MI100"]},
"gfx90a": {"mi200": ["MI210", "MI250", "MI250X"]},
"gfx940": {"mi300": ["MI300A_A0"]},
"gfx941": {"mi300": ["MI300X_A0"]},
"gfx942": {"mi300": ["MI300A_A1", "MI300X_A1"]},
}
class Omniperf:
def __init__(self):
self.__args = None
self.__profiler_mode = None
self.__analyze_mode = None
self.__soc_name = set() #TODO: Should we make this a list? To accommodate analyze mode
self.__soc_name = set() # gpu name, or in case of analyze mode, all loaded gpu name(s)
self.__soc = dict() # set of key, value pairs. Where arch->OmniSoc() obj
self.__version = {
"ver": None,
"ver_pretty": None,
}
self.__options = {}
self.__supported_archs = {
"gfx906": {"mi50": ["MI50", "MI60"]},
"gfx908": {"mi100": ["MI100"]},
"gfx90a": {"mi200": ["MI210", "MI250", "MI250X"]},
"gfx940": {"mi300": ["MI300A_A0"]},
"gfx941": {"mi300": ["MI300X_A0"]},
"gfx942": {"mi300": ["MI300A_A1", "MI300X_A1"]},
}
self.__supported_archs = SUPPORTED_ARCHS
self.setup_logging()
self.set_version()
@@ -115,7 +117,7 @@ class Omniperf:
self.__version["ver"] = vData["version"]
self.__version["ver_pretty"] = get_version_display(vData["version"], vData["sha"], vData["mode"])
return
def detect_profiler(self):
#TODO:
# Currently this will only be called in profile mode
@@ -135,7 +137,7 @@ class Omniperf:
elif str(rocprof_cmd).endswith("rocprofv2"):
self.__profiler_mode = "rocprofv2"
else:
error("Incompatible profiler. Please review documentation.")
error("Incompatible profiler: %s. Supported profilers include: %s" % (rocprof_cmd, get_submodules('omniperf_profile')))
return
+1 -2
Voir le fichier
@@ -81,7 +81,6 @@ class OmniProfiler_Base():
# Remove old pmc_perf.txt input from perfmon dir
os.remove(workload_perfmon_dir + "/pmc_perf.txt")
# joins disparate runs less dumbly than rocprof
@demarcate
def join_prof(self, out=None):
"""Manually join separated rocprof runs
@@ -150,7 +149,7 @@ class OmniProfiler_Base():
logging.info(msg)
# now, we can:
#   A) throw away any of the "boring" duplicats
#   A) throw away any of the "boring" duplicates
df = df[
[
k
+13 -13
Voir le fichier
@@ -27,6 +27,18 @@ import config
from omniperf_soc.soc_base import OmniSoC_Base
from utils.utils import demarcate, error
# Module-level hardware parameter table for gfx906. It is handed to
# set_soc_param() by gfx906_soc.__init__ and kept at module scope so it can be
# read from the imported module without instantiating the class.
SOC_PARAM = {
"numSE": 4,
"numCU": 60,
"numSIMD": 240,
"numWavesPerCU": 40,
"numSQC": 15,
"L2Banks": 16,
"LDSBanks": 32,
"Freq": 1725,
"mclk": 1000
}
class gfx906_soc (OmniSoC_Base):
def __init__(self,args):
super().__init__(args)
@@ -49,19 +61,7 @@ class gfx906_soc (OmniSoC_Base):
"TCC_channels": 16,
}
)
self.set_soc_param(
{
"numSE": 4,
"numCU": 60,
"numSIMD": 240,
"numWavesPerCU": 40,
"numSQC": 15,
"L2Banks": 16,
"LDSBanks": 32,
"Freq": 1725,
"mclk": 1000
}
)
self.set_soc_param(SOC_PARAM)
#-----------------------
# Required child methods
+13 -13
Voir le fichier
@@ -27,6 +27,18 @@ import config
from omniperf_soc.soc_base import OmniSoC_Base
from utils.utils import demarcate, error
# Module-level hardware parameter table for gfx908. It is handed to
# set_soc_param() by gfx908_soc.__init__ and kept at module scope so it can be
# read from the imported module without instantiating the class.
# NOTE(review): the hard-coded gpuinfo() value for gfx908 numSQC was "48",
# whereas this table says 30 -- confirm which one is correct.
SOC_PARAM = {
"numSE": 8,
"numCU": 120,
"numSIMD": 480,
"numWavesPerCU": 40,
"numSQC": 30,
"L2Banks": 32,
"LDSBanks": 32,
"Freq": 1502,
"mclk": 1200
}
class gfx908_soc (OmniSoC_Base):
def __init__(self,args):
super().__init__(args)
@@ -49,19 +61,7 @@ class gfx908_soc (OmniSoC_Base):
"TCC_channels": 32,
}
)
self.set_soc_param(
{
"numSE": 8,
"numCU": 120,
"numSIMD": 480,
"numWavesPerCU": 40,
"numSQC": 30,
"L2Banks": 32,
"LDSBanks": 32,
"Freq": 1502,
"mclk": 1200
}
)
self.set_soc_param(SOC_PARAM)
@demarcate
def get_profiler_options(self):
+13 -13
Voir le fichier
@@ -29,6 +29,18 @@ from utils.utils import demarcate, mibench
from roofline import Roofline
import logging
# Module-level hardware parameter table for gfx90a. It is handed to
# set_soc_param() by gfx90a_soc.__init__ and kept at module scope so it can be
# read from the imported module without instantiating the class.
SOC_PARAM = {
"numSE": 8,
"numCU": 110,
"numSIMD": 440,
"numWavesPerCU": 32,
"numSQC": 56,
"L2Banks": 32,
"LDSBanks": 32,
"Freq": 1700,
"mclk": 1600
}
class gfx90a_soc (OmniSoC_Base):
def __init__(self,args):
super().__init__(args)
@@ -54,19 +66,7 @@ class gfx90a_soc (OmniSoC_Base):
"TCC_channels": 32
}
)
self.set_soc_param(
{
"numSE": 8,
"numCU": 110,
"numSIMD": 440,
"numWavesPerCU": 32,
"numSQC": 56,
"L2Banks": 32,
"LDSBanks": 32,
"Freq": 1700,
"mclk": 1600
}
)
self.set_soc_param(SOC_PARAM)
self.roofline_obj = Roofline(args)
#-----------------------
+13 -13
Voir le fichier
@@ -29,6 +29,18 @@ from utils.utils import demarcate, mibench
from roofline import Roofline
import logging
# Module-level hardware parameter table for gfx940. It is handed to
# set_soc_param() by gfx940_soc.__init__ and kept at module scope so it can be
# read from the imported module without instantiating the class.
# NOTE(review): numSIMD is 4 here, while the gfx906/gfx908/gfx90a tables use
# 4 x numCU (240, 480, 440) -- confirm whether this is per-CU or a total.
SOC_PARAM = {
"numSE": 8,
"numCU": 38,
"numSIMD": 4,
"numWavesPerCU": 32,
"numSQC": 56,
"L2Banks": 16,
"LDSBanks": 32,
"Freq": 1950,
"mclk": 1300
}
class gfx940_soc (OmniSoC_Base):
def __init__(self,args):
super().__init__(args)
@@ -55,19 +67,7 @@ class gfx940_soc (OmniSoC_Base):
"TCC_channels": 32
}
)
self.set_soc_param(
{
"numSE": 8,
"numCU": 38,
"numSIMD": 4,
"numWavesPerCU": 32,
"numSQC": 56,
"L2Banks": 16,
"LDSBanks": 32,
"Freq": 1950,
"mclk": 1300
}
)
self.set_soc_param(SOC_PARAM)
self.roofline_obj = Roofline(args)
#-----------------------
+13 -13
Voir le fichier
@@ -29,6 +29,18 @@ from utils.utils import demarcate, mibench
from roofline import Roofline
import logging
# Module-level hardware parameter table for gfx941. It is handed to
# set_soc_param() by gfx941_soc.__init__ and kept at module scope so it can be
# read from the imported module without instantiating the class.
# NOTE(review): numSIMD is 4 here, while the gfx906/gfx908/gfx90a tables use
# 4 x numCU (240, 480, 440) -- confirm whether this is per-CU or a total.
SOC_PARAM = {
"numSE": 8,
"numCU": 38,
"numSIMD": 4,
"numWavesPerCU": 32,
"numSQC": 56,
"L2Banks": 16,
"LDSBanks": 32,
"Freq": 1950,
"mclk": 1300
}
class gfx941_soc (OmniSoC_Base):
def __init__(self,args):
super().__init__(args)
@@ -55,19 +67,7 @@ class gfx941_soc (OmniSoC_Base):
"TCC_channels": 32
}
)
self.set_soc_param(
{
"numSE": 8,
"numCU": 38,
"numSIMD": 4,
"numWavesPerCU": 32,
"numSQC": 56,
"L2Banks": 16,
"LDSBanks": 32,
"Freq": 1950,
"mclk": 1300
}
)
self.set_soc_param(SOC_PARAM)
self.roofline_obj = Roofline(args)
#-----------------------
+13 -13
Voir le fichier
@@ -29,6 +29,18 @@ from utils.utils import demarcate, mibench
from roofline import Roofline
import logging
# Module-level hardware parameter table for gfx942. It is handed to
# set_soc_param() by gfx942_soc.__init__ and kept at module scope so it can be
# read from the imported module without instantiating the class.
# NOTE(review): numSIMD is 4 here, while the gfx906/gfx908/gfx90a tables use
# 4 x numCU (240, 480, 440) -- confirm whether this is per-CU or a total.
SOC_PARAM = {
"numSE": 8,
"numCU": 38,
"numSIMD": 4,
"numWavesPerCU": 32,
"numSQC": 56,
"L2Banks": 16,
"LDSBanks": 32,
"Freq": 1950,
"mclk": 1300
}
class gfx942_soc (OmniSoC_Base):
def __init__(self,args):
super().__init__(args)
@@ -55,19 +67,7 @@ class gfx942_soc (OmniSoC_Base):
"TCC_channels": 32
}
)
self.set_soc_param(
{
"numSE": 8,
"numCU": 38,
"numSIMD": 4,
"numWavesPerCU": 32,
"numSQC": 56,
"L2Banks": 16,
"LDSBanks": 32,
"Freq": 1950,
"mclk": 1300
}
)
self.set_soc_param(SOC_PARAM)
self.roofline_obj = Roofline(args)
#-----------------------
+83 -119
Voir le fichier
@@ -29,10 +29,13 @@ import re
import sys
import socket
import subprocess
import importlib
import logging
from dataclasses import dataclass
from pathlib import Path as path
from textwrap import dedent
from utils.utils import error
@dataclass
class MachineSpecs:
@@ -101,8 +104,26 @@ class MachineSpecs:
def gpuinfo():
# Local var only for rocminfo searching
gpu_list = {"gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942"}
from omniperf_base import SUPPORTED_ARCHS
gpu_info = {
"gpu_name": None,
"gpu_arch": None,
"L1": None,
"L2": None,
"max_sclk": None,
"num_CU": None,
"num_SIMD": None,
"num_SE": None,
"wave_size": None,
"grp_size": None,
"max_waves_per_cu": None,
"L2Banks": None,
"LDSBanks": None,
"numSQC": None,
"compute_partition": None,
"memory_partition": None,
}
# Fixme: find better way to differentiate cards, GPU vs APU, etc.
rocminfo_full = run(["rocminfo"])
@@ -110,135 +131,95 @@ def gpuinfo():
for idx1, linetext in enumerate(rocminfo):
gpu_arch = search(r"^\s*Name\s*:\s+ ([a-zA-Z0-9]+)\s*$", linetext)
if gpu_arch in gpu_list:
if gpu_arch in SUPPORTED_ARCHS.keys():
break
if str(gpu_arch) in gpu_list:
if str(gpu_arch) in SUPPORTED_ARCHS.keys():
gpu_arch = str(gpu_arch)
break
if not gpu_arch in gpu_list:
return (
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
)
if not gpu_arch in SUPPORTED_ARCHS.keys():
return gpu_info
L1, L2 = "", ""
gpu_info['L1'], gpu_info['L1'] = "", ""
for idx2, linetext in enumerate(rocminfo[idx1 + 1 :]):
key = search(r"^\s*L1:\s+ ([a-zA-Z0-9]+)\s*", linetext)
if key != None:
L1 = key
gpu_info['L1'] = key
continue
key = search(r"^\s*L2:\s+ ([a-zA-Z0-9]+)\s*", linetext)
if key != None:
L2 = key
gpu_info['L2'] = key
continue
key = search(r"^\s*Max Clock Freq\. \(MHz\):\s+([0-9]+)", linetext)
if key != None:
max_sclk = key
gpu_info['max_sclk'] = key
continue
key = search(r"^\s*Compute Unit:\s+ ([a-zA-Z0-9]+)\s*", linetext)
if key != None:
num_CU = key
gpu_info['num_CU'] = key
continue
key = search(r"^\s*SIMDs per CU:\s+ ([a-zA-Z0-9]+)\s*", linetext)
if key != None:
num_SIMD = key
gpu_info['num_SIMD'] = key
continue
key = search(r"^\s*Shader Engines:\s+ ([a-zA-Z0-9]+)\s*", linetext)
if key != None:
num_SE = key
gpu_info['num_SE'] = key
continue
key = search(r"^\s*Wavefront Size:\s+ ([a-zA-Z0-9]+)\s*", linetext)
if key != None:
wave_size = key
gpu_info['wave_size'] = key
continue
key = search(r"^\s*Workgroup Max Size:\s+ ([a-zA-Z0-9]+)\s*", linetext)
if key != None:
grp_size = key
gpu_info['grp_size'] = key
continue
key = search(r"^\s*Max Waves Per CU:\s+ ([a-zA-Z0-9]+)\s*", linetext)
if key != None:
max_waves_per_cu = key
gpu_info['max_waves_per_cu'] = key
break
gpu_name = ""
L2Banks = ""
LDSBanks = "32"
numSQC = ""
if gpu_arch == "gfx906":
gpu_name = "MI50"
L2Banks = "16"
numSQC = str(int(num_CU) // 4)
elif gpu_arch == "gfx908":
gpu_name = "MI100"
L2Banks = "32"
numSQC = "48"
elif gpu_arch == "gfx90a":
L2Banks = "32"
gpu_name = "MI200"
numSQC = "56"
elif gpu_arch == "gfx940":
gpu_name = "MI300A_A0"
L2Banks = "16"
numSQC = "56"
elif gpu_arch == "gfx941":
gpu_name = "MI300X_A0"
L2Banks = "16"
numSQC = "56"
elif (gpu_arch == "gfx942") and ("MI300A" in rocminfo_full):
try:
soc_module = importlib.import_module('omniperf_soc.soc_'+gpu_arch)
except ModuleNotFoundError as e:
error("Arch %s marked as supported, but couldn't find class implementation %s." % (gpu_arch, e))
# load arch specific info
try:
gpu_name = list(SUPPORTED_ARCHS[gpu_arch].keys())[0].upper()
gpu_info['L2Banks'] = str(soc_module.SOC_PARAM['L2Banks'])
gpu_info['numSQC'] = str(soc_module.SOC_PARAM['numSQC'])
gpu_info['LDSBanks'] = str(soc_module.SOC_PARAM['LDSBanks'])
except KeyError as e:
error("Incomplete class definition for %s. Expected a field for %s in SOC_PARAM." % (gpu_arch, e))\
# specify gpu name for gfx942 hardware
if gpu_name == "MI300":
gpu_name = list(SUPPORTED_ARCHS[gpu_arch].values())[0]
if (gpu_info['gpu_arch'] == "gfx942") and ("MI300A" in rocminfo_full):
gpu_name = "MI300A_A1"
L2Banks = "16"
numSQC = "56"
elif (gpu_arch == "gfx942") and ("MI300A" not in rocminfo_full):
if (gpu_arch == "gfx942") and ("MI300A" not in rocminfo_full):
gpu_name = "MI300X_A1"
L2Banks = "16"
numSQC = "56"
else:
print("\nInvalid SoC")
sys.exit(0)
compute_partition = ""
memory_partition = ""
return (
gpu_name,
gpu_arch,
L1,
L2,
max_sclk,
num_CU,
num_SIMD,
num_SE,
wave_size,
grp_size,
max_waves_per_cu,
L2Banks,
LDSBanks,
numSQC,
compute_partition,
memory_partition,
)
gpu_info['gpu_name'] = gpu_name
gpu_info['gpu_arch'] = gpu_arch
gpu_info['compute_partition'] = ""
gpu_info['memory_partition'] = ""
# verify all fields are filled
for key, value in gpu_info.items():
if value is None:
logging.info("Warning: %s is missing from gpu_info dictionary." % key)
return gpu_info
def run(cmd):
@@ -300,24 +281,7 @@ def get_machine_specs(devicenum):
print("ensure you have valid ROCm installation.")
sys.exit(1)
(
gpu_name,
gpu_arch,
L1,
L2,
max_sclk,
num_CU,
num_SIMD,
num_SE,
wave_size,
grp_size,
max_waves_per_cu,
L2Banks,
LDSBanks,
numSQC,
compute_partition,
memory_partition,
) = gpuinfo()
gpu_info = gpuinfo()
rocm_smi = run(["rocm-smi"])
@@ -372,23 +336,23 @@ def get_machine_specs(devicenum):
ram,
distro,
rocm_version,
gpu_name,
gpu_arch,
gpu_info['gpu_name'],
gpu_info['gpu_arch'],
vbios,
L1,
L2,
num_CU,
num_SIMD,
num_SE,
wave_size,
grp_size,
max_sclk,
gpu_info['L1'],
gpu_info['L2'],
gpu_info['num_CU'],
gpu_info['num_SIMD'],
gpu_info['num_SE'],
gpu_info['wave_size'],
gpu_info['grp_size'],
gpu_info['max_sclk'],
cur_sclk,
cur_mclk,
max_waves_per_cu,
L2Banks,
LDSBanks,
numSQC,
gpu_info['max_waves_per_cu'],
gpu_info['L2Banks'],
gpu_info['LDSBanks'],
gpu_info['numSQC'],
hbmBW,
compute_partition,
memory_partition,
+18
Voir le fichier
@@ -556,4 +556,22 @@ def get_hbm_stack_num(gpu_name, memory_partition):
else:
# Fixme: add proper numbers for other archs
return -1
def get_submodules(package_name):
    """List the display names of all submodules in a target package.

    Each submodule name is shortened by dropping everything up to and
    including its first underscore and then removing any remaining
    underscores (``pkg_foo_bar`` -> ``foobar``). A name that contains no
    underscore is kept unchanged rather than raising ``IndexError``.
    Submodules whose shortened name is ``base`` are left out.

    Args:
        package_name: Importable name of the package to inspect.

    Returns:
        A list of shortened submodule names, in the order reported by
        ``pkgutil.walk_packages``.

    Raises:
        ModuleNotFoundError: If ``package_name`` cannot be imported.
    """
    import importlib
    import pkgutil

    submodules = []
    # walk all submodules in target package
    package = importlib.import_module(package_name)
    for _, name, _ in pkgutil.walk_packages(package.__path__):
        # partition() never fails: sep is "" when there is no underscore
        _prefix, sep, remainder = name.partition("_")
        pretty_name = remainder.replace("_", "") if sep else name
        # ignore base submodule, add all other
        if pretty_name != "base":
            submodules.append(pretty_name)

    return submodules