Adding sbios, vbios, and partitioning info to specs. Also reorganizing fields.
Co-authored-by: fei.zheng <fei.zheng@amd.com> Signed-off-by: colramos-amd <colramos@amd.com>
This commit is contained in:
@@ -144,7 +144,7 @@ class Omniperf:
|
||||
# rather than detect from rocminfo
|
||||
if not arch:
|
||||
mspec = get_machine_specs(0)
|
||||
arch = mspec.GPU
|
||||
arch = mspec.arch
|
||||
|
||||
# instantiate underlying SoC support class
|
||||
# in case of analyze mode, __soc can accommodate multiple archs
|
||||
|
||||
+177
-44
@@ -34,67 +34,106 @@ from dataclasses import dataclass
|
||||
from pathlib import Path as path
|
||||
from textwrap import dedent
|
||||
|
||||
gpu_list = {"gfx906", "gfx908", "gfx90a", "gfx900"}
|
||||
|
||||
|
||||
@dataclass
|
||||
class MachineSpecs:
|
||||
hostname: str
|
||||
cpu: str
|
||||
kernel: str
|
||||
CPU: str
|
||||
sbios: str
|
||||
kernel_version: str
|
||||
ram: str
|
||||
distro: str
|
||||
rocmversion: str
|
||||
rocm_version: str
|
||||
GPU: str
|
||||
arch: str
|
||||
vbios: str
|
||||
L1: str
|
||||
L2: str
|
||||
SCLK: str
|
||||
CU: str
|
||||
SIMD: str
|
||||
SE: str
|
||||
wave_size: str
|
||||
workgroup_size: str
|
||||
cur_SCLK: str
|
||||
cur_MCLK: str
|
||||
wave_occu: str
|
||||
workgroup_max_size: str
|
||||
max_sclk: str
|
||||
cur_sclk: str
|
||||
cur_mclk: str
|
||||
max_waves_per_cu: str
|
||||
L2Banks: str
|
||||
LDSBanks: str
|
||||
numSQC: str
|
||||
hbmBW: str
|
||||
compute_partition: str
|
||||
memory_partition: str
|
||||
|
||||
def __str__(self):
|
||||
return dedent(
|
||||
f"""\
|
||||
Host info:
|
||||
hostname: {self.hostname}
|
||||
cpu info: {self.cpu}
|
||||
CPU: {self.CPU}
|
||||
sbios: {self.sbios}
|
||||
ram: {self.ram}
|
||||
distro: {self.distro}
|
||||
kernel version: {self.kernel}
|
||||
rocm version: {self.rocmversion}
|
||||
kernel_version: {self.kernel_version}
|
||||
rocm_version: {self.rocm_version}
|
||||
Device info:
|
||||
GPU: {self.GPU}
|
||||
L1: {self.L1}
|
||||
L2: {self.L2}
|
||||
Max SCLK: {self.SCLK}MHz
|
||||
Current SCLK: {self.cur_SCLK}MHz
|
||||
Current MCLK: {self.cur_MCLK}MHz
|
||||
arch: {self.arch}
|
||||
vbios: {self.vbios}
|
||||
L1: {self.L1} KB
|
||||
L2: {self.L2} KB
|
||||
max_sclk: {self.max_sclk} MHz
|
||||
cur_sclk: {self.cur_sclk} MHz
|
||||
cur_mclk: {self.cur_mclk} MHz
|
||||
CU: {self.CU}
|
||||
SIMD: {self.SIMD}
|
||||
SE: {self.SE}
|
||||
Wave Size: {self.wave_size}
|
||||
Workgroup Max Size: {self.workgroup_size}
|
||||
Max Wave Occupancy Per CU: {self.wave_occu}
|
||||
wave_size: {self.wave_size}
|
||||
workgroup_max_size: {self.workgroup_max_size}
|
||||
max_waves_per_cu: {self.max_waves_per_cu}
|
||||
L2Banks: {self.L2Banks}
|
||||
LDSBanks: {self.LDSBanks}
|
||||
numSQC: {self.numSQC}
|
||||
hbmBW: {self.hbmBW} MB/s
|
||||
compute_partition: {self.compute_partition}
|
||||
memory_partition: {self.memory_partition}
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def gpuinfo():
|
||||
rocminfo = run(["rocminfo"]).split("\n")
|
||||
# Local var only for rocminfo searching
|
||||
gpu_list = {"gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942"}
|
||||
|
||||
# Fixme: find better way to differentiate cards, GPU vs APU, etc.
|
||||
rocminfo_full = run(["rocminfo"])
|
||||
rocminfo = rocminfo_full.split("\n")
|
||||
|
||||
for idx1, linetext in enumerate(rocminfo):
|
||||
gpu_id = search(r"^\s*Name\s*:\s+ ([a-zA-Z0-9]+)\s*$", linetext)
|
||||
if gpu_id in gpu_list:
|
||||
gpu_arch = search(r"^\s*Name\s*:\s+ ([a-zA-Z0-9]+)\s*$", linetext)
|
||||
if gpu_arch in gpu_list:
|
||||
break
|
||||
|
||||
if not gpu_id in gpu_list:
|
||||
return None, None, None, None, None, None, None, None, None, None
|
||||
if str(gpu_arch) in gpu_list:
|
||||
gpu_arch = str(gpu_arch)
|
||||
break
|
||||
if not gpu_arch in gpu_list:
|
||||
return (
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
|
||||
L1, L2 = "", ""
|
||||
for idx2, linetext in enumerate(rocminfo[idx1 + 1 :]):
|
||||
@@ -110,7 +149,7 @@ def gpuinfo():
|
||||
|
||||
key = search(r"^\s*Max Clock Freq\. \(MHz\):\s+([0-9]+)", linetext)
|
||||
if key != None:
|
||||
sclk = key
|
||||
max_sclk = key
|
||||
continue
|
||||
|
||||
key = search(r"^\s*Compute Unit:\s+ ([a-zA-Z0-9]+)\s*", linetext)
|
||||
@@ -140,10 +179,66 @@ def gpuinfo():
|
||||
|
||||
key = search(r"^\s*Max Waves Per CU:\s+ ([a-zA-Z0-9]+)\s*", linetext)
|
||||
if key != None:
|
||||
wave_occu = key
|
||||
max_waves_per_cu = key
|
||||
break
|
||||
|
||||
return gpu_id, L1, L2, sclk, num_CU, num_SIMD, num_SE, wave_size, grp_size, wave_occu
|
||||
gpu_name = ""
|
||||
L2Banks = ""
|
||||
LDSBanks = "32"
|
||||
numSQC = ""
|
||||
|
||||
if gpu_arch == "gfx906":
|
||||
gpu_name = "MI50"
|
||||
L2Banks = "16"
|
||||
numSQC = str(int(num_CU) // 4)
|
||||
elif gpu_arch == "gfx908":
|
||||
gpu_name = "MI100"
|
||||
L2Banks = "32"
|
||||
numSQC = "48"
|
||||
elif gpu_arch == "gfx90a":
|
||||
L2Banks = "32"
|
||||
gpu_name = "MI200"
|
||||
numSQC = "56"
|
||||
elif gpu_arch == "gfx940":
|
||||
gpu_name = "MI300A_A0"
|
||||
L2Banks = "16"
|
||||
numSQC = "56"
|
||||
elif gpu_arch == "gfx941":
|
||||
gpu_name = "MI300X_A0"
|
||||
L2Banks = "16"
|
||||
numSQC = "56"
|
||||
elif (gpu_arch == "gfx942") and ("MI300A" in rocminfo_full):
|
||||
gpu_name = "MI300A_A1"
|
||||
L2Banks = "16"
|
||||
numSQC = "56"
|
||||
elif (gpu_arch == "gfx942") and ("MI300A" not in rocminfo_full):
|
||||
gpu_name = "MI300X_A1"
|
||||
L2Banks = "16"
|
||||
numSQC = "56"
|
||||
else:
|
||||
print("\nInvalid SoC")
|
||||
sys.exit(0)
|
||||
|
||||
compute_partition = ""
|
||||
memory_partition = ""
|
||||
return (
|
||||
gpu_name,
|
||||
gpu_arch,
|
||||
L1,
|
||||
L2,
|
||||
max_sclk,
|
||||
num_CU,
|
||||
num_SIMD,
|
||||
num_SE,
|
||||
wave_size,
|
||||
grp_size,
|
||||
max_waves_per_cu,
|
||||
L2Banks,
|
||||
LDSBanks,
|
||||
numSQC,
|
||||
compute_partition,
|
||||
memory_partition,
|
||||
)
|
||||
|
||||
|
||||
def run(cmd):
|
||||
@@ -206,30 +301,41 @@ def get_machine_specs(devicenum):
|
||||
sys.exit(1)
|
||||
|
||||
(
|
||||
gpu_id,
|
||||
gpu_name,
|
||||
gpu_arch,
|
||||
L1,
|
||||
L2,
|
||||
sclk,
|
||||
max_sclk,
|
||||
num_CU,
|
||||
num_SIMD,
|
||||
num_SE,
|
||||
wave_size,
|
||||
grp_size,
|
||||
wave_occu,
|
||||
max_waves_per_cu,
|
||||
L2Banks,
|
||||
LDSBanks,
|
||||
numSQC,
|
||||
compute_partition,
|
||||
memory_partition,
|
||||
) = gpuinfo()
|
||||
|
||||
rocm_smi = run(["rocm-smi"])
|
||||
|
||||
device = rf"^\s*{devicenum}(.*)"
|
||||
|
||||
hostname = socket.gethostname()
|
||||
cpu = search(r"^model name\s*: (.*?)$", cpuinfo)
|
||||
kernel = search(r"version (\S*)", version)
|
||||
sbios = (
|
||||
path("/sys/class/dmi/id/bios_vendor").read_text().strip()
|
||||
+ path("/sys/class/dmi/id/bios_version").read_text().strip()
|
||||
)
|
||||
CPU = search(r"^model name\s*: (.*?)$", cpuinfo)
|
||||
kernel_version = search(r"version (\S*)", version)
|
||||
ram = search(r"MemTotal:\s*(\S*)", meminfo)
|
||||
distro = search(r'PRETTY_NAME="(.*?)"', os_release)
|
||||
if distro is None:
|
||||
distro = ""
|
||||
|
||||
rocmversion = rocm_ver.strip()
|
||||
rocm_version = rocm_ver.strip()
|
||||
|
||||
freq = search(device, rocm_smi).split()
|
||||
cur_sclk = search(r"([0-9]+)", freq[2])
|
||||
@@ -238,27 +344,54 @@ def get_machine_specs(devicenum):
|
||||
|
||||
cur_mclk = search(r"([0-9]+)", freq[3])
|
||||
if cur_mclk is None:
|
||||
cur_mclk = ""
|
||||
cur_mclk = 0
|
||||
|
||||
# FIXME with device
|
||||
vbios = search(r"VBIOS version: (.*?)$", run(["rocm-smi", "-v"]))
|
||||
|
||||
# FIXME with spec
|
||||
hbmBW = str(int(cur_mclk) / 1000 * 4096 / 8 * 2)
|
||||
|
||||
compute_partition = search(
|
||||
r"Compute Partition:\s*(\w+)", run(["rocm-smi", "--showcomputepartition"])
|
||||
)
|
||||
if compute_partition == None:
|
||||
compute_partition = "NA"
|
||||
|
||||
memory_partition = search(
|
||||
r"Memory Partition:\s*(\w+)", run(["rocm-smi", "--showmemorypartition"])
|
||||
)
|
||||
if memory_partition == None:
|
||||
memory_partition = "NA"
|
||||
|
||||
return MachineSpecs(
|
||||
hostname,
|
||||
cpu,
|
||||
kernel,
|
||||
CPU,
|
||||
sbios,
|
||||
kernel_version,
|
||||
ram,
|
||||
distro,
|
||||
rocmversion,
|
||||
gpu_id,
|
||||
rocm_version,
|
||||
gpu_name,
|
||||
gpu_arch,
|
||||
vbios,
|
||||
L1,
|
||||
L2,
|
||||
sclk,
|
||||
num_CU,
|
||||
num_SIMD,
|
||||
num_SE,
|
||||
wave_size,
|
||||
grp_size,
|
||||
max_sclk,
|
||||
cur_sclk,
|
||||
cur_mclk,
|
||||
wave_occu,
|
||||
max_waves_per_cu,
|
||||
L2Banks,
|
||||
LDSBanks,
|
||||
numSQC,
|
||||
hbmBW,
|
||||
compute_partition,
|
||||
memory_partition,
|
||||
)
|
||||
|
||||
|
||||
|
||||
+28
-28
@@ -24,11 +24,12 @@
|
||||
|
||||
import logging
|
||||
import sys
|
||||
import subprocess
|
||||
import shutil
|
||||
import os
|
||||
import io
|
||||
import re
|
||||
import selectors
|
||||
import subprocess
|
||||
import shutil
|
||||
import pandas as pd
|
||||
import glob
|
||||
from utils import specs
|
||||
@@ -243,9 +244,9 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof
|
||||
# write header
|
||||
header = "workload_name,"
|
||||
header += "command,"
|
||||
header += "host_name,host_cpu,host_distro,host_kernel,host_rocmver,date,"
|
||||
header += "gpu_soc,numSE,numCU,numSIMD,waveSize,maxWavesPerCU,maxWorkgroupSize,"
|
||||
header += "L1,L2,sclk,mclk,cur_sclk,cur_mclk,L2Banks,LDSBanks,name,numSQC,hbmBW,"
|
||||
header += "host_name,host_cpu,sbios,host_distro,host_kernel,host_rocmver,date,"
|
||||
header += "gpu_soc,vbios,numSE,numCU,numSIMD,waveSize,maxWavesPerCU,maxWorkgroupSize,"
|
||||
header += "L1,L2,sclk,mclk,cur_sclk,cur_mclk,L2Banks,LDSBanks,name,numSQC,hbmBW,compute_partition,memory_partition,"
|
||||
header += "ip_blocks\n"
|
||||
sysinfo.write(header)
|
||||
|
||||
@@ -260,45 +261,44 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof
|
||||
param += ['"' + app_cmd + '"']
|
||||
param += [
|
||||
mspec.hostname,
|
||||
mspec.cpu,
|
||||
mspec.CPU,
|
||||
mspec.sbios,
|
||||
mspec.distro,
|
||||
mspec.kernel,
|
||||
mspec.rocmversion,
|
||||
mspec.kernel_version,
|
||||
mspec.rocm_version,
|
||||
timestamp,
|
||||
]
|
||||
|
||||
# GPU info
|
||||
param += [
|
||||
mspec.GPU,
|
||||
mspec.arch,
|
||||
mspec.vbios,
|
||||
mspec.SE,
|
||||
mspec.CU,
|
||||
mspec.SIMD,
|
||||
mspec.wave_size,
|
||||
mspec.wave_occu,
|
||||
mspec.workgroup_size,
|
||||
mspec.max_waves_per_cu,
|
||||
mspec.workgroup_max_size,
|
||||
]
|
||||
param += [
|
||||
mspec.L1,
|
||||
mspec.L2,
|
||||
mspec.SCLK,
|
||||
mspec.cur_MCLK,
|
||||
mspec.cur_SCLK,
|
||||
mspec.cur_MCLK,
|
||||
mspec.cur_mclk,
|
||||
mspec.cur_mclk,
|
||||
mspec.cur_sclk,
|
||||
mspec.cur_mclk,
|
||||
mspec.L2Banks,
|
||||
mspec.LDSBanks,
|
||||
mspec.GPU,
|
||||
mspec.numSQC,
|
||||
mspec.hbmBW,
|
||||
mspec.compute_partition,
|
||||
mspec.memory_partition,
|
||||
]
|
||||
|
||||
blocks = []
|
||||
hbmBW = int(mspec.cur_MCLK) / 1000 * 4096 / 8 * 2
|
||||
if mspec.GPU == "gfx906":
|
||||
param += ["16", "32", "mi50", str(int(mspec.CU) // 4), str(hbmBW)]
|
||||
elif mspec.GPU == "gfx908":
|
||||
param += ["32", "32", "mi100", "48", str(hbmBW)]
|
||||
elif mspec.GPU == "gfx90a":
|
||||
param += ["32", "32", "mi200", "56", str(hbmBW)]
|
||||
if not skip_roof:
|
||||
if roof_only:
|
||||
ip_blocks = ["roofline"]
|
||||
else:
|
||||
blocks.append("roofline")
|
||||
if mspec.GPU == "gfx90a" and (not skip_roof):
|
||||
blocks.append("roofline")
|
||||
|
||||
# ip block info
|
||||
if ip_blocks == None:
|
||||
@@ -313,7 +313,7 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof
|
||||
|
||||
def detect_roofline():
|
||||
mspec = specs.get_machine_specs(0)
|
||||
rocm_ver = mspec.rocmversion[:1]
|
||||
rocm_ver = mspec.rocm_version[:1]
|
||||
|
||||
os_release = path("/etc/os-release").read_text()
|
||||
ubuntu_distro = specs.search(r'VERSION_ID="(.*?)"', os_release)
|
||||
|
||||
Reference in New Issue
Block a user