Fix hbmBW calculation, (#243)
* Fix hbmBW calculation, currently we're reading the wrong value of the mclk out of rocm-smi
Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
* Fix for earlier smi
Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
---------
Signed-off-by: Nicholas Curtis <nicurtis@amd.com>
Co-authored-by: Nicholas Curtis <nicurtis@amd.com>
[ROCm/rocprofiler-compute commit: c60f031558]
Dieser Commit ist enthalten in:
@@ -30,7 +30,7 @@ import os
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from utils import schema
|
||||
from utils.utils import error, get_hbm_stack_num
|
||||
from utils.utils import error
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
@@ -409,24 +409,7 @@ def calc_builtin_var(var, sys_info):
|
||||
if isinstance(var, int):
|
||||
return var
|
||||
elif isinstance(var, str) and var.startswith("$totalL2Banks"):
|
||||
# Fixme: support all supported partitioning mode
|
||||
# Fixme: "name" is a bad name!
|
||||
totalL2Banks = sys_info.L2Banks
|
||||
if (
|
||||
sys_info["name"].lower() == "mi300a_a0"
|
||||
or sys_info["name"].lower() == "mi300a_a1"
|
||||
):
|
||||
totalL2Banks = sys_info.L2Banks * get_hbm_stack_num(
|
||||
sys_info["name"], sys_info["memory_partition"]
|
||||
)
|
||||
elif (
|
||||
sys_info["name"].lower() == "mi300x_a0"
|
||||
or sys_info["name"].lower() == "mi300x_a1"
|
||||
):
|
||||
totalL2Banks = sys_info.L2Banks * get_hbm_stack_num(
|
||||
sys_info["name"], sys_info["memory_partition"]
|
||||
)
|
||||
return totalL2Banks
|
||||
return sys_info.totalL2Banks
|
||||
else:
|
||||
print("Don't support", var)
|
||||
sys.exit(1)
|
||||
@@ -1014,6 +997,7 @@ def correct_sys_info(df, specs_correction):
|
||||
"cur_sclk": "cur_sclk",
|
||||
"cur_mclk": "cur_mclk",
|
||||
"L2Banks": "L2Banks",
|
||||
"totalL2Banks": "totalL2Banks",
|
||||
"LDSBanks": "LDSBanks",
|
||||
"numSQC": "numSQC",
|
||||
"numPipes": "numPipes",
|
||||
|
||||
@@ -35,7 +35,7 @@ import logging
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path as path
|
||||
from textwrap import dedent
|
||||
from utils.utils import error
|
||||
from utils.utils import error, get_hbm_stack_num
|
||||
|
||||
@dataclass
|
||||
class MachineSpecs:
|
||||
@@ -57,10 +57,12 @@ class MachineSpecs:
|
||||
wave_size: str
|
||||
workgroup_max_size: str
|
||||
max_sclk: str
|
||||
max_mclk: str
|
||||
cur_sclk: str
|
||||
cur_mclk: str
|
||||
max_waves_per_cu: str
|
||||
L2Banks: str
|
||||
totalL2Banks: str
|
||||
LDSBanks: str
|
||||
numSQC: str
|
||||
numPipes: str
|
||||
@@ -86,6 +88,7 @@ class MachineSpecs:
|
||||
L1: {self.L1} KB
|
||||
L2: {self.L2} KB
|
||||
max_sclk: {self.max_sclk} MHz
|
||||
max_mclk: {self.max_mclk} MHz
|
||||
cur_sclk: {self.cur_sclk} MHz
|
||||
cur_mclk: {self.cur_mclk} MHz
|
||||
CU: {self.CU}
|
||||
@@ -95,6 +98,7 @@ class MachineSpecs:
|
||||
workgroup_max_size: {self.workgroup_max_size}
|
||||
max_waves_per_cu: {self.max_waves_per_cu}
|
||||
L2Banks: {self.L2Banks}
|
||||
totalL2Banks: {self.totalL2Banks}
|
||||
LDSBanks: {self.LDSBanks}
|
||||
numSQC: {self.numSQC}
|
||||
numPipes: {self.numPipes}
|
||||
@@ -114,6 +118,7 @@ def gpuinfo():
|
||||
"L1": None,
|
||||
"L2": None,
|
||||
"max_sclk": None,
|
||||
"max_mclk": None,
|
||||
"num_CU": None,
|
||||
"num_SIMD": None,
|
||||
"numPipes": None,
|
||||
@@ -128,6 +133,10 @@ def gpuinfo():
|
||||
"memory_partition": None,
|
||||
}
|
||||
|
||||
# we get the max mclk from rocm-smi --showmclkrange
|
||||
rocm_smi_mclk = run(["rocm-smi", "--showmclkrange"], exit_on_error=True)
|
||||
gpu_info['max_mclk'] = search(r'(\d+)Mhz\s*$', rocm_smi_mclk)
|
||||
|
||||
# Fixme: find better way to differentiate cards, GPU vs APU, etc.
|
||||
rocminfo_full = run(["rocminfo"])
|
||||
rocminfo = rocminfo_full.split("\n")
|
||||
@@ -246,6 +255,23 @@ def search(pattern, string):
|
||||
return m.group(1)
|
||||
return None
|
||||
|
||||
def total_l2_banks(archname, L2Banks, memory_partition):
|
||||
# Fixme: support all supported partitioning mode
|
||||
# Fixme: "name" is a bad name!
|
||||
totalL2Banks = L2Banks
|
||||
if (
|
||||
archname.lower() == "mi300a_a0"
|
||||
or archname.lower() == "mi300a_a1"
|
||||
):
|
||||
totalL2Banks = L2Banks * get_hbm_stack_num(
|
||||
archname, memory_partition)
|
||||
elif (
|
||||
archname.lower() == "mi300x_a0"
|
||||
or archname.lower() == "mi300x_a1"
|
||||
):
|
||||
totalL2Banks = L2Banks * get_hbm_stack_num(
|
||||
archname, memory_partition)
|
||||
return totalL2Banks
|
||||
|
||||
def get_machine_specs(devicenum):
|
||||
cpuinfo = path("/proc/cpuinfo").read_text()
|
||||
@@ -323,9 +349,6 @@ def get_machine_specs(devicenum):
|
||||
# FIXME with device
|
||||
vbios = search(r"VBIOS version: (.*?)$", run(["rocm-smi", "-v"], exit_on_error=True))
|
||||
|
||||
# FIXME with spec
|
||||
hbmBW = str(int(cur_mclk) / 1000 * 4096 / 8 * 2)
|
||||
|
||||
compute_partition = search(
|
||||
r"Compute Partition:\s*(\w+)", run(["rocm-smi", "--showcomputepartition"])
|
||||
)
|
||||
@@ -338,6 +361,18 @@ def get_machine_specs(devicenum):
|
||||
if memory_partition == None:
|
||||
memory_partition = "NA"
|
||||
|
||||
totalL2Banks = total_l2_banks(
|
||||
gpu_info['gpu_name'], int(gpu_info['L2Banks']), memory_partition)
|
||||
hbmchannels = totalL2Banks
|
||||
if (
|
||||
gpu_info['gpu_name'].lower() == "mi300a_a0"
|
||||
or gpu_info['gpu_name'].lower() == "mi300a_a1"
|
||||
) and memory_partition.lower() == "nps1":
|
||||
# we have an extra 32 channels for the CCD
|
||||
hbmchannels += 32
|
||||
hbmBW = str(int(gpu_info['max_mclk']) / 1000 * 32 * hbmchannels)
|
||||
totalL2Banks = str(totalL2Banks)
|
||||
|
||||
return MachineSpecs(
|
||||
hostname,
|
||||
CPU,
|
||||
@@ -357,10 +392,12 @@ def get_machine_specs(devicenum):
|
||||
gpu_info['wave_size'],
|
||||
gpu_info['grp_size'],
|
||||
gpu_info['max_sclk'],
|
||||
gpu_info['max_mclk'],
|
||||
cur_sclk,
|
||||
cur_mclk,
|
||||
gpu_info['max_waves_per_cu'],
|
||||
gpu_info['L2Banks'],
|
||||
totalL2Banks,
|
||||
gpu_info['LDSBanks'],
|
||||
gpu_info['numSQC'],
|
||||
gpu_info['numPipes'],
|
||||
|
||||
@@ -299,7 +299,8 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof
|
||||
header += "command,"
|
||||
header += "host_name,host_cpu,sbios,host_distro,host_kernel,host_rocmver,date,"
|
||||
header += "gpu_soc,vbios,numSE,numCU,numSIMD,waveSize,maxWavesPerCU,maxWorkgroupSize,"
|
||||
header += "L1,L2,sclk,mclk,cur_sclk,cur_mclk,L2Banks,LDSBanks,name,numSQC,numPipes,hbmBW,compute_partition,memory_partition,"
|
||||
header += "L1,L2,sclk,mclk,cur_sclk,cur_mclk,L2Banks,totalL2Banks,LDSBanks,name,numSQC,numPipes,"
|
||||
header += "hbmBW,compute_partition,memory_partition,"
|
||||
header += "ip_blocks\n"
|
||||
sysinfo.write(header)
|
||||
|
||||
@@ -341,6 +342,7 @@ def gen_sysinfo(workload_name, workload_dir, ip_blocks, app_cmd, skip_roof, roof
|
||||
mspec.cur_sclk,
|
||||
mspec.cur_mclk,
|
||||
mspec.L2Banks,
|
||||
mspec.totalL2Banks,
|
||||
mspec.LDSBanks,
|
||||
mspec.GPU,
|
||||
mspec.numSQC,
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren