Enhance correct_sys_info() func and err checking
Signed-off-by: colramos-amd <colramos@amd.com>
[ROCm/rocprofiler-compute commit: c17e39189f]
This commit is contained in:
Committed by: Cole Ramos
Parent: 98ecb3b590
Commit: 78b668f128
@@ -41,8 +41,8 @@ class OmniAnalyze_Base:
|
||||
self._runs = OrderedDict()
|
||||
self._arch_configs = {}
|
||||
self.__supported_archs = supported_archs
|
||||
self._output = None
|
||||
self.__socs = None # available OmniSoC objs
|
||||
self._output = None
|
||||
self.__socs:dict = None # available OmniSoC objs
|
||||
|
||||
def get_args(self):
|
||||
return self.__args
|
||||
@@ -150,12 +150,11 @@ class OmniAnalyze_Base:
|
||||
for d in self.__args.path:
|
||||
w = schema.Workload()
|
||||
w.sys_info = file_io.load_sys_info(Path(d[0], "sysinfo.csv"))
|
||||
if self.__args.specs_correction:
|
||||
w.sys_info = parser.correct_sys_info(
|
||||
w.sys_info, self.__args.specs_correction
|
||||
)
|
||||
w.avail_ips = w.sys_info["ip_blocks"].item().split("|")
|
||||
arch = w.sys_info.iloc[0]["gpu_arch"]
|
||||
mspec = self.get_socs()[arch]._mspec
|
||||
if self.__args.specs_correction:
|
||||
w.sys_info = parser.correct_sys_info(mspec, self.__args.specs_correction)
|
||||
w.avail_ips = w.sys_info["ip_blocks"].item().split("|")
|
||||
w.dfs = copy.deepcopy(self._arch_configs[arch].dfs)
|
||||
w.dfs_type = self._arch_configs[arch].dfs_type
|
||||
self._runs[d[0]] = w
|
||||
|
||||
@@ -156,7 +156,7 @@ class Omniperf:
|
||||
return
|
||||
|
||||
@demarcate
|
||||
def load_soc_specs(self, sysinfo=None):
|
||||
def load_soc_specs(self, sysinfo:dict=None):
|
||||
"""Load OmniSoC instance for Omniperf run
|
||||
"""
|
||||
self.__mspec = MachineSpecs(self.__args, sysinfo)
|
||||
@@ -278,6 +278,7 @@ class Omniperf:
|
||||
# Load required SoC(s) from input
|
||||
for d in analyzer.get_args().path:
|
||||
sys_info = pd.read_csv(Path(d[0], "sysinfo.csv"))
|
||||
sys_info = sys_info.to_dict('list')
|
||||
self.load_soc_specs(sys_info)
|
||||
|
||||
analyzer.set_soc(self.__soc)
|
||||
|
||||
@@ -912,9 +912,12 @@ def load_kernel_top(workload, dir):
|
||||
if file.exists():
|
||||
tmp[id] = pd.read_csv(file)
|
||||
else:
|
||||
logging.info(
|
||||
"Warning: Issue loading top kernels. Check pmc_kernel_top.csv"
|
||||
)
|
||||
logging.info("Warning: Issue loading top kernels. Check pmc_kernel_top.csv")
|
||||
# NB: Special case for sysinfo. Probably room for improvement in this whole function design
|
||||
elif "from_csv_columnwise" in df.columns and id == 101:
|
||||
tmp[id] = workload.sys_info.transpose()
|
||||
# All transposed columns should be marked with a general header
|
||||
tmp[id].columns = ["Info"]
|
||||
elif "from_csv_columnwise" in df.columns:
|
||||
# NB:
|
||||
# Another way might be doing transpose in tty like metric_table.
|
||||
@@ -962,60 +965,18 @@ def build_comparable_columns(time_unit):
|
||||
|
||||
return comparable_columns
|
||||
|
||||
|
||||
def correct_sys_info(df, specs_correction):
|
||||
def correct_sys_info(mspec, specs_correction:dict):
|
||||
"""
|
||||
Correct system spec items manually
|
||||
"""
|
||||
|
||||
# NB: to keep the backwards compatibility, we don't touch the current
|
||||
# naming convention. Ideally, the header of sysinfo should use/include
|
||||
# the members of MachineSpecs directly.
|
||||
|
||||
# Sync up with the header defined in omniperf gen_sysinfo() !!
|
||||
# header = "workload_name,"
|
||||
# header += "command,"
|
||||
# header += "host_name,host_cpu,host_distro,host_kernel,host_rocmver,date,"
|
||||
# header += "gpu_soc,numSE,numCU,numSIMD,waveSize,maxWavesPerCU,maxWorkgroupSize,"
|
||||
# header += "L1,L2,sclk,mclk,cur_sclk,cur_mclk,L2Banks,LDSBanks,name,numSQC,numPipes,hbmBW,compute_partition,memory_partition,"
|
||||
# header += "ip_blocks\n"
|
||||
|
||||
name_map = {
|
||||
"host_name": "hostname",
|
||||
"CPU": "host_cpu",
|
||||
"kernel_version": "host_kernel",
|
||||
"host_distro": "distro",
|
||||
# "ram": "",
|
||||
"distro": "host_distro",
|
||||
"rocm_version": "host_rocmver",
|
||||
"GPU": "name",
|
||||
"arch": "gpu_soc",
|
||||
"L1": "L1",
|
||||
"L2": "L2",
|
||||
"CU": "numCU",
|
||||
"SIMD": "numSIMD",
|
||||
"SE": "numSE",
|
||||
"wave_size": "waveSize",
|
||||
"max_waves_per_cu": "maxWavesPerCU",
|
||||
"max_waves_per_cu": "maxWorkgroupSize",
|
||||
"max_sclk": "sclk",
|
||||
"max_mclk": "mclk",
|
||||
"cur_sclk": "cur_sclk",
|
||||
"cur_mclk": "cur_mclk",
|
||||
"L2Banks": "L2Banks",
|
||||
"totalL2Banks": "totalL2Banks",
|
||||
"LDSBanks": "LDSBanks",
|
||||
"numSQC": "numSQC",
|
||||
"numPipes": "numPipes",
|
||||
"hbmBW": "hbmBW",
|
||||
"compute_partition": "compute_partition",
|
||||
"memory_partition": "memory_partition",
|
||||
"num_xcd": "num_xcd"
|
||||
}
|
||||
|
||||
# todo: more err checking for string specs_correction
|
||||
pairs = dict(re.findall(r"(\w+):\s*(\d+)", specs_correction))
|
||||
for k, v in pairs.items():
|
||||
df[name_map[k]] = v
|
||||
|
||||
return df
|
||||
pairs = dict(re.findall(r"(\w+):\s*(\d+)", specs_correction))
|
||||
|
||||
for k, v in pairs.items():
|
||||
if not hasattr(mspec, str(k)):
|
||||
error(f"Invalid specs correction '{k}'. Please use --specs option to peak valid specs")
|
||||
setattr(mspec, str(k), v)
|
||||
return mspec.get_class_members()
|
||||
|
||||
|
||||
|
||||
@@ -53,9 +53,10 @@ VERSION_LOC = [
|
||||
|
||||
@dataclass
|
||||
class MachineSpecs:
|
||||
def __init__(self, args, sysinfo=None):
|
||||
def __init__(self, args, sysinfo:dict=None):
|
||||
if not sysinfo is None:
|
||||
self.gpu_arch = sysinfo.iloc[0]["gpu_arch"]
|
||||
for key, value in sysinfo.items():
|
||||
setattr(self, key, value[0])
|
||||
return
|
||||
# read timestamp info
|
||||
now = datetime.now()
|
||||
@@ -254,8 +255,16 @@ def get_rocm_ver():
|
||||
error("Unable to detect a complete local ROCm installation.\nThe expected %s/.info/ versioning directory is missing. Please ensure you have valid ROCm installation." % _rocm_path)
|
||||
return rocm_ver
|
||||
|
||||
<<<<<<< HEAD
|
||||
def run(cmd, exit_on_error=False):
|
||||
p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
=======
|
||||
def run(cmd,exit_on_error=False):
|
||||
try:
|
||||
p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
except FileNotFoundError as e:
|
||||
error(f"Unable to parse specs. Can't find ROCm asset: {e.filename}\nTry passing a path to an existing workload results in 'analyze' mode.")
|
||||
>>>>>>> 2d92bcf (Enhance correct_sys_info() func and err checking)
|
||||
|
||||
if exit_on_error:
|
||||
if cmd[0] == "rocm-smi":
|
||||
|
||||
Reference in new issue
Block a user