diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/analysis_base.py b/projects/rocprofiler-compute/src/omniperf_analyze/analysis_base.py index ad81e27998..8d65e156ac 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/analysis_base.py +++ b/projects/rocprofiler-compute/src/omniperf_analyze/analysis_base.py @@ -41,8 +41,8 @@ class OmniAnalyze_Base: self._runs = OrderedDict() self._arch_configs = {} self.__supported_archs = supported_archs - self._output = None - self.__socs = None # available OmniSoC objs + self._output = None + self.__socs:dict = None # available OmniSoC objs def get_args(self): return self.__args @@ -150,12 +150,11 @@ class OmniAnalyze_Base: for d in self.__args.path: w = schema.Workload() w.sys_info = file_io.load_sys_info(Path(d[0], "sysinfo.csv")) - if self.__args.specs_correction: - w.sys_info = parser.correct_sys_info( - w.sys_info, self.__args.specs_correction - ) - w.avail_ips = w.sys_info["ip_blocks"].item().split("|") arch = w.sys_info.iloc[0]["gpu_arch"] + mspec = self.get_socs()[arch]._mspec + if self.__args.specs_correction: + w.sys_info = parser.correct_sys_info(mspec, self.__args.specs_correction) + w.avail_ips = w.sys_info["ip_blocks"].item().split("|") w.dfs = copy.deepcopy(self._arch_configs[arch].dfs) w.dfs_type = self._arch_configs[arch].dfs_type self._runs[d[0]] = w diff --git a/projects/rocprofiler-compute/src/omniperf_base.py b/projects/rocprofiler-compute/src/omniperf_base.py index 4efe3c8b93..f2650fb856 100644 --- a/projects/rocprofiler-compute/src/omniperf_base.py +++ b/projects/rocprofiler-compute/src/omniperf_base.py @@ -156,7 +156,7 @@ class Omniperf: return @demarcate - def load_soc_specs(self, sysinfo=None): + def load_soc_specs(self, sysinfo:dict=None): """Load OmniSoC instance for Omniperf run """ self.__mspec = MachineSpecs(self.__args, sysinfo) @@ -278,6 +278,7 @@ class Omniperf: # Load required SoC(s) from input for d in analyzer.get_args().path: sys_info = pd.read_csv(Path(d[0], "sysinfo.csv")) + sys_info = sys_info.to_dict('list') self.load_soc_specs(sys_info) analyzer.set_soc(self.__soc) diff --git a/projects/rocprofiler-compute/src/utils/parser.py b/projects/rocprofiler-compute/src/utils/parser.py index 3af6570d68..e36b63a4cd 100644 --- a/projects/rocprofiler-compute/src/utils/parser.py +++ b/projects/rocprofiler-compute/src/utils/parser.py @@ -912,9 +912,12 @@ def load_kernel_top(workload, dir): if file.exists(): tmp[id] = pd.read_csv(file) else: - logging.info( - "Warning: Issue loading top kernels. Check pmc_kernel_top.csv" - ) + logging.info("Warning: Issue loading top kernels. Check pmc_kernel_top.csv") + # NB: Special case for sysinfo. Probably room for improvement in this whole function design + elif "from_csv_columnwise" in df.columns and id == 101: + tmp[id] = workload.sys_info.transpose() + # All transposed columns should be marked with a general header + tmp[id].columns = ["Info"] elif "from_csv_columnwise" in df.columns: # NB: # Another way might be doing transpose in tty like metric_table. @@ -962,60 +965,18 @@ def build_comparable_columns(time_unit): return comparable_columns - -def correct_sys_info(df, specs_correction): +def correct_sys_info(mspec, specs_correction:dict): """ Correct system spec items manually """ - - # NB: to keep the backwards compatibility, we don't touch the current - # naming convention. Ideally, the header of sysinfo should use/include - # the members of MachineSpecs directly. - - # Sync up with the header defined in omniperf gen_sysinfo() !! - # header = "workload_name," - # header += "command," - # header += "host_name,host_cpu,host_distro,host_kernel,host_rocmver,date," - # header += "gpu_soc,numSE,numCU,numSIMD,waveSize,maxWavesPerCU,maxWorkgroupSize," - # header += "L1,L2,sclk,mclk,cur_sclk,cur_mclk,L2Banks,LDSBanks,name,numSQC,numPipes,hbmBW,compute_partition,memory_partition," - # header += "ip_blocks\n" - - name_map = { - "host_name": "hostname", - "CPU": "host_cpu", - "kernel_version": "host_kernel", - "host_distro": "distro", - # "ram": "", - "distro": "host_distro", - "rocm_version": "host_rocmver", - "GPU": "name", - "arch": "gpu_soc", - "L1": "L1", - "L2": "L2", - "CU": "numCU", - "SIMD": "numSIMD", - "SE": "numSE", - "wave_size": "waveSize", - "max_waves_per_cu": "maxWavesPerCU", - "max_waves_per_cu": "maxWorkgroupSize", - "max_sclk": "sclk", - "max_mclk": "mclk", - "cur_sclk": "cur_sclk", - "cur_mclk": "cur_mclk", - "L2Banks": "L2Banks", - "totalL2Banks": "totalL2Banks", - "LDSBanks": "LDSBanks", - "numSQC": "numSQC", - "numPipes": "numPipes", - "hbmBW": "hbmBW", - "compute_partition": "compute_partition", - "memory_partition": "memory_partition", - "num_xcd": "num_xcd" - } - # todo: more err checking for string specs_correction - pairs = dict(re.findall(r"(\w+):\s*(\d+)", specs_correction)) - for k, v in pairs.items(): - df[name_map[k]] = v - return df + pairs = dict(re.findall(r"(\w+):\s*(\d+)", specs_correction)) + + for k, v in pairs.items(): + if not hasattr(mspec, str(k)): + error(f"Invalid specs correction '{k}'. Please use --specs option to peak valid specs") + setattr(mspec, str(k), v) + return mspec.get_class_members() + + diff --git a/projects/rocprofiler-compute/src/utils/specs.py b/projects/rocprofiler-compute/src/utils/specs.py index 7ed15494e9..7beb457e86 100644 --- a/projects/rocprofiler-compute/src/utils/specs.py +++ b/projects/rocprofiler-compute/src/utils/specs.py @@ -53,9 +53,10 @@ VERSION_LOC = [ @dataclass class MachineSpecs: - def __init__(self, args, sysinfo=None): + def __init__(self, args, sysinfo:dict=None): if not sysinfo is None: - self.gpu_arch = sysinfo.iloc[0]["gpu_arch"] + for key, value in sysinfo.items(): + setattr(self, key, value[0]) return # read timestamp info now = datetime.now() @@ -254,8 +255,16 @@ def get_rocm_ver(): error("Unable to detect a complete local ROCm installation.\nThe expected %s/.info/ versioning directory is missing. Please ensure you have valid ROCm installation." % _rocm_path) return rocm_ver +<<<<<<< HEAD def run(cmd, exit_on_error=False): p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) +======= +def run(cmd,exit_on_error=False): + try: + p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except FileNotFoundError as e: + error(f"Unable to parse specs. Can't find ROCm asset: {e.filename}\nTry passing a path to an existing workload results in 'analyze' mode.") +>>>>>>> 2d92bcf (Enhance correct_sys_info() func and err checking) if exit_on_error: if cmd[0] == "rocm-smi":