##############################################################################bl # MIT License # # Copyright (c) 2021 - 2025 Advanced Micro Devices, Inc. All Rights Reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
##############################################################################el

import argparse
import importlib
import logging
import os
import shutil
import socket
import sys
import time
from pathlib import Path

import pandas as pd
import yaml

import config
from argparser import omniarg_parser
from utils import file_io, parser, schema
from utils.logger import (
    setup_console_handler,
    setup_file_handler,
    setup_logging_priority,
)
from utils.specs import MachineSpecs, generate_machine_specs
from utils.utils import (
    console_debug,
    console_error,
    console_log,
    console_warning,
    demarcate,
    detect_rocprof,
    get_submodules,
    get_version,
    get_version_display,
    set_locale_encoding,
)

# gfx architecture -> {SoC family -> list of GPU model names}
SUPPORTED_ARCHS = {
    "gfx906": {"mi50": ["MI50", "MI60"]},
    "gfx908": {"mi100": ["MI100"]},
    "gfx90a": {"mi200": ["MI210", "MI250", "MI250X"]},
    "gfx940": {"mi300": ["MI300A_A0"]},
    "gfx941": {"mi300": ["MI300X_A0"]},
    "gfx942": {"mi300": ["MI300A_A1", "MI300X_A1"]},
}

# chip id (as a string) -> MI300 GPU model name
MI300_CHIP_IDS = {
    "29856": "MI300A_A1",
    "29857": "MI300X_A1",
    "29858": "MI308X",
}


class RocProfCompute:
    """Top-level driver for a rocprof-compute run.

    Construction parses the command line, configures logging and selects the
    profiler (profile mode) or the analysis front end (analyze mode). The
    ``run_profiler``, ``update_db`` and ``run_analysis`` methods then execute
    the corresponding workflow.
    """

    def __init__(self):
        """Parse arguments, set up logging and detect the execution mode."""
        self.__args = None
        self.__profiler_mode = None
        self.__analyze_mode = None
        self.__soc_name = (
            set()
        )  # gpu name, or in case of analyze mode, all loaded gpu name(s)
        self.__soc = dict()  # set of key, value pairs. Where arch->OmniSoc() obj
        self.__version = {
            "ver": None,
            "ver_pretty": None,
        }
        self.__options = {}
        self.__supported_archs = SUPPORTED_ARCHS
        self.__mspec: MachineSpecs = None  # to be initialized in load_soc_specs()
        setup_console_handler()
        self.set_version()
        self.parse_args()
        self.__mode = self.__args.mode
        self.__loglevel = setup_logging_priority(
            self.__args.verbose, self.__args.quiet, self.__mode
        )
        setattr(self.__args, "loglevel", self.__loglevel)
        set_locale_encoding()
        if self.__mode == "profile":
            self.detect_profiler()
        elif self.__mode == "analyze":
            self.detect_analyze()

        console_debug("Execution mode = %s" % self.__mode)

    def print_graphic(self):
        """Log program name as ascii art to terminal."""
        ascii_art = r"""
                                 __                                       _
 _ __ ___   ___ _ __  _ __ ___  / _|       ___ ___  _ __ ___  _ __  _   _| |_ ___
| '__/ _ \ / __| '_ \| '__/ _ \| |_ _____ / __/ _ \| '_ ` _ \| '_ \| | | | __/ _ \
| | | (_) | (__| |_) | | | (_) |  _|_____| (_| (_) | | | | | | |_) | |_| | ||  __/
|_|  \___/ \___| .__/|_|  \___/|_|        \___\___/|_| |_| |_| .__/ \__,_|\__\___|
               |_|                                           |_|
"""
        print(ascii_art)

    def get_mode(self):
        """Return the execution mode taken from the parsed arguments."""
        return self.__mode

    def set_version(self):
        """Populate the raw and display version strings from ``get_version``."""
        vData = get_version(config.rocprof_compute_home)
        self.__version["ver"] = vData["version"]
        self.__version["ver_pretty"] = get_version_display(
            vData["version"], vData["sha"], vData["mode"]
        )
        return

    def detect_profiler(self):
        """Select the profiler backend and store it in the profiler mode.

        rocscope is chosen when any of ``--lucky``, ``--summaries`` or
        ``--use-rocscope`` is set (and must then be on PATH); otherwise the
        backend is derived from the suffix of the detected rocprof command.
        """
        if (
            self.__args.lucky == True
            or self.__args.summaries == True
            or self.__args.use_rocscope
        ):
            if not shutil.which("rocscope"):
                console_error("Rocscope must be in PATH")
            else:
                self.__profiler_mode = "rocscope"
        else:
            rocprof_cmd = detect_rocprof()
            if str(rocprof_cmd).endswith("rocprof"):
                self.__profiler_mode = "rocprofv1"
            elif str(rocprof_cmd).endswith("rocprofv2"):
                self.__profiler_mode = "rocprofv2"
            elif str(rocprof_cmd).endswith("rocprofv3"):
                self.__profiler_mode = "rocprofv3"
            else:
                console_error(
                    "Incompatible profiler: %s. Supported profilers include: %s"
                    % (rocprof_cmd, get_submodules("rocprof_compute_profile"))
                )
        return

    def detect_analyze(self):
        """Select the analysis front end: ``web_ui`` with ``--gui``, else ``cli``."""
        if self.__args.gui:
            self.__analyze_mode = "web_ui"
        else:
            self.__analyze_mode = "cli"
        return

    @demarcate
    def load_soc_specs(self, sysinfo: dict = None):
        """Load OmniSoC instance for RocProfCompute run.

        Generates the machine specs (optionally from a previously recorded
        ``sysinfo`` dict), then imports ``rocprof_compute_soc.soc_<arch>`` and
        stores an instance of its ``<arch>_soc`` class keyed by architecture.
        When ``--specs`` is set the specs are printed and the process exits.
        """
        self.__mspec = generate_machine_specs(self.__args, sysinfo)
        if self.__args.specs:
            print(self.__mspec)
            sys.exit(0)

        arch = self.__mspec.gpu_arch

        # NB: This checker is a bit redundant. We already check this in specs module
        if arch not in self.__supported_archs:
            console_error("%s is an unsupported SoC" % arch)

        soc_module = importlib.import_module("rocprof_compute_soc.soc_" + arch)
        soc_class = getattr(soc_module, arch + "_soc")
        self.__soc[arch] = soc_class(self.__args, self.__mspec)
        return

    def parse_args(self):
        """Parse the command line and apply mode-specific argument fix-ups.

        * No mode: print the machine specs if ``--specs`` was given, otherwise
          print the help text and report an error.
        * profile: extend the output path according to ``--subpath`` and
          create the output directory when it does not exist.
        * analyze: clear all filters when spatial multiplexing is enabled.
        """
        # Named arg_parser so it does not shadow the imported utils.parser module.
        arg_parser = argparse.ArgumentParser(
            description="Command line interface for AMD's GPU profiler, ROCm Compute Profiler",
            prog="tool",
            formatter_class=lambda prog: argparse.RawTextHelpFormatter(
                prog, max_help_position=30
            ),
            usage="rocprof-compute [mode] [options]",
        )
        omniarg_parser(
            arg_parser,
            config.rocprof_compute_home,
            self.__supported_archs,
            self.__version,
        )
        self.__args = arg_parser.parse_args()

        if self.__args.mode is None:
            if self.__args.specs:
                print(generate_machine_specs(self.__args))
                sys.exit(0)
            arg_parser.print_help(sys.stderr)
            console_error(
                "rocprof-compute requires you pass a valid mode. Detected None."
            )
        elif self.__args.mode == "profile":
            # FIXME:
            #     Might want to get host name from detected spec
            if self.__args.subpath == "node_name":
                self.__args.path = str(
                    Path(self.__args.path).joinpath(
                        self.__args.name, socket.gethostname()
                    )
                )
            elif self.__args.subpath == "gpu_model":
                # parse_args() is called from __init__ before load_soc_specs(),
                # so self.__mspec is still None at this point. Detect the
                # machine specs on demand instead of dereferencing None.
                mspec = self.__mspec
                if mspec is None:
                    mspec = generate_machine_specs(self.__args)
                self.__args.path = str(
                    Path(self.__args.path).joinpath(self.__args.name, mspec.gpu_model)
                )
            p = Path(self.__args.path)
            if not p.exists():
                try:
                    p.mkdir(parents=True, exist_ok=False)
                except FileExistsError:
                    console_error("Directory already exists.")

        elif self.__args.mode == "analyze":
            # block all filters during spatial-multiplexing
            if self.__args.spatial_multiplexing:
                self.__args.gpu_id = None
                self.__args.gpu_kernel = None
                self.__args.gpu_dispatch_id = None
                self.__args.nodes = None
        return

    @demarcate
    def list_metrics(self):
        """Print the metric list for an architecture and exit.

        Uses the architecture passed to ``--list-metrics`` when it is truthy,
        otherwise the detected GPU architecture. Each metric is indented by
        one tab per dot in its key, capped at two tabs.
        """
        if not self.__args.list_metrics:
            arch = self.__mspec.gpu_arch
        else:
            arch = self.__args.list_metrics
        if arch in self.__supported_archs:
            ac = schema.ArchConfig()
            ac.panel_configs = file_io.load_panel_configs(
                self.__args.config_dir.joinpath(arch)
            )
            sys_info = self.__mspec.get_class_members().iloc[0]
            parser.build_dfs(archConfigs=ac, filter_metrics=[], sys_info=sys_info)
            for key, value in ac.metric_list.items():
                # 0 dots -> no indent, 1 dot -> one tab, 2+ dots -> two tabs
                prefix = "\t" * min(str(key).count("."), 2)
                print(prefix + key, "->", value)
            sys.exit(0)
        else:
            console_error("Unsupported arch")

    @demarcate
    def run_profiler(self):
        """Run the full profiling workflow for the detected profiler backend.

        Loads the SoC specs, handles ``--list-metrics``, instantiates the
        profiler, writes ``profiling_config.yaml`` into the output path,
        enables file logging and then runs pre-processing, profiling and
        post-processing, logging the elapsed time of the latter two.
        """
        self.print_graphic()
        self.load_soc_specs()

        if self.__args.list_metrics is not None:
            self.list_metrics()
        elif self.__args.name is None:
            sys.exit("Either --list-metrics or --name is required")

        # Deprecation warning for hardware blocks
        if any(
            block_type == "hardware_block"
            for block_type in self.__args.filter_blocks.values()
        ):
            console_warning("Hardware block based filtering will be deprecated soon")

        # FIXME:
        #     Changing default path should be done at the end of arg parsing stage,
        #     unless there is a specific reason to do here.

        # Update default path
        if self.__args.path == str(Path(os.getcwd()).joinpath("workloads")):
            self.__args.path = str(
                Path(self.__args.path).joinpath(
                    self.__args.name, self.__mspec.gpu_model
                )
            )

        # instantiate desired profiler
        # Imports stay local and static so only the selected backend is loaded.
        if self.__profiler_mode == "rocprofv1":
            from rocprof_compute_profile.profiler_rocprof_v1 import (
                rocprof_v1_profiler as profiler_class,
            )
        elif self.__profiler_mode == "rocprofv2":
            from rocprof_compute_profile.profiler_rocprof_v2 import (
                rocprof_v2_profiler as profiler_class,
            )
        elif self.__profiler_mode == "rocprofv3":
            from rocprof_compute_profile.profiler_rocprof_v3 import (
                rocprof_v3_profiler as profiler_class,
            )
        elif self.__profiler_mode == "rocscope":
            from rocprof_compute_profile.profiler_rocscope import (
                rocscope_profiler as profiler_class,
            )
        else:
            # NOTE(review): console_error is presumed to abort the run here;
            # confirm, since nothing below can work without a profiler.
            console_error("Unsupported profiler")

        soc = self.__soc[self.__mspec.gpu_arch]
        profiler = profiler_class(
            self.__args,
            self.__profiler_mode,
            soc,
            self.__supported_archs,
        )

        # -----------------------
        # run profiling workflow
        # -----------------------
        soc.profiling_setup()

        # Write profiling configuration as yaml file
        with open(Path(self.__args.path).joinpath("profiling_config.yaml"), "w") as f:
            # vars() returns the namespace's live __dict__, so this also turns
            # self.__args.config_dir into a str for everything that runs later.
            args_dict = vars(self.__args)
            args_dict["config_dir"] = str(args_dict["config_dir"])
            yaml.dump(args_dict, f)

        # enable file-based logging
        setup_file_handler(self.__args.loglevel, self.__args.path)

        profiler.pre_processing()
        console_debug('starting "run_profiling" and about to start rocprof\'s workload')
        time_start_prof = time.time()
        profiler.run_profiling(self.__version["ver"], config.prog)
        time_end_prof = time.time()
        console_debug(
            'finished "run_profiling" and finished rocprof\'s workload, time taken was {} m {} sec'.format(
                int((time_end_prof - time_start_prof) / 60),
                str((time_end_prof - time_start_prof) % 60),
            )
        )
        profiler.post_processing()
        time_end_post = time.time()
        # Report minutes and seconds like the message above; the previous
        # format string had a single placeholder, so it printed whole minutes
        # labelled as seconds and silently dropped the seconds value.
        console_debug(
            'time taken for "post_processing" was {} m {} sec'.format(
                int((time_end_post - time_end_prof) / 60),
                str((time_end_post - time_end_prof) % 60),
            )
        )
        soc.post_profiling()

        return

    @demarcate
    def update_db(self):
        """Import a workload into the database (``--upload``) or remove one."""
        self.print_graphic()
        from utils.db_connector import DatabaseConnector

        db_connection = DatabaseConnector(self.__args)

        # -----------------------
        # run database workflow
        # -----------------------
        db_connection.pre_processing()
        if self.__args.upload:
            db_connection.db_import()
        else:
            db_connection.db_remove()

        return

    @demarcate
    def run_analysis(self):
        """Run the analysis workflow with the CLI or web UI analyzer.

        For every input path the recorded ``sysinfo.csv`` is loaded and used
        to build the matching SoC object before the analyzer is executed.
        """
        self.print_graphic()

        console_log("Analysis mode = %s" % self.__analyze_mode)

        if self.__analyze_mode == "cli":
            from rocprof_compute_analyze.analysis_cli import cli_analysis

            analyzer = cli_analysis(self.__args, self.__supported_archs)
        elif self.__analyze_mode == "web_ui":
            from rocprof_compute_analyze.analysis_webui import webui_analysis

            analyzer = webui_analysis(self.__args, self.__supported_archs)
        else:
            console_error("Unsupported analysis mode -> %s" % self.__analyze_mode)

        # -----------------------
        # run analysis workflow
        # -----------------------
        analyzer.sanitize()

        # Load required SoC(s) from input
        for d in analyzer.get_args().path:
            # FIXME
            # sys_info = pd.read_csv(Path(d[0], "sysinfo.csv"))
            # With --nodes or spatial multiplexing, sysinfo.csv is read from
            # the first sub-directory instead of the workload directory itself.
            sysinfo_path = (
                Path(d[0])
                if analyzer.get_args().nodes is None
                and analyzer.get_args().spatial_multiplexing is not True
                else file_io.find_1st_sub_dir(d[0])
            )
            sys_info = file_io.load_sys_info(sysinfo_path.joinpath("sysinfo.csv"))
            sys_info = sys_info.to_dict("list")
            # keep only the first value of every column
            sys_info = {key: value[0] for key, value in sys_info.items()}
            self.load_soc_specs(sys_info)

        analyzer.set_soc(self.__soc)
        analyzer.pre_processing()
        analyzer.run_analysis()

        return