diff --git a/projects/rocprofiler-compute/src/omniperf_analyze/analysis_cli.py b/projects/rocprofiler-compute/src/omniperf_analyze/analysis_cli.py index b397cda6cd..eb255af96f 100644 --- a/projects/rocprofiler-compute/src/omniperf_analyze/analysis_cli.py +++ b/projects/rocprofiler-compute/src/omniperf_analyze/analysis_cli.py @@ -25,7 +25,7 @@ from omniperf_analyze.analysis_base import OmniAnalyze_Base from utils.utils import demarcate, error from utils import file_io, parser, tty -from utils.csv_processor import kernel_name_shortener +from utils.kernel_name_shortener import kernel_name_shortener class cli_analysis(OmniAnalyze_Base): diff --git a/projects/rocprofiler-compute/src/omniperf_profile/profiler_rocprof_v1.py b/projects/rocprofiler-compute/src/omniperf_profile/profiler_rocprof_v1.py index 350b1363f7..dd21fe5e03 100644 --- a/projects/rocprofiler-compute/src/omniperf_profile/profiler_rocprof_v1.py +++ b/projects/rocprofiler-compute/src/omniperf_profile/profiler_rocprof_v1.py @@ -27,7 +27,7 @@ import os from omniperf_profile.profiler_base import OmniProfiler_Base from utils.utils import demarcate, replace_timestamps -from utils.csv_processor import kernel_name_shortener +from utils.kernel_name_shortener import kernel_name_shortener class rocprof_v1_profiler(OmniProfiler_Base): diff --git a/projects/rocprofiler-compute/src/omniperf_profile/profiler_rocprof_v2.py b/projects/rocprofiler-compute/src/omniperf_profile/profiler_rocprof_v2.py index 12a7791d0f..ffd2349116 100644 --- a/projects/rocprofiler-compute/src/omniperf_profile/profiler_rocprof_v2.py +++ b/projects/rocprofiler-compute/src/omniperf_profile/profiler_rocprof_v2.py @@ -26,7 +26,7 @@ import os import logging from omniperf_profile.profiler_base import OmniProfiler_Base from utils.utils import demarcate -from utils.csv_processor import kernel_name_shortener +from utils.kernel_name_shortener import kernel_name_shortener class rocprof_v2_profiler(OmniProfiler_Base): def __init__(self,profiling_args,profiler_mode,soc): diff --git a/projects/rocprofiler-compute/src/utils/csv_processor.py b/projects/rocprofiler-compute/src/utils/kernel_name_shortener.py similarity index 55% rename from projects/rocprofiler-compute/src/utils/csv_processor.py rename to projects/rocprofiler-compute/src/utils/kernel_name_shortener.py index 43bb5bb49b..9f07c35943 100644 --- a/projects/rocprofiler-compute/src/utils/csv_processor.py +++ b/projects/rocprofiler-compute/src/utils/kernel_name_shortener.py @@ -22,41 +22,33 @@ # SOFTWARE. ##############################################################################el -import argparse -import collections import os -import subprocess import sys -import re -import pandas as pd -import getpass -from pymongo import MongoClient -from tqdm import tqdm -import glob -import re import logging +import glob +import re +import subprocess +import pandas as pd + +from utils.utils import error cache = dict() -supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"} -MAX_SERVER_SEL_DELAY = 5000 # 5 sec connection timeout - - # Note: shortener is now dependent on a rocprof install with llvm def kernel_name_shortener(workload_dir, level): def shorten_file(df, level): global cache - columnName = "" - if "KernelName" in df: - columnName = "KernelName" + column_name = "" + if "Kernel_Name" in df: + column_name = "Kernel_Name" if "Name" in df: - columnName = "Name" + column_name = "Name" - if columnName == "KernelName" or columnName == "Name": + if column_name == "Kernel_Name" or column_name == "Name": # loop through all indices for index in df.index: - original_name = df.loc[index, columnName] + original_name = df.loc[index, column_name] if original_name in cache: continue @@ -122,7 +114,7 @@ def kernel_name_shortener(workload_dir, level): if new_name == None or new_name == "": cache[original_name] = demangled_name - df[columnName] = df[columnName].map(cache) + df[column_name] = df[column_name].map(cache) return df @@ -130,12 +122,7 @@ def kernel_name_shortener(workload_dir, level): if level < 5: cpp_filt = os.path.join("/usr", "bin", "c++filt") if not os.path.isfile(cpp_filt): - logging.error( - "Error: Could not resolve c++filt in expected directory: {}".format( - cpp_filt - ) - ) - sys.exit(1) + error("Could not resolve c++filt in expected directory: %s" % cpp_filt) for fpath in glob.glob(workload_dir + "/*.csv"): try: @@ -147,116 +134,6 @@ def kernel_name_shortener(workload_dir, level): modified_df = shorten_file(orig_df, level) modified_df.to_csv(fpath, index=False) except pd.errors.EmptyDataError: - logging.debug("[profiling] Skipping shortening on empty csv " + str(fpath)) + logging.debug("[profiling] Skipping shortening on empty csv: %s" % str(fpath)) - logging.info("[profiling] KernelName shortening complete!") - - -# Verify target directory and setup connection -def parse(args, profileAndExport): - host = args.host - port = str(args.port) - username = args.username - - if profileAndExport: - workload = args.workload + "/" + args.target + "/" - else: - workload = args.workload - - # Verify directory path is valid - print("Pulling data from ", workload) - if os.path.isdir(workload): - print("The directory exists") - else: - raise argparse.ArgumentTypeError("Directory does not exist") - - sysInfoPath = workload + "/sysinfo.csv" - if os.path.isfile(sysInfoPath): - print("Found sysinfo file") - sysInfo = pd.read_csv(sysInfoPath) - # Extract SoC - arch = sysInfo["gpu_soc"][0] - soc = supported_arch[arch] - # Extract name - name = sysInfo["workload_name"][0] - else: - print("Unable to parse SoC or workload name from sysinfo.csv") - sys.exit(1) - - db = "omniperf_" + str(args.team) + "_" + str(name) + "_" + soc - - if args.password == "": - try: - password = getpass.getpass() - except Exception as error: - print("PASSWORD ERROR", error) - else: - print("Password recieved") - else: - password = args.password - - if db.find(".") != -1 or db.find("-") != -1: - raise ValueError("'-' and '.' are not permited in workload name", db) - - connectionInfo = { - "username": username, - "password": password, - "host": host, - "port": port, - "workload": workload, - "db": db, - } - - return connectionInfo - - -def convert_folder(connectionInfo): - # Test connection - connection_str = ( - "mongodb://" - + connectionInfo["username"] - + ":" - + connectionInfo["password"] - + "@" - + connectionInfo["host"] - + ":" - + connectionInfo["port"] - + "/?authSource=admin" - ) - client = MongoClient(connection_str, serverSelectionTimeoutMS=MAX_SERVER_SEL_DELAY) - try: - client.server_info() - except: - print("ERROR: Unable to connect to the server") - sys.exit(1) - - i = 0 - file = "blank" - for file in tqdm(os.listdir(connectionInfo["workload"])): - if file.endswith(".csv"): - print(connectionInfo["workload"] + "/" + file) - try: - fileName = file[0 : file.find(".")] - cmd = ( - "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline" - ).format( - connectionInfo["username"], - connectionInfo["password"], - connectionInfo["host"], - connectionInfo["port"], - connectionInfo["db"], - connectionInfo["workload"] + "/" + file, - fileName, - ) - os.system(cmd) - i += 1 - except pd.errors.EmptyDataError: - print("Skipping empty csv " + file) - - mydb = client["workload_names"] - mycol = mydb["names"] - value = {"name": connectionInfo["db"]} - newValue = {"name": connectionInfo["db"]} - mycol.replace_one(value, newValue, upsert=True) - print("{} collections added.".format(i)) - print("Workload name uploaded") + logging.info("[profiling] Kernel_Name shortening complete.") \ No newline at end of file