From 4e51c122d17eb450e48699d99bde712fd5bcfc32 Mon Sep 17 00:00:00 2001
From: JoseSantosAMD
Date: Tue, 8 Aug 2023 12:06:39 -0500
Subject: [PATCH] removing calls to kernel_name_shortener in mongo shortening now in profile

Signed-off-by: JoseSantosAMD
---
Review notes (not part of the commit message; text between the "---" line
and the first "diff --git" line is ignored when the patch is applied):

* Line structure was restored from a whitespace-collapsed copy. The number
  of added/removed/context lines in every hunk matches its @@ header and the
  diffstat, but indentation and the exact position of blank context lines
  are inferred; confirm against the pre-image blobs before applying.
* The regex named group "(?P<name>...)" was restored. The collapsed copy
  read "(?P[...", which re.compile() rejects. The group name is assumed;
  the code only indexes the findall() tuples, so the name is not referenced.
* Profile mode now builds each CSV path with os.path.join(args.path, file).
  The previous expression, os.listdir(args.path) + "/" + file, adds a list
  to a str and raises TypeError, which the EmptyDataError handler does not
  catch. The post-image hash on the src/omniperf "index" line predates
  this correction.
* TODO(review): in the new profile-mode loops t2 (the shortened frame) is
  never written back and fileName is unused, so the CSV files on disk keep
  their original kernel names. src/omniperf is assumed to already import
  re and pandas as pd; this patch does not add those imports.

 src/omniperf               | 104 +++++++++++++++++++
 src/utils/csv_converter.py | 203 ++++++++++++++++++-------------------
 2 files changed, 202 insertions(+), 105 deletions(-)

diff --git a/src/omniperf b/src/omniperf
index 3b38e419b1..7db92004d7 100755
--- a/src/omniperf
+++ b/src/omniperf
@@ -53,6 +53,8 @@ from common import (
 
 from common import getVersion
 
+cache = dict()
+
 ################################################
 # Helper Functions
 ################################################
@@ -260,7 +262,76 @@ def mongo_import(args, profileAndImport):
     csv_converter.convert_folder(connectionInfo, Extractionlvl)
     print("-- Complete! --")
 
+def kernel_name_shortener(df, cache, level):
+    if level >= 5:
+        return df
 
+    columnName = ""
+    if "KernelName" in df:
+        columnName = "KernelName"
+    if "Name" in df:
+        columnName = "Name"
+
+    if columnName == "KernelName" or columnName == "Name":
+        # loop through all indices
+        for index in df.index:
+            original_name = df.loc[index, columnName]
+            if original_name in cache:
+                continue
+
+            # cache miss, add the shortened name to the dictionary
+            new_name = ""
+            matches = ""
+
+            names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
+
+            # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
+            if names_and_args.search(original_name):
+                matches = names_and_args.findall(original_name)
+            else:
+                # Works for first case '__amd_rocclr_fillBuffer.kd'
+                # remove .kd and then parse through original regex
+                first_case = re.compile(r"([^\s]+)(.kd)")
+                Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
+                interim_name = first_case.search(original_name).group(1)
+                matches = Mod_name_and_args.findall(interim_name)
+
+            current_level = 0
+            for name in matches:
+                ##can cause errors if a function name or argument is equal to 'clone'
+                if name[0] == "clone":
+                    continue
+                if len(name) == 3:
+                    if name[2] == "::":
+                        continue
+
+                if current_level < level:
+                    new_name += name[0]
+                # closing '>' is to be taken account by the while loop
+                if name[1].count(">") == 0:
+                    if current_level < level:
+                        if not (current_level == level - 1 and name[1].count("<") > 0):
+                            new_name += name[1]
+                        current_level += name[1].count("<")
+
+                curr_index = 0
+                # cases include '>' '> >, ' have to go in depth here to not lose account of commas and current level
+                while name[1].count(">") > 0 and curr_index < len(name[1]):
+                    if current_level < level:
+                        new_name += name[1][curr_index:]
+                        current_level -= name[1][curr_index:].count(">")
+                        curr_index = len(name[1])
+                    elif name[1][curr_index] == (">"):
+                        current_level -= 1
+                    curr_index += 1
+
+            cache[original_name] = new_name
+            if new_name == None or new_name == "":
+                cache[original_name] = original_name
+
+        df[columnName] = df[columnName].map(cache)
+
+    return df
 ################################################
 # Roofline Helpers
 ################################################
@@ -768,6 +839,7 @@ def main():
     # PROFILE MODE
     ##############
     if args.mode == "profile":
+        Extractionlvl = 3 #args.extraction_level
         print("Resolving rocprof")
         resolve_rocprof()
         # Cannot access parent directories
@@ -805,11 +877,43 @@ def main():
             roof_setup(args, my_parser, VER)
             # Generate roofline
             roofline_only(args.path, args.device, args.sort, args.mem_level, args.kernel_names, args.verbose)
+            #demangle
+            for file in os.listdir(args.path):
+                if file.endswith(".csv"):
+                    try:
+                        fileName = file[0 : file.find(".")]
+                        # Only shorten KernelNames if instructed to
+                        if Extractionlvl < 5:
+                            t1 = pd.read_csv(
+                                os.path.join(args.path, file),
+                                on_bad_lines="skip",
+                                engine="python",
+                            )
+
+                            t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
+                    except pd.errors.EmptyDataError:
+                        print("Skipping empty csv " + file)
 
         # Profile only
         else:
             print("\n-------------\nProfile only\n-------------\n")
             omniperf_profile(args, VER)
+            #demangle
+            for file in os.listdir(args.path):
+                if file.endswith(".csv"):
+                    try:
+                        fileName = file[0 : file.find(".")]
+                        # Only shorten KernelNames if instructed to
+                        if Extractionlvl < 5:
+                            t1 = pd.read_csv(
+                                os.path.join(args.path, file),
+                                on_bad_lines="skip",
+                                engine="python",
+                            )
+
+                            t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
+                    except pd.errors.EmptyDataError:
+                        print("Skipping empty csv " + file)
 
     ##############
     # DATABASE MODE
diff --git a/src/utils/csv_converter.py b/src/utils/csv_converter.py
index 4f28d5388e..9709ab9d87 100644
--- a/src/utils/csv_converter.py
+++ b/src/utils/csv_converter.py
@@ -32,89 +32,82 @@ import getpass
 from pymongo import MongoClient
 from tqdm import tqdm
 import shutil
-import subprocess
 
-cache = dict()
+# cache = dict()
 supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"}
 MAX_SERVER_SEL_DELAY = 5000  # 5 sec connection timeout
 
 
-def kernel_name_shortener(df, cache, level):
-    if level >= 5:
-        return df
+# def kernel_name_shortener(df, cache, level):
+#     if level >= 5:
+#         return df
 
-    columnName = ""
-    if "KernelName" in df:
-        columnName = "KernelName"
-    if "Name" in df:
-        columnName = "Name"
+#     columnName = ""
+#     if "KernelName" in df:
+#         columnName = "KernelName"
+#     if "Name" in df:
+#         columnName = "Name"
 
-    if columnName == "KernelName" or columnName == "Name":
-        # loop through all indices
-        for index in df.index:
-            original_name = df.loc[index, columnName]
-            if original_name in cache:
-                continue
+#     if columnName == "KernelName" or columnName == "Name":
+#         # loop through all indices
+#         for index in df.index:
+#             original_name = df.loc[index, columnName]
+#             if original_name in cache:
+#                 continue
 
-            cmd = ["llvm-cxxfilt", original_name]
+#             # cache miss, add the shortened name to the dictionary
+#             new_name = ""
+#             matches = ""
 
-            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+#             names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
 
-            demangled_name, e = proc.communicate()
-            demangled_name = str(demangled_name, "UTF-8").strip()
+#             # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
+#             if names_and_args.search(original_name):
+#                 matches = names_and_args.findall(original_name)
+#             else:
+#                 # Works for first case '__amd_rocclr_fillBuffer.kd'
+#                 # remove .kd and then parse through original regex
+#                 first_case = re.compile(r"([^\s]+)(.kd)")
+#                 Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
+#                 interim_name = first_case.search(original_name).group(1)
+#                 matches = Mod_name_and_args.findall(interim_name)
 
-            # cache miss, add the shortened name to the dictionary
-            new_name = ""
-            matches = ""
+#             current_level = 0
+#             for name in matches:
+#                 ##can cause errors if a function name or argument is equal to 'clone'
+#                 if name[0] == "clone":
+#                     continue
+#                 if len(name) == 3:
+#                     if name[2] == "::":
+#                         continue
 
-            names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
+#                 if current_level < level:
+#                     new_name += name[0]
+#                 # closing '>' is to be taken account by the while loop
+#                 if name[1].count(">") == 0:
+#                     if current_level < level:
+#                         if not (current_level == level - 1 and name[1].count("<") > 0):
+#                             new_name += name[1]
+#                         current_level += name[1].count("<")
 
-            # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
-            if names_and_args.search(demangled_name):
-                matches = names_and_args.findall(demangled_name)
-            else:
-                # Works for first case '__amd_rocclr_fillBuffer.kd'
-                cache[original_name] = new_name
-                if new_name == None or new_name == "":
-                    cache[original_name] = demangled_name
-                continue
+#                 curr_index = 0
+#                 # cases include '>' '> >, ' have to go in depth here to not lose account of commas and current level
+#                 while name[1].count(">") > 0 and curr_index < len(name[1]):
+#                     if current_level < level:
+#                         new_name += name[1][curr_index:]
+#                         current_level -= name[1][curr_index:].count(">")
+#                         curr_index = len(name[1])
+#                     elif name[1][curr_index] == (">"):
+#                         current_level -= 1
+#                     curr_index += 1
 
-            current_level = 0
-            for name in matches:
-                ##can cause errors if a function name or argument is equal to 'clone'
-                if name[0] == "clone":
-                    continue
-                if len(name) == 3:
-                    if name[2] == "::":
-                        continue
+#             cache[original_name] = new_name
+#             if new_name == None or new_name == "":
+#                 cache[original_name] = original_name
 
-                if current_level < level:
-                    new_name += name[0]
-                # closing '>' is to be taken account by the while loop
-                if name[1].count(">") == 0:
-                    if current_level < level:
-                        if not (current_level == level - 1 and name[1].count("<") > 0):
-                            new_name += name[1]
-                        current_level += name[1].count("<")
+#         df[columnName] = df[columnName].map(cache)
 
-                curr_index = 0
-                # cases include '>' '> >, ' have to go in depth here to not lose account of commas and current level
-                while name[1].count(">") > 0 and curr_index < len(name[1]):
-                    if current_level < level:
-                        new_name += name[1][curr_index:]
-                        current_level -= name[1][curr_index:].count(">")
-                        curr_index = len(name[1])
-                    elif name[1][curr_index] == (">"):
-                        current_level -= 1
-                    curr_index += 1
-
-            cache[original_name] = new_name
-            if new_name == None or new_name == "":
-                cache[original_name] = demangled_name
-
-        df[columnName] = df[columnName].map(cache)
-
-    return df
+#     return df
 
 
 # Verify target directory and setup connection
@@ -151,12 +144,12 @@ def parse(args, profileAndExport):
 
     db = "omniperf_" + str(args.team) + "_" + str(name) + "_" + soc
 
-    if Extractionlvl >= 5:
-        print("KernelName shortening disabled")
-    else:
-        print("KernelName shortening enabled")
+    # if Extractionlvl >= 5:
+    #     print("KernelName shortening disabled")
+    # else:
+    #     print("KernelName shortening enabled")
 
-    print("Kernel name verbose level:", Extractionlvl)
+    # print("Kernel name verbose level:", Extractionlvl)
 
     if args.password == "":
         try:
@@ -203,14 +196,14 @@ def convert_folder(connectionInfo, Extractionlvl):
         print("ERROR: Unable to connect to the server")
         sys.exit(1)
     # Set up directories
-    if Extractionlvl < 5:
-        newfilepath = connectionInfo["workload"]
-        newfilepath_h = newfilepath + "/renamedFiles/"
-        if not os.path.exists(newfilepath_h):
-            os.mkdir(newfilepath_h)
-        newfilepath = newfilepath_h + connectionInfo["db"] + "/"
-        if not os.path.exists(newfilepath):
-            os.mkdir(newfilepath)
+    # if Extractionlvl < 5:
+    #     newfilepath = connectionInfo["workload"]
+    #     newfilepath_h = newfilepath + "/renamedFiles/"
+    #     if not os.path.exists(newfilepath_h):
+    #         os.mkdir(newfilepath_h)
+    #     newfilepath = newfilepath_h + connectionInfo["db"] + "/"
+    #     if not os.path.exists(newfilepath):
+    #         os.mkdir(newfilepath)
     # Upload files
     i = 0
     file = "blank"
@@ -220,30 +213,30 @@ def convert_folder(connectionInfo, Extractionlvl):
             try:
                 fileName = file[0 : file.find(".")]
                 # Only shorten KernelNames if instructed to
-                if Extractionlvl < 5:
-                    t1 = pd.read_csv(
-                        connectionInfo["workload"] + "/" + file,
-                        on_bad_lines="skip",
-                        engine="python",
-                    )
+                # if Extractionlvl < 5:
+                #     t1 = pd.read_csv(
+                #         connectionInfo["workload"] + "/" + file,
+                #         on_bad_lines="skip",
+                #         engine="python",
+                #     )
 
-                    t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
-                    df_saved_file = t2.to_csv(newfilepath + file)
+                #     t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
+                #     df_saved_file = t2.to_csv(newfilepath + file)
 
-                    cmd = (
-                        "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
-                    ).format(
-                        connectionInfo["username"],
-                        connectionInfo["password"],
-                        connectionInfo["host"],
-                        connectionInfo["port"],
-                        connectionInfo["db"],
-                        newfilepath + file,
-                        fileName,
-                    )
-                    os.system(cmd)
-                else:
-                    cmd = (
+                #     cmd = (
+                #         "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
+                #     ).format(
+                #         connectionInfo["username"],
+                #         connectionInfo["password"],
+                #         connectionInfo["host"],
+                #         connectionInfo["port"],
+                #         connectionInfo["db"],
+                #         newfilepath + file,
+                #         fileName,
+                #     )
+                #     os.system(cmd)
+                # else:
+                cmd = (
                         "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
                     ).format(
                         connectionInfo["username"],
@@ -254,7 +247,7 @@ def convert_folder(connectionInfo, Extractionlvl):
                         connectionInfo["workload"] + "/" + file,
                         fileName,
                     )
-                    os.system(cmd)
+                os.system(cmd)
                 i += 1
             except pd.errors.EmptyDataError:
                 print("Skipping empty csv " + file)
@@ -265,7 +258,7 @@ def convert_folder(connectionInfo, Extractionlvl):
     newValue = {"name": connectionInfo["db"]}
     mycol.replace_one(value, newValue, upsert=True)
     # Remove tmp directory if we shortened KernelNames
-    if Extractionlvl < 5:
-        shutil.rmtree(newfilepath_h)
+    # if Extractionlvl < 5:
+    #     shutil.rmtree(newfilepath_h)
     print("{} collections added.".format(i))
     print("Workload name uploaded")