removing calls to kernel_name_shortener in mongo

shortening now in profile

Signed-off-by: JoseSantosAMD <josantos@amd.com>
Этот коммит содержится в:
JoseSantosAMD
2023-08-08 12:06:39 -05:00
родитель add68ded67
Коммит 4e51c122d1
2 изменённых файлов: 202 добавлений и 105 удалений
+104
Просмотреть файл
@@ -53,6 +53,8 @@ from common import (
from common import getVersion
cache = dict()
################################################
# Helper Functions
################################################
@@ -260,7 +262,76 @@ def mongo_import(args, profileAndImport):
csv_converter.convert_folder(connectionInfo, Extractionlvl)
print("-- Complete! --")
# Patterns are compiled once at import time instead of once per DataFrame row.
# A run of name characters followed by at least one delimiter and an optional
# "::"; handles names such as
# "Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]".
_NAMES_AND_ARGS = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
# A whitespace-free symbol ending in a literal ".kd" suffix, e.g.
# "__amd_rocclr_fillBuffer.kd". The dot is escaped so that only a real ".kd"
# suffix is stripped (an unescaped dot would match any character before "kd").
_KD_SUFFIX = re.compile(r"([^\s]+)(\.kd)")
# Same tokenizer as _NAMES_AND_ARGS, but the delimiter run may be empty.
_NAMES_AND_OPTIONAL_ARGS = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")


def _shorten_kernel_name(original_name, level):
    """Return original_name with text nested at '<' depth >= level removed.

    Falls back to original_name unchanged when the name cannot be tokenized
    or when shortening would produce an empty string.
    """
    if _NAMES_AND_ARGS.search(original_name):
        matches = _NAMES_AND_ARGS.findall(original_name)
    else:
        # No delimiter anywhere in the name: strip a trailing ".kd" and
        # tokenize what is left.
        kd_match = _KD_SUFFIX.search(original_name)
        if kd_match is None:
            # Neither pattern applies (e.g. "plain_kernel" or ""). Keep the
            # name as-is instead of calling .group() on None.
            return original_name
        matches = _NAMES_AND_OPTIONAL_ARGS.findall(kd_match.group(1))

    new_name = ""
    # Number of '<' seen so far that have not yet been closed by a '>'.
    current_level = 0
    for name in matches:
        # NOTE: this also drops a genuine function name or argument that
        # happens to be exactly "clone".
        if name[0] == "clone":
            continue
        if len(name) == 3 and name[2] == "::":
            continue

        if current_level < level:
            new_name += name[0]

        delimiters = name[1]
        # Delimiter runs containing '>' are handled by the while loop below.
        if delimiters.count(">") == 0:
            if current_level < level:
                # Do not emit a '<' that would open the first hidden level.
                if not (current_level == level - 1 and delimiters.count("<") > 0):
                    new_name += delimiters
            current_level += delimiters.count("<")

        curr_index = 0
        # Runs such as '>' or '> >, ' are walked character by character so
        # that commas and the current depth are not lost.
        while delimiters.count(">") > 0 and curr_index < len(delimiters):
            if current_level < level:
                # Back at a visible depth: emit the rest of the run at once.
                new_name += delimiters[curr_index:]
                current_level -= delimiters[curr_index:].count(">")
                curr_index = len(delimiters)
            elif delimiters[curr_index] == ">":
                current_level -= 1
            curr_index += 1

    return new_name if new_name else original_name


def kernel_name_shortener(df, cache, level):
    """Shorten the kernel names stored in a DataFrame column.

    The "Name" column is used when present, otherwise "KernelName"; when the
    DataFrame has neither column it is returned untouched.

    Args:
        df: pandas DataFrame to update; the chosen column is replaced in place.
        cache: dict mapping original names to shortened names. Existing
            entries are reused and new entries are added for every name seen.
        level: maximum '<' nesting depth to keep. A value of 5 or more
            disables shortening.

    Returns:
        The same DataFrame object that was passed in.
    """
    if level >= 5:
        return df

    # "Name" wins over "KernelName" when both columns exist.
    if "Name" in df:
        columnName = "Name"
    elif "KernelName" in df:
        columnName = "KernelName"
    else:
        return df

    for original_name in df[columnName]:
        # Non-string cells (e.g. NaN produced by read_csv) cannot be parsed
        # by the regexes; they are left untouched by the mapping below.
        if not isinstance(original_name, str):
            continue
        if original_name in cache:
            continue
        # Cache miss: compute the shortened name once and remember it.
        cache[original_name] = _shorten_kernel_name(original_name, level)

    # Values missing from the cache (the skipped non-string cells) map to
    # themselves rather than being turned into NaN by a plain dict mapping.
    df[columnName] = df[columnName].map(lambda value: cache.get(value, value))
    return df
################################################
# Roofline Helpers
################################################
@@ -768,6 +839,7 @@ def main():
# PROFILE MODE
##############
if args.mode == "profile":
Extractionlvl = 3 #args.extraction_level
print("Resolving rocprof")
resolve_rocprof()
# Cannot access parent directories
@@ -805,11 +877,43 @@ def main():
roof_setup(args, my_parser, VER)
# Generate roofline
roofline_only(args.path, args.device, args.sort, args.mem_level, args.kernel_names, args.verbose)
#demangle
for file in os.listdir(args.path):
if file.endswith(".csv"):
try:
fileName = file[0 : file.find(".")]
# Only shorten KernelNames if instructed to
if Extractionlvl < 5:
t1 = pd.read_csv(
os.listdir(args.path) + "/" + file,
on_bad_lines="skip",
engine="python",
)
t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
except pd.errors.EmptyDataError:
print("Skipping empty csv " + file)
# Profile only
else:
print("\n-------------\nProfile only\n-------------\n")
omniperf_profile(args, VER)
#demangle
for file in os.listdir(args.path):
if file.endswith(".csv"):
try:
fileName = file[0 : file.find(".")]
# Only shorten KernelNames if instructed to
if Extractionlvl < 5:
t1 = pd.read_csv(
os.listdir(args.path) + "/" + file,
on_bad_lines="skip",
engine="python",
)
t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
except pd.errors.EmptyDataError:
print("Skipping empty csv " + file)
##############
# DATABASE MODE
+98 -105
Просмотреть файл
@@ -32,89 +32,82 @@ import getpass
from pymongo import MongoClient
from tqdm import tqdm
import shutil
import subprocess
cache = dict()
# cache = dict()
supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"}
MAX_SERVER_SEL_DELAY = 5000 # 5 sec connection timeout
def kernel_name_shortener(df, cache, level):
if level >= 5:
return df
# def kernel_name_shortener(df, cache, level):
# if level >= 5:
# return df
columnName = ""
if "KernelName" in df:
columnName = "KernelName"
if "Name" in df:
columnName = "Name"
# columnName = ""
# if "KernelName" in df:
# columnName = "KernelName"
# if "Name" in df:
# columnName = "Name"
if columnName == "KernelName" or columnName == "Name":
# loop through all indices
for index in df.index:
original_name = df.loc[index, columnName]
if original_name in cache:
continue
# if columnName == "KernelName" or columnName == "Name":
# # loop through all indices
# for index in df.index:
# original_name = df.loc[index, columnName]
# if original_name in cache:
# continue
cmd = ["llvm-cxxfilt", original_name]
# # cache miss, add the shortened name to the dictionary
# new_name = ""
# matches = ""
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
demangled_name, e = proc.communicate()
demangled_name = str(demangled_name, "UTF-8").strip()
# # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
# if names_and_args.search(original_name):
# matches = names_and_args.findall(original_name)
# else:
# # Works for first case '__amd_rocclr_fillBuffer.kd'
# # remove .kd and then parse through original regex
# first_case = re.compile(r"([^\s]+)(.kd)")
# Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
# interim_name = first_case.search(original_name).group(1)
# matches = Mod_name_and_args.findall(interim_name)
# cache miss, add the shortened name to the dictionary
new_name = ""
matches = ""
# current_level = 0
# for name in matches:
# ##can cause errors if a function name or argument is equal to 'clone'
# if name[0] == "clone":
# continue
# if len(name) == 3:
# if name[2] == "::":
# continue
names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
# if current_level < level:
# new_name += name[0]
# # closing '>' is to be taken account by the while loop
# if name[1].count(">") == 0:
# if current_level < level:
# if not (current_level == level - 1 and name[1].count("<") > 0):
# new_name += name[1]
# current_level += name[1].count("<")
# works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
if names_and_args.search(demangled_name):
matches = names_and_args.findall(demangled_name)
else:
# Works for first case '__amd_rocclr_fillBuffer.kd'
cache[original_name] = new_name
if new_name == None or new_name == "":
cache[original_name] = demangled_name
continue
# curr_index = 0
# # cases include '>' '> >, ' have to go in depth here to not lose account of commas and current level
# while name[1].count(">") > 0 and curr_index < len(name[1]):
# if current_level < level:
# new_name += name[1][curr_index:]
# current_level -= name[1][curr_index:].count(">")
# curr_index = len(name[1])
# elif name[1][curr_index] == (">"):
# current_level -= 1
# curr_index += 1
current_level = 0
for name in matches:
##can cause errors if a function name or argument is equal to 'clone'
if name[0] == "clone":
continue
if len(name) == 3:
if name[2] == "::":
continue
# cache[original_name] = new_name
# if new_name == None or new_name == "":
# cache[original_name] = original_name
if current_level < level:
new_name += name[0]
# closing '>' is to be taken account by the while loop
if name[1].count(">") == 0:
if current_level < level:
if not (current_level == level - 1 and name[1].count("<") > 0):
new_name += name[1]
current_level += name[1].count("<")
# df[columnName] = df[columnName].map(cache)
curr_index = 0
# cases include '>' '> >, ' have to go in depth here to not lose account of commas and current level
while name[1].count(">") > 0 and curr_index < len(name[1]):
if current_level < level:
new_name += name[1][curr_index:]
current_level -= name[1][curr_index:].count(">")
curr_index = len(name[1])
elif name[1][curr_index] == (">"):
current_level -= 1
curr_index += 1
cache[original_name] = new_name
if new_name == None or new_name == "":
cache[original_name] = demangled_name
df[columnName] = df[columnName].map(cache)
return df
# return df
# Verify target directory and setup connection
@@ -151,12 +144,12 @@ def parse(args, profileAndExport):
db = "omniperf_" + str(args.team) + "_" + str(name) + "_" + soc
if Extractionlvl >= 5:
print("KernelName shortening disabled")
else:
print("KernelName shortening enabled")
# if Extractionlvl >= 5:
# print("KernelName shortening disabled")
# else:
# print("KernelName shortening enabled")
print("Kernel name verbose level:", Extractionlvl)
# print("Kernel name verbose level:", Extractionlvl)
if args.password == "":
try:
@@ -203,14 +196,14 @@ def convert_folder(connectionInfo, Extractionlvl):
print("ERROR: Unable to connect to the server")
sys.exit(1)
# Set up directories
if Extractionlvl < 5:
newfilepath = connectionInfo["workload"]
newfilepath_h = newfilepath + "/renamedFiles/"
if not os.path.exists(newfilepath_h):
os.mkdir(newfilepath_h)
newfilepath = newfilepath_h + connectionInfo["db"] + "/"
if not os.path.exists(newfilepath):
os.mkdir(newfilepath)
# if Extractionlvl < 5:
# newfilepath = connectionInfo["workload"]
# newfilepath_h = newfilepath + "/renamedFiles/"
# if not os.path.exists(newfilepath_h):
# os.mkdir(newfilepath_h)
# newfilepath = newfilepath_h + connectionInfo["db"] + "/"
# if not os.path.exists(newfilepath):
# os.mkdir(newfilepath)
# Upload files
i = 0
file = "blank"
@@ -220,30 +213,30 @@ def convert_folder(connectionInfo, Extractionlvl):
try:
fileName = file[0 : file.find(".")]
# Only shorten KernelNames if instructed to
if Extractionlvl < 5:
t1 = pd.read_csv(
connectionInfo["workload"] + "/" + file,
on_bad_lines="skip",
engine="python",
)
# if Extractionlvl < 5:
# t1 = pd.read_csv(
# connectionInfo["workload"] + "/" + file,
# on_bad_lines="skip",
# engine="python",
# )
t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
df_saved_file = t2.to_csv(newfilepath + file)
# t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
# df_saved_file = t2.to_csv(newfilepath + file)
cmd = (
"mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
).format(
connectionInfo["username"],
connectionInfo["password"],
connectionInfo["host"],
connectionInfo["port"],
connectionInfo["db"],
newfilepath + file,
fileName,
)
os.system(cmd)
else:
cmd = (
# cmd = (
# "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
# ).format(
# connectionInfo["username"],
# connectionInfo["password"],
# connectionInfo["host"],
# connectionInfo["port"],
# connectionInfo["db"],
# newfilepath + file,
# fileName,
# )
# os.system(cmd)
# else:
cmd = (
"mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
).format(
connectionInfo["username"],
@@ -254,7 +247,7 @@ def convert_folder(connectionInfo, Extractionlvl):
connectionInfo["workload"] + "/" + file,
fileName,
)
os.system(cmd)
os.system(cmd)
i += 1
except pd.errors.EmptyDataError:
print("Skipping empty csv " + file)
@@ -265,7 +258,7 @@ def convert_folder(connectionInfo, Extractionlvl):
newValue = {"name": connectionInfo["db"]}
mycol.replace_one(value, newValue, upsert=True)
# Remove tmp directory if we shortened KernelNames
if Extractionlvl < 5:
shutil.rmtree(newfilepath_h)
# if Extractionlvl < 5:
# shutil.rmtree(newfilepath_h)
print("{} collections added.".format(i))
print("Workload name uploaded")