removing calls to kernel_name_shortener in mongo

shortening now in profile Signed-off-by: JoseSantosAMD <josantos@amd.com>
2023-08-08 12:06:39 -05:00
@@ -53,6 +53,8 @@ from common import (

 from common import getVersion

+cache = dict()
+
 ################################################
 # Helper Functions
 ################################################
@@ -260,7 +262,76 @@ def mongo_import(args, profileAndImport):
    csv_converter.convert_folder(connectionInfo, Extractionlvl)
    print("-- Complete! --")

+def kernel_name_shortener(df, cache, level):
+    if level >= 5:
+        return df

+    columnName = ""
+    if "KernelName" in df:
+        columnName = "KernelName"
+    if "Name" in df:
+        columnName = "Name"
+
+    if columnName == "KernelName" or columnName == "Name":
+        # loop through all indices
+        for index in df.index:
+            original_name = df.loc[index, columnName]
+            if original_name in cache:
+                continue
+
+            # cache miss, add the shortened name to the dictionary
+            new_name = ""
+            matches = ""
+
+            names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
+
+            # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
+            if names_and_args.search(original_name):
+                matches = names_and_args.findall(original_name)
+            else:
+                # Works for first case  '__amd_rocclr_fillBuffer.kd'
+                # remove .kd and then parse through original regex
+                first_case = re.compile(r"([^\s]+)(.kd)")
+                Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
+                interim_name = first_case.search(original_name).group(1)
+                matches = Mod_name_and_args.findall(interim_name)
+
+            current_level = 0
+            for name in matches:
+                ##can cause errors if a function name or argument is equal to 'clone'
+                if name[0] == "clone":
+                    continue
+                if len(name) == 3:
+                    if name[2] == "::":
+                        continue
+
+                if current_level < level:
+                    new_name += name[0]
+                # closing '>' is to be taken account by the while loop
+                if name[1].count(">") == 0:
+                    if current_level < level:
+                        if not (current_level == level - 1 and name[1].count("<") > 0):
+                            new_name += name[1]
+                    current_level += name[1].count("<")
+
+                curr_index = 0
+                # cases include '>'  '> >, ' have to go in depth here to not lose account of commas and current level
+                while name[1].count(">") > 0 and curr_index < len(name[1]):
+                    if current_level < level:
+                        new_name += name[1][curr_index:]
+                        current_level -= name[1][curr_index:].count(">")
+                        curr_index = len(name[1])
+                    elif name[1][curr_index] == (">"):
+                        current_level -= 1
+                    curr_index += 1
+
+            cache[original_name] = new_name
+            if new_name == None or new_name == "":
+                cache[original_name] = original_name
+
+        df[columnName] = df[columnName].map(cache)
+
+    return df
 ################################################
 # Roofline Helpers
 ################################################
@@ -768,6 +839,7 @@ def main():
    # PROFILE MODE
    ##############
    if args.mode == "profile":
+        Extractionlvl = 3 #args.extraction_level
        print("Resolving rocprof")
        resolve_rocprof()
        # Cannot access parent directories
@@ -805,11 +877,43 @@ def main():
            roof_setup(args, my_parser, VER)
            # Generate roofline
            roofline_only(args.path, args.device, args.sort, args.mem_level, args.kernel_names, args.verbose)
+            #demangle
+            for file in os.listdir(args.path):
+                if file.endswith(".csv"):
+                    try:
+                        fileName = file[0 : file.find(".")]
+                        # Only shorten KernelNames if instructed to
+                        if Extractionlvl < 5:
+                            t1 = pd.read_csv(
+                                os.listdir(args.path) + "/" + file,
+                                on_bad_lines="skip",
+                                engine="python",
+                            )
+
+                            t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
+                    except pd.errors.EmptyDataError:
+                        print("Skipping empty csv " + file)

        # Profile only
        else:
            print("\n-------------\nProfile only\n-------------\n")
            omniperf_profile(args, VER)
+            #demangle
+            for file in os.listdir(args.path):
+                if file.endswith(".csv"):
+                    try:
+                        fileName = file[0 : file.find(".")]
+                        # Only shorten KernelNames if instructed to
+                        if Extractionlvl < 5:
+                            t1 = pd.read_csv(
+                                os.listdir(args.path) + "/" + file,
+                                on_bad_lines="skip",
+                                engine="python",
+                            )
+
+                            t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
+                    except pd.errors.EmptyDataError:
+                        print("Skipping empty csv " + file)

    ##############
    # DATABASE MODE
@@ -32,89 +32,82 @@ import getpass
 from pymongo import MongoClient
 from tqdm import tqdm
 import shutil
-import subprocess

-cache = dict()
+# cache = dict()
 supported_arch = {"gfx906": "mi50", "gfx908": "mi100", "gfx90a": "mi200"}
 MAX_SERVER_SEL_DELAY = 5000  # 5 sec connection timeout


-def kernel_name_shortener(df, cache, level):
-    if level >= 5:
-        return df
+# def kernel_name_shortener(df, cache, level):
+#     if level >= 5:
+#         return df

-    columnName = ""
-    if "KernelName" in df:
-        columnName = "KernelName"
-    if "Name" in df:
-        columnName = "Name"
+#     columnName = ""
+#     if "KernelName" in df:
+#         columnName = "KernelName"
+#     if "Name" in df:
+#         columnName = "Name"

-    if columnName == "KernelName" or columnName == "Name":
-        # loop through all indices
-        for index in df.index:
-            original_name = df.loc[index, columnName]
-            if original_name in cache:
-                continue
+#     if columnName == "KernelName" or columnName == "Name":
+#         # loop through all indices
+#         for index in df.index:
+#             original_name = df.loc[index, columnName]
+#             if original_name in cache:
+#                 continue

-            cmd = ["llvm-cxxfilt", original_name]
+#             # cache miss, add the shortened name to the dictionary
+#             new_name = ""
+#             matches = ""

-            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+#             names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")

-            demangled_name, e = proc.communicate()
-            demangled_name = str(demangled_name, "UTF-8").strip()
+#             # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
+#             if names_and_args.search(original_name):
+#                 matches = names_and_args.findall(original_name)
+#             else:
+#                 # Works for first case  '__amd_rocclr_fillBuffer.kd'
+#                 # remove .kd and then parse through original regex
+#                 first_case = re.compile(r"([^\s]+)(.kd)")
+#                 Mod_name_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]*)")
+#                 interim_name = first_case.search(original_name).group(1)
+#                 matches = Mod_name_and_args.findall(interim_name)

-            # cache miss, add the shortened name to the dictionary
-            new_name = ""
-            matches = ""
+#             current_level = 0
+#             for name in matches:
+#                 ##can cause errors if a function name or argument is equal to 'clone'
+#                 if name[0] == "clone":
+#                     continue
+#                 if len(name) == 3:
+#                     if name[2] == "::":
+#                         continue

-            names_and_args = re.compile(r"(?P<name>[( )A-Za-z0-9_]+)([ ,*<>()]+)(::)?")
+#                 if current_level < level:
+#                     new_name += name[0]
+#                 # closing '>' is to be taken account by the while loop
+#                 if name[1].count(">") == 0:
+#                     if current_level < level:
+#                         if not (current_level == level - 1 and name[1].count("<") > 0):
+#                             new_name += name[1]
+#                     current_level += name[1].count("<")

-            # works for name Kokkos::namespace::init_lock_array_kernel_threadid(int) [clone .kd]
-            if names_and_args.search(demangled_name):
-                matches = names_and_args.findall(demangled_name)
-            else:
-                # Works for first case  '__amd_rocclr_fillBuffer.kd'
-                cache[original_name] = new_name
-                if new_name == None or new_name == "":
-                    cache[original_name] = demangled_name
-                continue
+#                 curr_index = 0
+#                 # cases include '>'  '> >, ' have to go in depth here to not lose account of commas and current level
+#                 while name[1].count(">") > 0 and curr_index < len(name[1]):
+#                     if current_level < level:
+#                         new_name += name[1][curr_index:]
+#                         current_level -= name[1][curr_index:].count(">")
+#                         curr_index = len(name[1])
+#                     elif name[1][curr_index] == (">"):
+#                         current_level -= 1
+#                     curr_index += 1

-            current_level = 0
-            for name in matches:
-                ##can cause errors if a function name or argument is equal to 'clone'
-                if name[0] == "clone":
-                    continue
-                if len(name) == 3:
-                    if name[2] == "::":
-                        continue
+#             cache[original_name] = new_name
+#             if new_name == None or new_name == "":
+#                 cache[original_name] = original_name

-                if current_level < level:
-                    new_name += name[0]
-                # closing '>' is to be taken account by the while loop
-                if name[1].count(">") == 0:
-                    if current_level < level:
-                        if not (current_level == level - 1 and name[1].count("<") > 0):
-                            new_name += name[1]
-                    current_level += name[1].count("<")
+#         df[columnName] = df[columnName].map(cache)

-                curr_index = 0
-                # cases include '>'  '> >, ' have to go in depth here to not lose account of commas and current level
-                while name[1].count(">") > 0 and curr_index < len(name[1]):
-                    if current_level < level:
-                        new_name += name[1][curr_index:]
-                        current_level -= name[1][curr_index:].count(">")
-                        curr_index = len(name[1])
-                    elif name[1][curr_index] == (">"):
-                        current_level -= 1
-                    curr_index += 1
-
-            cache[original_name] = new_name
-            if new_name == None or new_name == "":
-                cache[original_name] = demangled_name
-
-        df[columnName] = df[columnName].map(cache)
-
-    return df
+#     return df


 # Verify target directory and setup connection
@@ -151,12 +144,12 @@ def parse(args, profileAndExport):

    db = "omniperf_" + str(args.team) + "_" + str(name) + "_" + soc

-    if Extractionlvl >= 5:
-        print("KernelName shortening disabled")
-    else:
-        print("KernelName shortening enabled")
+    # if Extractionlvl >= 5:
+    #     print("KernelName shortening disabled")
+    # else:
+    #     print("KernelName shortening enabled")

-    print("Kernel name verbose level:", Extractionlvl)
+    # print("Kernel name verbose level:", Extractionlvl)

    if args.password == "":
        try:
@@ -203,14 +196,14 @@ def convert_folder(connectionInfo, Extractionlvl):
        print("ERROR: Unable to connect to the server")
        sys.exit(1)
    # Set up directories
-    if Extractionlvl < 5:
-        newfilepath = connectionInfo["workload"]
-        newfilepath_h = newfilepath + "/renamedFiles/"
-        if not os.path.exists(newfilepath_h):
-            os.mkdir(newfilepath_h)
-        newfilepath = newfilepath_h + connectionInfo["db"] + "/"
-        if not os.path.exists(newfilepath):
-            os.mkdir(newfilepath)
+    # if Extractionlvl < 5:
+    #     newfilepath = connectionInfo["workload"]
+    #     newfilepath_h = newfilepath + "/renamedFiles/"
+    #     if not os.path.exists(newfilepath_h):
+    #         os.mkdir(newfilepath_h)
+    #     newfilepath = newfilepath_h + connectionInfo["db"] + "/"
+    #     if not os.path.exists(newfilepath):
+    #         os.mkdir(newfilepath)
    # Upload files
    i = 0
    file = "blank"
@@ -220,30 +213,30 @@ def convert_folder(connectionInfo, Extractionlvl):
            try:
                fileName = file[0 : file.find(".")]
                # Only shorten KernelNames if instructed to
-                if Extractionlvl < 5:
-                    t1 = pd.read_csv(
-                        connectionInfo["workload"] + "/" + file,
-                        on_bad_lines="skip",
-                        engine="python",
-                    )
+                # if Extractionlvl < 5:
+                #     t1 = pd.read_csv(
+                #         connectionInfo["workload"] + "/" + file,
+                #         on_bad_lines="skip",
+                #         engine="python",
+                #     )

-                    t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
-                    df_saved_file = t2.to_csv(newfilepath + file)
+                #     t2 = kernel_name_shortener(t1, cache, level=Extractionlvl)
+                #     df_saved_file = t2.to_csv(newfilepath + file)

-                    cmd = (
-                        "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
-                    ).format(
-                        connectionInfo["username"],
-                        connectionInfo["password"],
-                        connectionInfo["host"],
-                        connectionInfo["port"],
-                        connectionInfo["db"],
-                        newfilepath + file,
-                        fileName,
-                    )
-                    os.system(cmd)
-                else:
-                    cmd = (
+                #     cmd = (
+                #         "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
+                #     ).format(
+                #         connectionInfo["username"],
+                #         connectionInfo["password"],
+                #         connectionInfo["host"],
+                #         connectionInfo["port"],
+                #         connectionInfo["db"],
+                #         newfilepath + file,
+                #         fileName,
+                #     )
+                #     os.system(cmd)
+                # else:
+                cmd = (
                        "mongoimport --quiet --uri mongodb://{}:{}@{}:{}/{}?authSource=admin --file {} -c {} --drop --type csv --headerline"
                    ).format(
                        connectionInfo["username"],
@@ -254,7 +247,7 @@ def convert_folder(connectionInfo, Extractionlvl):
                        connectionInfo["workload"] + "/" + file,
                        fileName,
                    )
-                    os.system(cmd)
+                os.system(cmd)
                i += 1
            except pd.errors.EmptyDataError:
                print("Skipping empty csv " + file)
@@ -265,7 +258,7 @@ def convert_folder(connectionInfo, Extractionlvl):
    newValue = {"name": connectionInfo["db"]}
    mycol.replace_one(value, newValue, upsert=True)
    # Remove tmp directory if we shortened KernelNames
-    if Extractionlvl < 5:
-        shutil.rmtree(newfilepath_h)
+    # if Extractionlvl < 5:
+    #     shutil.rmtree(newfilepath_h)
    print("{} collections added.".format(i))
    print("Workload name uploaded")