c77596b703
* SWDEV-499989: Add script to convert rocprofv3 counter collection output format to that of v1 * Add logging and argparsing * Dropping duplicated counters in pmc multiple lines * Adding test for conversion * moving conversion script to test files * copy conversion script from scripts folder
157 行
3.8 KiB
Python
実行ファイル
157 行
3.8 KiB
Python
実行ファイル
#!/usr/bin/env python3
|
|
import os
|
|
import pandas as pd
|
|
import argparse
|
|
import logging
|
|
|
|
|
|
def read_csv(file_path):
|
|
df = pd.DataFrame()
|
|
try:
|
|
df = pd.read_csv(file_path)
|
|
except Exception as e:
|
|
logging.info(f"Error reading {file_path}: {e}")
|
|
raise
|
|
return df
|
|
|
|
|
|
def get_counter_collection_files(root_path):
|
|
file_paths = []
|
|
for root, _, files in os.walk(root_path):
|
|
if "pmc_" in root:
|
|
for file in files:
|
|
if file.endswith("counter_collection.csv"):
|
|
file_path = os.path.join(root, file)
|
|
file_paths.append(file_path)
|
|
return file_paths
|
|
|
|
|
|
def get_combined_df(args):
|
|
files_list = []
|
|
for input in args.input:
|
|
if os.path.isfile(input):
|
|
files_list.append(input)
|
|
elif os.path.isdir(input):
|
|
files_list.extend(get_counter_collection_files(input))
|
|
if not files_list:
|
|
raise ValueError("Valid Input files not found")
|
|
logging.info(f"Processing files: {files_list}")
|
|
combined_df = pd.DataFrame()
|
|
for file in files_list:
|
|
combined_df = pd.concat([combined_df, read_csv(file)], ignore_index=True)
|
|
return combined_df
|
|
|
|
|
|
def write_to_file(df, args):
|
|
logging.info(f"Saving output file to : {args.output}")
|
|
directory, file_path = os.path.split(args.output)
|
|
if directory:
|
|
os.makedirs(directory, exist_ok=True)
|
|
df.to_csv(args.output, index=False)
|
|
|
|
|
|
def main(args):
|
|
logging.basicConfig(level=args.loglevel)
|
|
input_df = get_combined_df(args)
|
|
# Validate
|
|
columns = [
|
|
"Correlation_Id",
|
|
"Dispatch_Id",
|
|
"Agent_Id",
|
|
"Queue_Id",
|
|
"Process_Id",
|
|
"Thread_Id",
|
|
"Grid_Size",
|
|
"Kernel_Id",
|
|
"Kernel_Name",
|
|
"Workgroup_Size",
|
|
"LDS_Block_Size",
|
|
"Scratch_Size",
|
|
"VGPR_Count",
|
|
"SGPR_Count",
|
|
"Counter_Name",
|
|
"Counter_Value",
|
|
"Start_Timestamp",
|
|
"End_Timestamp",
|
|
]
|
|
for col in input_df.columns:
|
|
if col not in columns:
|
|
logging.debug(f"Unexpected column {col} found in rocprofv3 input file")
|
|
|
|
non_index_columns = [
|
|
"Correlation_Id",
|
|
"Start_Timestamp",
|
|
"End_Timestamp",
|
|
"Process_Id",
|
|
"Thread_Id",
|
|
"Kernel_Id",
|
|
]
|
|
|
|
# Convert
|
|
indexes = [
|
|
"Dispatch_Id",
|
|
"Agent_Id",
|
|
"Grid_Size",
|
|
"Kernel_Name",
|
|
"LDS_Block_Size",
|
|
"Queue_Id",
|
|
"SGPR_Count",
|
|
"Scratch_Size",
|
|
"VGPR_Count",
|
|
"Workgroup_Size",
|
|
]
|
|
|
|
# Drop duplicate counters in multiple PMC lines
|
|
input_df.drop_duplicates(
|
|
subset=indexes + ["Counter_Name"], keep="first", inplace=True
|
|
)
|
|
|
|
pivoted_data = input_df.pivot_table(
|
|
index=indexes, columns="Counter_Name", values="Counter_Value", aggfunc="sum"
|
|
).reset_index()
|
|
|
|
# Save
|
|
write_to_file(pivoted_data, args)
|
|
|
|
|
|
def parse_args():
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument(
|
|
"-i",
|
|
"--input",
|
|
help="Rocprofv3 Counter Collection input files and/or directories containing `*counter_collection.csv` files",
|
|
nargs="+",
|
|
default=[],
|
|
required=True,
|
|
)
|
|
parser.add_argument(
|
|
"-o",
|
|
"--output",
|
|
help="Rocprofv1 formatted output file name",
|
|
default=None,
|
|
type=str,
|
|
required=True,
|
|
)
|
|
parser.add_argument(
|
|
"-d",
|
|
"--debug",
|
|
help="Debug Logs",
|
|
action="store_const",
|
|
dest="loglevel",
|
|
const=logging.DEBUG,
|
|
default=logging.WARNING,
|
|
)
|
|
parser.add_argument(
|
|
"-v",
|
|
"--verbose",
|
|
help="Verbose Logs",
|
|
action="store_const",
|
|
dest="loglevel",
|
|
const=logging.INFO,
|
|
)
|
|
return parser.parse_args()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main(parse_args())
|