adding rocprof and pytorch parser scripts (#1214)

* adding rocprof parser script

* adding the support for multiple json files

* adding pytorch profiler script

* remove filtering from pytorch log

* addressing the review comments and adding the feature to parse all kernels

* completing the report for torch profiler

---------

Co-authored-by: Marzieh Berenjkoub <mberenjk@amd.com>
Этот коммит содержится в:
mberenjk
2024-07-19 14:51:28 -05:00
коммит произвёл GitHub
родитель 6f331b0d43
Коммит 519843d2cf
2 изменённых файлов: 186 добавлений и 0 удалений
+118
Просмотреть файл
@@ -0,0 +1,118 @@
import json
import argparse
import os
import csv
import sys
def get_num_gpu():
    """Return the group size used when summarising kernel events.

    The value is a fixed constant (8).  The name suggests it is the number
    of GPUs taking part in each profiled operation -- NOTE(review): confirm
    this matches the machine the traces were captured on.

    This is a plain module-level function: it must not be wrapped in
    ``@staticmethod``, because outside a class body a staticmethod object is
    not callable on Python versions before 3.10.
    """
    return 8
def load_json_files(directory):
    """Load the ``traceEvents`` of every JSON file found under *directory*.

    The directory tree is walked recursively.  Each file whose name ends in
    ``.json`` is parsed and its ``traceEvents`` value is appended, as one
    element per file, to a single list.

    Args:
        directory: Root directory to search.  A trailing path separator is
            not required.

    Returns:
        ``{'traceEvents': [events_of_file_1, events_of_file_2, ...]}``, or an
        empty dict when no JSON file was found.

    Raises:
        KeyError: If a JSON file has no ``traceEvents`` key.
        json.JSONDecodeError: If a ``.json`` file is not valid JSON.
    """
    json_data = {}
    for root, dirs, files in os.walk(directory):
        # Sorting makes the traversal order independent of the file system.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.json'):
                continue
            # Join with ``root`` (not ``directory``) so files in nested
            # directories resolve correctly and no trailing slash is needed.
            with open(os.path.join(root, file), 'r', encoding='utf-8') as f:
                data = json.load(f)
            json_data.setdefault('traceEvents', []).append(data['traceEvents'])
    return json_data
def _summarize_group(group):
    """Build the summary CSV row for one non-empty group of kernel events.

    When several events tie for a minimum or maximum, the first one in
    *group* is reported.
    """
    shortest = min(group, key=lambda event: event['dur'])
    longest = max(group, key=lambda event: event['dur'])
    first = min(group, key=lambda event: event['ts'])
    last = max(group, key=lambda event: event['ts'])
    return {
        'min_dur': shortest['dur'],
        'max_dur': longest['dur'],
        'min_start': first['ts'],
        'max_start': last['ts'],
        # Spread between the earliest and the latest start in the group.
        'latency_before_first_gpu': last['ts'] - first['ts'],
        'max_dur - min_dur': longest['dur'] - shortest['dur'],
        # Duration of the event that started last.
        'duration_from_last_arrival': last['dur'],
        'first_gpu': first['pid'],
        'last_gpu': last['pid'],
        'shortest_gpu': shortest['pid'],
        'longest_gpu': longest['pid'],
    }


def parse(json_file_path, output_file_name, function_name):
    """Extract kernel events from profiler traces into CSV and JSON files.

    Every event whose ``cat`` is ``'kernel'`` and whose ``name`` contains
    *function_name* (or every kernel event when *function_name* is
    ``'all'``) is collected from the traces returned by
    ``load_json_files(json_file_path)``.  The events are sorted by
    ``(ts, pid)`` and written to ``<output_file_name>.csv`` one row each.
    After every ``get_num_gpu()`` rows a summary row with the group's
    min/max duration and start time is written, followed by a blank line.
    A trailing group with fewer events gets no summary row.  The selected
    events are also dumped to ``<output_file_name>.json``.

    NOTE(review): the grouping assumes that consecutive events in
    ``(ts, pid)`` order belong to the same operation -- confirm for traces
    where operations overlap in time.

    Args:
        json_file_path: Directory passed to ``load_json_files``.
        output_file_name: Output path without extension.
        function_name: Substring of the kernel name to select, or ``'all'``.

    Returns:
        None.  When nothing matches, a message is printed and no file is
        written.
    """
    data_all = load_json_files(json_file_path)
    # ``load_json_files`` returns an empty dict when it finds no JSON file;
    # treat that as "no events" instead of failing with a KeyError.
    data = data_all.get('traceEvents', [])

    kernels = []
    for entries in data:
        for entry in entries:
            if 'name' not in entry or entry.get('cat') != 'kernel':
                continue
            if function_name == 'all' or function_name in entry['name']:
                kernels.append(entry)

    if not kernels:
        print('There is no ' + function_name +' in this log')
        return

    sorted_kernels = sorted(kernels, key=lambda x: (x['ts'], x['pid']))
    csv_file_name = output_file_name + '.csv'
    json_file_out = output_file_name + '.json'
    # The leading empty object is kept so the JSON output keeps the layout
    # this script has always produced.
    json_data_out = {'traceEvents': [{}]}
    group_size = get_num_gpu()

    with open(csv_file_name, 'w', newline='') as csvfile:
        fieldnames = ['pid', 'dur', 'ts', 'min_dur', 'max_dur', 'min_start', 'max_start', 'latency_before_first_gpu', 'max_dur - min_dur', 'duration_from_last_arrival', 'first_gpu', 'last_gpu', 'shortest_gpu', 'longest_gpu']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        # Events of the group currently being filled.  Statistics are taken
        # from the buffered events themselves, so no sentinel start values
        # are needed and zero-valued ``dur``/``ts`` are handled correctly.
        group = []
        for entry in sorted_kernels:
            writer.writerow({'pid': entry['pid'], 'dur': entry['dur'],
                             'ts': entry['ts']})
            json_data_out['traceEvents'].append(entry)
            group.append(entry)
            if len(group) == group_size:
                writer.writerow(_summarize_group(group))
                csvfile.write('\n')
                group = []

    with open(json_file_out, 'w') as jsonfileout:
        json.dump(json_data_out, jsonfileout, indent=4)
    print(f"Data successfully written to {csv_file_name} and {json_file_out}.")
def main():
    """Parse the command-line arguments and run :func:`parse`.

    Positional arguments, in order: the directory holding the JSON traces,
    the output file name (without extension), and the kernel function name
    to select.
    """
    parser = argparse.ArgumentParser(description='Json file and the function to parse.')
    # The first argument is handed to load_json_files, which walks it as a
    # directory, so the help text describes a directory rather than a file.
    parser.add_argument('json_file_path', metavar='file_path', type=str,
                        help='Path to the directory containing the JSON trace files to process')
    parser.add_argument('output_file_name', type=str, help='Output File Name')
    parser.add_argument('function_name', type=str, help='Kernel Function Name, e.g., oneShotAllReduce, ncclDevKernel_Generic, mscclKernel')
    args = parser.parse_args()
    parse(args.json_file_path, args.output_file_name, args.function_name)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
+68
Просмотреть файл
@@ -0,0 +1,68 @@
import json
import argparse
import os
import csv
def parse(json_file, function_name, output_file_name, num_gpus=8):
    """Extract HIP kernel-launch events from a trace into CSV and JSON files.

    Events named ``hipExtLaunchKernel`` or ``hipLaunchKernel`` are selected
    when *function_name* is ``'all'``, or when *function_name* occurs in the
    event's ``args['args']`` value or in its name.  The selected events are
    sorted by ``(args['BeginNs'], args['pid'])`` and written to
    ``<output_file_name>.csv``, with a blank line after every *num_gpus*
    rows.  They are also dumped to ``<output_file_name>.json``.

    Args:
        json_file: Path of the JSON trace; it must contain ``traceEvents``.
        function_name: Substring to look for, or ``'all'``.
        output_file_name: Output path without extension.
        num_gpus: Number of rows per group in the CSV file (default 8).

    Returns:
        None.  When nothing matches, a message is printed and no file is
        written.

    Raises:
        ValueError: If *num_gpus* is smaller than 1.
    """
    if num_gpus < 1:
        raise ValueError('num_gpus must be at least 1')

    with open(json_file, 'r') as f:
        data = json.load(f)

    launch_names = ('hipExtLaunchKernel', 'hipLaunchKernel')
    kernels = []
    for entry in data["traceEvents"]:
        if entry.get('name') not in launch_names:
            continue
        if function_name == 'all':
            kernels.append(entry)
            continue
        # A launch event without an ``args['args']`` payload cannot match on
        # its arguments; treat it as non-matching instead of raising.
        launch_args = entry.get('args', {}).get('args', '')
        if function_name in launch_args or function_name in entry['name']:
            kernels.append(entry)

    if not kernels:
        print('There is no ' + function_name +' in this log')
        return

    sorted_kernels = sorted(kernels, key=lambda x: (x['args']['BeginNs'], x['args']['pid']))
    csv_file_name = output_file_name + '.csv'
    json_file_out = output_file_name + '.json'
    # The leading empty object is kept so the JSON output keeps the layout
    # this script has always produced.
    json_data_out = {'traceEvents': [{}]}

    with open(csv_file_name, 'w', newline='') as csvfile:
        fieldnames = ['pid', 'BeginNs', 'dur', 'ts']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for count, entry in enumerate(sorted_kernels, start=1):
            writer.writerow({'pid': entry['args']['pid'],
                             'BeginNs': entry['args']['BeginNs'],
                             'dur': entry['dur'],
                             'ts': entry['ts']})
            json_data_out['traceEvents'].append(entry)
            # Visually separate each complete group of rows.
            if count % num_gpus == 0:
                csvfile.write('\n')

    with open(json_file_out, 'w') as jsonfileout:
        json.dump(json_data_out, jsonfileout, indent=4)
    print(f"Data successfully written to {csv_file_name} and {json_file_out}.")
def main():
    """Read the command-line arguments and hand them to :func:`parse`.

    Positional arguments, in order: the JSON trace file, the kernel
    function name to select, and the output file name (without extension).
    """
    cli = argparse.ArgumentParser(
        description='Json file and the function to parse.',
    )
    cli.add_argument(
        'json_file_path',
        metavar='file_path',
        type=str,
        help='Path to the JSON file to process',
    )
    cli.add_argument(
        'function_name',
        type=str,
        help='Kernel Function Name, e.g., gatherTopK, ncclDevKernel_Generic, mscclKernel',
    )
    cli.add_argument(
        'output_file_name',
        type=str,
        help='Output File Name',
    )
    options = cli.parse_args()
    parse(options.json_file_path, options.function_name, options.output_file_name)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()