From fd64cb242db50a449bc0bc0a6a834299cbb2b28c Mon Sep 17 00:00:00 2001 From: akolliasAMD <99202231+akolliasAMD@users.noreply.github.com> Date: Tue, 30 May 2023 12:57:25 -0600 Subject: [PATCH] added time results on npkit generator (#749) [ROCm/rccl commit: 2b1efa9e9abafc3fd6c03ba19b5500a32fdb11a1] --- .../tools/scripts/npkit_trace_generator.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/projects/rccl/tools/scripts/npkit_trace_generator.py b/projects/rccl/tools/scripts/npkit_trace_generator.py index 9b12640e75..7cfc021c27 100644 --- a/projects/rccl/tools/scripts/npkit_trace_generator.py +++ b/projects/rccl/tools/scripts/npkit_trace_generator.py @@ -60,7 +60,7 @@ def parse_cpu_event(event_bytes): 'timestamp': int.from_bytes(event_bytes[8:16], byteorder='little', signed=False) } -def parse_gpu_event_file(npkit_dump_dir, npkit_event_def, rank, buf_idx, gpu_clock_scale, cpu_clock_scale, gpu_time_cpu, gpu_time_gpu, dictionary_of_stats): +def parse_gpu_event_file(npkit_dump_dir, npkit_event_def, rank, buf_idx, gpu_clock_scale, cpu_clock_scale, gpu_time_cpu, gpu_time_gpu, dictionary_of_stats, warmup_runs=5): gpu_event_file_path = os.path.join(npkit_dump_dir, 'gpu_events_rank_%d_buf_%d' % (rank, buf_idx)) stats_key = 'gpu_rank_%d' % (rank) channel_stats = {} @@ -71,7 +71,6 @@ def parse_gpu_event_file(npkit_dump_dir, npkit_event_def, rank, buf_idx, gpu_clo event_type_to_seq = {} unfiltered_events = [] start_event_id = 0 - warmup_runs = 5 with open(gpu_event_file_path, 'rb') as f: raw_content = f.read() raw_content_size = len(raw_content) @@ -127,24 +126,32 @@ def parse_gpu_event_file(npkit_dump_dir, npkit_event_def, rank, buf_idx, gpu_clo break delta_time = max(0.001, gpu_events[-1]['ts'] - event_start_ts) # delta needs to take the last begin bandwidth = gpu_events[-1]['args']['size'] / delta_time / 1e3 - if (current_id,parsed_gpu_event['size']) in channel_stats: temp_size = channel_stats[(current_id,parsed_gpu_event['size'])][1]+1 temp = channel_stats[(current_id,parsed_gpu_event['size'])][0] * (temp_size - 1 )/ (temp_size) + temp_delta = channel_stats[(current_id,parsed_gpu_event['size'])][2] * (temp_size - 1 )/ (temp_size) channel_stats[(current_id,parsed_gpu_event['size'])][0] = bandwidth / (temp_size) + temp channel_stats[(current_id,parsed_gpu_event['size'])][1] = temp_size + channel_stats[(current_id,parsed_gpu_event['size'])][2] = delta_time / (temp_size) + temp_delta else: - channel_stats[(current_id,parsed_gpu_event['size'])] = [bandwidth, 1] + channel_stats[(current_id,parsed_gpu_event['size'])] = [bandwidth, 1, delta_time] gpu_events[-1]['args']['bw (GB/s)'] = bandwidth raw_content_idx += raw_event_size - # breakpoint() aggragate + for key in channel_stats: if key in dictionary_of_stats[stats_key]: dictionary_of_stats[stats_key][key][0] += channel_stats[key][0] + old_size = dictionary_of_stats[stats_key][key][1] + new_size = channel_stats[key][1] dictionary_of_stats[stats_key][key][1] += channel_stats[key][1] + cur_size = dictionary_of_stats[stats_key][key][1] + new_avg_time = (dictionary_of_stats[stats_key][key][2]*(old_size)+(new_size)*channel_stats[key][2])/(cur_size) + dictionary_of_stats[stats_key][key][2] = new_avg_time else: dictionary_of_stats[stats_key][key] = channel_stats[key] + + breakpoint() return gpu_events def parse_cpu_event_file(npkit_dump_dir, npkit_event_def, rank, channel, cpu_clock_scale, cpu_time_global, cpu_time_local): @@ -261,7 +268,7 @@ def convert_npkit_dump_to_trace(npkit_dump_dir, output_dir, npkit_event_def, gpu for key in dictionary_of_stats: f.write(key + "\n") for event,size in dictionary_of_stats[key]: - f.write(npkit_event_def['id_to_type'][event] + "\t"+ "size:" + str(size) + "\t" + str(dictionary_of_stats[key][event,size][0]) + "\n") + f.write(npkit_event_def['id_to_type'][event] + "\t"+ "size:" + str(size) + " time: " +str(dictionary_of_stats[key][event,size][2]) + " \t bw: " + str(dictionary_of_stats[key][event,size][0]) + "\n") else: with open(os.path.join(output_dir, 'npkit_event_trace.json'), 'w') as f: json.dump(trace, f)