diff --git a/plugin/att/att.py b/plugin/att/att.py index ac6f87ed31..b4526dda1f 100755 --- a/plugin/att/att.py +++ b/plugin/att/att.py @@ -17,6 +17,8 @@ import numpy as np import matplotlib.pyplot as plt import json +COUNTERS_MAX_CAPTURES = 1<<12 + class PerfEvent(ctypes.Structure): _fields_ = [ ('time', c_uint64), @@ -46,7 +48,7 @@ class KvPair(ctypes.Structure): """ Matches pair = (key, value) on the python side """ _fields_ = [('key', ctypes.c_int), ('value', ctypes.c_int)] - + class ReturnAssemblyInfo(ctypes.Structure): """ Matches ReturnAssemblyInfo on the python side """ @@ -303,38 +305,43 @@ def draw_wave_metrics(selections, normalize): plt.figure(figsize=(15,3)) - delta_time = max(1,int(0.5+np.min([get_delta_time(events) for events in EVENTS]))) - maxtime = np.max([np.max([e.time for e in events]) for events in EVENTS])+1 + delta_step = 8 + quad_delta_time = max(delta_step,int(0.5+np.min([get_delta_time(events) for events in EVENTS]))) + maxtime = np.max([np.max([e.time for e in events]) for events in EVENTS])/quad_delta_time+1 + + if maxtime*delta_step >= COUNTERS_MAX_CAPTURES: + delta_step = 1 + while maxtime >= COUNTERS_MAX_CAPTURES: + quad_delta_time *= 2 + maxtime /= 2 + + maxtime = int(min(maxtime*delta_step, COUNTERS_MAX_CAPTURES)) event_timeline = np.zeros((16, maxtime), dtype=np.int32) - print('Delta:', delta_time) - print('Max_cycles:', maxtime) + print('Delta:', quad_delta_time) + print('Max_cycles:', maxtime*quad_delta_time*4//delta_step) - kernsize = 2*(delta_time//14)+1 - trim = max(maxtime//5000,1) - cycles = 4*np.arange(maxtime)[::trim] - - kernel = np.asarray([np.exp(-abs(k/kernsize)**2) for k in range(-kernsize*3,kernsize*3+1)]) - kernel /= np.sum(kernel)*len(EVENTS)*delta_time + cycles = 4*quad_delta_time//delta_step*np.arange(maxtime) + kernel = len(EVENTS)*quad_delta_time for events in EVENTS: for e in range(len(events)-1): bk = events[e].bank*4 - start = events[e].time - end = start+delta_time + start = events[e].time // 
(quad_delta_time//delta_step) + end = start+delta_step event_timeline[bk:bk+4, start:end] += np.asarray(events[e].toTuple()[1:5])[:, None] start = events[-1].time - event_timeline[bk:bk+4, start:start+delta_time] += \ + event_timeline[bk:bk+4, start:start+delta_step] += \ np.asarray(events[-1].toTuple()[1:5])[:, None] - - event_timeline = [np.convolve(e, kernel)[3*kernsize:-3*kernsize] for e in event_timeline] + event_timeline = [np.convolve(e, [kernel for k in range(3)])[1:-1] for e in event_timeline] + #event_timeline = [e/kernel for e in event_timeline] if normalize: event_timeline = [100*e/max(e.max(), 1E-5) for e in event_timeline] - + colors = ['blue', 'green', 'gray', 'red', 'orange', 'cyan', 'black', 'darkviolet', 'yellow', 'darkred', 'pink', 'lime', 'gold', 'tan', 'aqua', 'olive'] - [plt.plot(cycles, e[::trim], '-', label=n, color=c) + [plt.plot(cycles, e, '-', label=n, color=c) for e, n, c, sel in zip(event_timeline, EVENT_NAMES, colors, selections) if sel] plt.legend() diff --git a/src/core/hsa/queues/queue.cpp b/src/core/hsa/queues/queue.cpp index 5dd01c4ad4..8452a7afce 100644 --- a/src/core/hsa/queues/queue.cpp +++ b/src/core/hsa/queues/queue.cpp @@ -841,13 +841,61 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt } /* Write the transformed packets to the hardware queue. 
*/ writer(&transformed_packets[0], transformed_packets.size()); - } else if (session_id.handle > 0 && pkt_count > 0 && is_att_collection_mode && session) { + } else if (session_id.handle > 0 && pkt_count > 0 && + is_att_collection_mode && session && + KernelInterceptCount < MAX_ATT_PROFILES + ) { // att start // Getting Queue Data and Information auto& queue_info = *static_cast(data); std::lock_guard lk(queue_info.qw_mutex); Agent::AgentInfo* agentInfo = &(hsa_support::GetAgentInfo(queue_info.GetGPUAgent().handle)); + bool can_profile_anypacket = false; + std::vector can_profile_packet; + + for (size_t i = 0; i < pkt_count; ++i) { + auto& original_packet = static_cast(packets)[i]; + bool b_profile_this_object = false; + + // Skip packets other than kernel dispatch packets. + if (bit_extract(original_packet.header, HSA_PACKET_HEADER_TYPE, + HSA_PACKET_HEADER_TYPE + HSA_PACKET_HEADER_WIDTH_TYPE - 1) == + HSA_PACKET_TYPE_KERNEL_DISPATCH) { + + auto& kdispatch = static_cast(packets)[i]; + uint64_t kernel_object = kdispatch.kernel_object; + + // Try to match the mangled kernel name with given matches in input.txt + try { + std::lock_guard lock(ksymbol_map_lock); + assert(ksymbols); + const std::string& kernel_name = ksymbols->at(kernel_object); + + // We want to initiate att profiling only if a match exists + for(const std::string& kernel_matches : kernel_profile_names) { + if (kernel_name.find(kernel_matches) != std::string::npos) { + b_profile_this_object = true; + break; + } + } + if (!b_profile_this_object) printf("Skipping: %s\n", kernel_name.c_str()); + } catch (...) 
{ + printf("Warning: Unknown name for object %lu\n", kernel_object); + } + } + + if (b_profile_this_object) + can_profile_anypacket = true; + can_profile_packet.push_back(b_profile_this_object); + } + + if (!can_profile_anypacket) { + /* Write the original packets to the hardware if no packet will be profiled */ + writer(packets, pkt_count); + return; + } + // Preparing att Packets Packet::packet_t start_packet{}; Packet::packet_t stop_packet{}; @@ -902,44 +950,12 @@ void WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t user_pkt att_params, &start_packet, &stop_packet); } - // Searching across all the packets given during this write for (size_t i = 0; i < pkt_count; ++i) { auto& original_packet = static_cast(packets)[i]; - // Skip packets other than kernel dispatch packets. - if (bit_extract(original_packet.header, HSA_PACKET_HEADER_TYPE, - HSA_PACKET_HEADER_TYPE + HSA_PACKET_HEADER_WIDTH_TYPE - 1) != - HSA_PACKET_TYPE_KERNEL_DISPATCH) { - transformed_packets.emplace_back(packets_arr[i]); - continue; - } - - auto& kdispatch = static_cast(packets)[i]; - uint64_t kernel_object = kdispatch.kernel_object; - bool b_profile_this_object = false; - - // Try to match the mangled kernel name with given matches in input.txt - try { - std::lock_guard lock(ksymbol_map_lock); - assert(ksymbols); - const std::string& kernel_name = ksymbols->at(kernel_object); - - // We want to initiate att profiling only if a match exists - for(const std::string& kernel_matches : kernel_profile_names) { - if (kernel_name.find(kernel_matches) != std::string::npos) { - b_profile_this_object = true; - break; - } - } - if (!b_profile_this_object) printf("Skipping: %s\n", kernel_name.c_str()); - } catch (...) { - printf("Warning: Unknown name for object %lu\n", kernel_object); - } - - // If no match was found or intercept count > maximum desired profiles, skip this kernel. 
- if (!b_profile_this_object || KernelInterceptCount >= MAX_ATT_PROFILES) { - printf("Skipping: %lu\n", kernel_object); + // Skip all packets marked with !can_profile + if (i >= can_profile_packet.size() || can_profile_packet[i] == false) { transformed_packets.emplace_back(packets_arr[i]); continue; }