From cf9975f2174dccb97b7da991df270f4e8ff87675 Mon Sep 17 00:00:00 2001 From: Rachida Kebichi Date: Fri, 23 Oct 2020 18:27:57 -0400 Subject: [PATCH] SWDEV-255543 Hsa memcopy info csv gen Change-Id: I35ed7d613879343851007dce473784ab227cb3ed --- bin/mem_manager.py | 170 +++++++++++++++++++++++++++++++++++++++------ bin/tblextr.py | 14 ++-- 2 files changed, 157 insertions(+), 27 deletions(-) diff --git a/bin/mem_manager.py b/bin/mem_manager.py index 6f0e03dbee..5c15c9702d 100755 --- a/bin/mem_manager.py +++ b/bin/mem_manager.py @@ -40,24 +40,41 @@ DELIM = ',' # Mem copy manager class class MemManager: - def __init__(self, db): + def __init__(self, db, indir): self.db = db self.allocations = {} + self.hsa_agent_types = {} self.memcopies = {} + self.memcpy_index = {} self.filename = '' self.fd = 0 + self.parse_hsa_handles(indir + '/' + 'hsa_handles.txt'); def __del__(self): if self.fd != 0: self.fd.close() + # Parsing the mapping of HSA agent and memory pool handles + def parse_hsa_handles(self, infile): + inp = open(infile, 'r') + cpu_agent_ptrn = re.compile(r'(0x[0-9a-fA-F]+) agent cpu') + gpu_agent_ptrn = re.compile(r'(0x[0-9a-fA-F]+) agent gpu') + for line in inp.readlines(): + m_cpu = cpu_agent_ptrn.match(line) + if m_cpu: + self.hsa_agent_types[str(int(m_cpu.group(1),16))] = 0# "cpu" + m_gpu = gpu_agent_ptrn.match(line) + if m_gpu: + self.hsa_agent_types[str(int(m_gpu.group(1),16))] = 1 #"gpu" + inp.close() + # register alloc and memcpy API calls # ['BeginNs', 'EndNs', 'pid', 'tid', 'Name', 'args', 'Index', 'Data'], def register_api(self, rec_vals): res = '' record_name = rec_vals[4] # 'Name' record_args = rec_vals[5] # 'args' - malloc_ptrn = re.compile(r'hip.*Malloc') - mcopy_ptrn = re.compile(r'hipMemcpy') + malloc_ptrn = re.compile(r'hip.*Malloc|hsa_amd_memory_pool_allocate') + mcopy_ptrn = re.compile(r'hipMemcpy|hsa_amd_memory_async_copy') if malloc_ptrn.match(record_name): self.add_allocation(record_name, record_args) @@ -66,6 +83,44 @@ class MemManager: return res + + # register memcpy asynchronous copy + # ['BeginNs', 'EndNs', 'Name', 'pid', 'tid', 'Index', 'proc-id'], + def register_copy(self, rec_vals): + data = '' + event = rec_vals[2] # 'Name' + recordid = rec_vals[5] # 'Index' + procid = rec_vals[6] # 'proc-id' + size_ptrn = re.compile(DELIM + 'Size=(\d+)' + DELIM) + + # query syncronous memcopy API record + key = (recordid,procid,0) + if key in self.memcopies: + data = self.memcopies[key] + + # query asyncronous memcopy API record + key = (recordid,procid,1) + if key in self.memcopies: + if data != '': fatal('register_copy: corrupted record sync/async') + + async_copy_start_time = rec_vals[0] + async_copy_end_time = rec_vals[1] + + duration = int(async_copy_end_time) - int(async_copy_start_time) + size = 0 + m = size_ptrn.search(self.memcopies[key]) + if m: + size = m.group(1) + bandwidth = round(float(size) * 1000 / duration, 2) + + tid = rec_vals[4] + copy_line_header = str(async_copy_start_time) + DELIM + str(async_copy_end_time) + DELIM + str(procid) + DELIM + str(tid) + copy_line_footer = 'BW=' + str(bandwidth) + DELIM + 'Async=' + str(1) + data = copy_line_header + self.memcopies[key] + copy_line_footer + self.memcopies[key] = data + + return data + # register memcpy asynchronous activity # rec_vals: ['BeginNs', 'EndNs', 'dev-id', 'queue-id', 'Name', 'pid', 'tid', 'Index', 'proc-id', 'Data'], def register_activity(self, rec_vals): @@ -115,18 +170,27 @@ class MemManager: elif event == "hipMalloc3DArray": malloc_args_ptrn = re.compile(r'\(array\((.*)\) width\((.*)\) height\((.*)\) depth\((.*)\)\)') choice = 2 + elif event == "hsa_amd_memory_pool_allocate": + #({handle=25291264}, 40, 0, 0x7ffc4c7bf1b0) + malloc_args_ptrn = re.compile(r'\({handle=\d+}, (\d+), \d+, (0x[0-9a-fA-F]+)\)') + choice = 4 else: #(ptr(0x7f3407000000) size(800000000) flags(0)) malloc_args_ptrn = re.compile(r'\(ptr\((.*)\) size\((.*)\) .*\)') choice = 3 m = malloc_args_ptrn.match(args) if m: - ptr = int(m.group(1), 16) - if choice == 3: + if choice == 4: + ptr = int(m.group(2), 16) + size = int(m.group(1)) + elif choice == 3: + ptr = int(m.group(1), 16) size = int(m.group(2)) elif choice == 1: + ptr = int(m.group(1), 16) size = int(m.group(2)) * int(m.group(3)) else: + ptr = int(m.group(1), 16) size = int(m.group(2)) * int(m.group(3)) * int(m.group(4)) self.allocations[ptr] = (size, event) @@ -145,6 +209,13 @@ class MemManager: addr_type = 'pinned' elif event in ondevice: addr_type = 'device' + elif ptr in self.hsa_agent_types: + if self.hsa_agent_types[ptr] == 0: + addr_type = 'pinned' + elif self.hsa_agent_types[ptr] == 1: + addr_type = 'device' + else: + fatal('internal error: ptr(' + ptr + ') cannot be identified') else: fatal('internal error: ptr(' + ptr + ') cannot be identified') return addr_type @@ -160,24 +231,41 @@ class MemManager: pid = recvals[2] tid = recvals[3] - select_expr = '"Index" = ' + str(recordid) + ' AND "proc-id" = ' + str(procid) - # hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) - hipMemcpy_ptrn = re.compile(r'\(\s*dst\((.*)\) src\((.*)\) sizeBytes\((\d+)\).*\)') + hip_memcpy_ptrn = re.compile(r'\(\s*dst\((.*)\) src\((.*)\) sizeBytes\((\d+)\).*\)') # hipMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, # size_t height, hipMemcpyKind kind); - hipMemcpy_ptrn2 = re.compile(r'\(\s*dst\((.*)\) .* src\((.*)\) .* width\((\d+)\) height\((\d+)\).*\)') + hip_memcpy_ptrn2 = re.compile(r'\(\s*dst\((.*)\) .* src\((.*)\) .* width\((\d+)\) height\((\d+)\).*\)') # hipMemcpyToArray(hipArray* dst, size_t wOffset, size_t hOffset, const void* src, # size_t count, hipMemcpyKind kind); - hipMemcpy_ptrn3 = re.compile(r'\(\s*dst\((.*)\) .* src\((.*)\) count\((\d+)\).*\)') + hip_memcpy_ptrn3 = re.compile(r'\(\s*dst\((.*)\) .* src\((.*)\) count\((\d+)\).*\)') # memcopy with kind argument - hipMemcpy_ptrn_kind = re.compile(r'.* kind\((\d+)\)\s*.*') + hip_memcpy_ptrn_kind = re.compile(r'.* kind\((\d+)\)\s*.*') + #hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent, const void* src, + # hsa_agent_t src_agent, size_t size, + # uint32_t num_dep_signals, + # const hsa_signal_t* dep_signals, + # hsa_signal_t completion_signal); + # "(0x7f8ab6600000, 27064880, 0x7f8b16000000, 27059968, 800000000, 0, 0, 140240759809536) = 0" + # hsa_memcpy_ptrn_prev used to support format transition and will be cleaned up later. + hsa_memcpy_ptrn_prev = re.compile(r'\((0x[0-9a-fA-F]+), (\d+), (0x[0-9a-fA-F]+), (\d+), (\d+), .*\) = \d') + # "(0x7fd83bc00000, {handle=16124864}, 0x7fd89b600000, {handle=16119808}, 800000000, 0, 0, {handle=140573877724672}) = 0" + hsa_memcpy_ptrn = re.compile(r'\((0x[0-9a-fA-F]+), {handle=(\d+)}, (0x[0-9a-fA-F]+), {handle=(\d+)}, (\d+), .*\) = \d') # aysnc memcopy async_event_ptrn = re.compile(r'Async|async') - m_basic = hipMemcpy_ptrn.match(args) - m_2d = hipMemcpy_ptrn2.match(args) - m_array = hipMemcpy_ptrn3.match(args) + m_basic_hip = hip_memcpy_ptrn.match(args) + m_basic_hsa_prev = hsa_memcpy_ptrn_prev.match(args) + m_basic_hsa = hsa_memcpy_ptrn.match(args) + is_hip = True if not (m_basic_hsa_prev or m_basic_hsa) else False + if not is_hip: + if procid in self.memcpy_index.keys(): + self.memcpy_index[procid] += 1 + else: + self.memcpy_index[procid] = 0 + recordid = self.memcpy_index[procid] + m_2d = hip_memcpy_ptrn2.match(args) + m_array = hip_memcpy_ptrn3.match(args) is_async = 1 if async_event_ptrn.search(event) else 0 async_copy_start_time = -1 @@ -192,7 +280,7 @@ class MemManager: bandwidth = 0 duration = 0 - switcher = { + kind_switcher = { '0': "HtoH", '1': "HtoD", '2': "DtoH", @@ -201,12 +289,30 @@ class MemManager: } condition_matched = False - if m_basic: - dstptr = m_basic.group(1) + if m_basic_hip: + dstptr = m_basic_hip.group(1) dstptr_type = self.get_ptr_type(dstptr) - srcptr = m_basic.group(2) + srcptr = m_basic_hip.group(2) srcptr_type = self.get_ptr_type(srcptr) - size = int(m_basic.group(3)) + size = int(m_basic_hip.group(3)) + condition_matched = True + if m_basic_hsa_prev: + dstptr = m_basic_hsa_prev.group(1) + dstptr_type = self.get_ptr_type(dstptr) + dst_agent_ptr = m_basic_hsa_prev.group(2) + srcptr = m_basic_hsa_prev.group(3) + srcptr_type = self.get_ptr_type(srcptr) + src_agent_ptr = m_basic_hsa_prev.group(4) + size = int(m_basic_hsa_prev.group(5)) + condition_matched = True + if m_basic_hsa: + dstptr = m_basic_hsa.group(1) + dstptr_type = self.get_ptr_type(dstptr) + dst_agent_ptr = m_basic_hsa.group(2) + srcptr = m_basic_hsa.group(3) + srcptr_type = self.get_ptr_type(srcptr) + src_agent_ptr = m_basic_hsa.group(4) + size = int(m_basic_hsa.group(5)) condition_matched = True if m_array: dstptr = m_array.group(1) @@ -231,9 +337,29 @@ class MemManager: duration = (int(end_time) - int(start_time)) bandwidth = round(float(size) * 1000 / duration, 2) - m = hipMemcpy_ptrn_kind.match(args) - if m: - direction = switcher.get(m.group(1), "unknown") + + evt_switcher = { + 'hipMemcpyDtoD': "DtoD", + 'hipMemcpyDtoDAsync': "DtoD", + 'hipMemcpyDtoH': "DtoH", + 'hipMemcpyDtoHAsync': "DtoH", + 'hipMemcpyHtoD': "HtoD", + 'hipMemcpyHtoDAsync': "HtoD", + } + + if is_hip: + m = hip_memcpy_ptrn_kind.match(args) + if m: + direction = kind_switcher.get(m.group(1), "unknown") + else: + direction = evt_switcher.get(event, "unknown") + else: + if dst_agent_ptr in self.hsa_agent_types and src_agent_ptr in self.hsa_agent_types: + if self.hsa_agent_types[src_agent_ptr] == 1: direction = 'D' + elif self.hsa_agent_types[src_agent_ptr] == 0: direction = 'H' + if direction != 'unknown': direction += 'to' + if self.hsa_agent_types[dst_agent_ptr] == 1: direction += 'D' + elif self.hsa_agent_types[dst_agent_ptr] == 0: direction += 'H' copy_line_header = '' copy_line_footer = '' diff --git a/bin/tblextr.py b/bin/tblextr.py index d8b749b2b6..a4308d0def 100755 --- a/bin/tblextr.py +++ b/bin/tblextr.py @@ -343,6 +343,7 @@ api_table_descr = [ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep_filtr, expl_id): global hsa_activity_found global memory_manager + copy_raws = [] if (hsa_activity_found): copy_raws = db.table_get_raws('COPY') copy_csv = '' @@ -490,7 +491,7 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep else: activity_record_patching(db, ops_table_name, kernel_found, kernel_str, stream_found, stream_id, select_expr) - api_data = memory_manager.register_api(rec_vals) if mcopy_data_enabled and api_name == 'hip' else '' + api_data = memory_manager.register_api(rec_vals) if mcopy_data_enabled else '' rec_vals.append(api_data) rec_vals[2] = api_pid @@ -515,8 +516,8 @@ def fill_api_db(table_name, db, indir, api_name, api_pid, dep_pid, dep_list, dep # fill COPY DB copy_table_descr = [ - ['BeginNs', 'EndNs', 'Name', 'pid', 'tid', 'Index', 'proc-id'], - {'Index':'INTEGER', 'proc-id':'INTEGER', 'Name':'TEXT', 'args':'TEXT', 'BeginNs':'INTEGER', 'EndNs':'INTEGER', 'pid':'INTEGER', 'tid':'INTEGER'} + ['BeginNs', 'EndNs', 'Name', 'pid', 'tid', 'Index', 'proc-id', 'Data'], + {'Index':'INTEGER', 'proc-id':'INTEGER', 'Name':'TEXT', 'args':'TEXT', 'BeginNs':'INTEGER', 'EndNs':'INTEGER', 'pid':'INTEGER', 'tid':'INTEGER', 'Data':'TEXT'} ] def fill_copy_db(table_name, db, indir): pid = COPY_PID @@ -544,6 +545,9 @@ def fill_copy_db(table_name, db, indir): rec_vals.append(corr_id) rec_vals.append(proc_id) + # registering memcopy information + activity_data = memory_manager.register_copy(rec_vals) if mcopy_data_enabled else '' + rec_vals.append(activity_data) db.insert_entry(table_handle, rec_vals) # filling dependencies @@ -708,14 +712,14 @@ else: with open(dbfile, mode='w') as fd: fd.truncate() db = SQLiteDB(dbfile) - memory_manager = MemManager(db) + memory_manager = MemManager(db, indir) ext_trace_found = fill_ext_db('rocTX', db, indir, 'roctx', EXT_PID) kfd_trace_found = fill_api_db('KFD', db, indir, 'kfd', KFD_PID, NONE_PID, [], {}, 0) - hsa_activity_found = fill_copy_db('COPY', db, indir) hsa_trace_found = fill_api_db('HSA', db, indir, 'hsa', HSA_PID, COPY_PID, kern_dep_list, {}, 0) + hsa_activity_found = fill_copy_db('COPY', db, indir) hip_trace_found = fill_api_db('HIP', db, indir, 'hip', HIP_PID, OPS_PID, [], {}, 1) ops_filtr = fill_ops_db('OPS', 'COPY', db, indir)