From 769f6303d8d51ac34160cf8f5b3aa72b483a2bc1 Mon Sep 17 00:00:00 2001 From: Giovanni LB Date: Sat, 7 Oct 2023 16:34:19 -0300 Subject: [PATCH] SWDEV-423947: Added ATT header option Change-Id: I6e9ad213b578d752c865c9f9af4ee0b79d447e1b [ROCm/rocprofiler commit: e0962d9d634b3e37777ae0ce1de5f12efba2d723] --- projects/rocprofiler/bin/att_to_out.py | 3 +- projects/rocprofiler/plugin/att/att.cpp | 26 +++++---- projects/rocprofiler/plugin/att/att.py | 38 ++++++------ projects/rocprofiler/plugin/att/stitch.py | 3 - .../rocprofiler/src/core/hsa/queues/queue.cpp | 10 ++-- .../rocprofiler/src/core/session/att/att.h | 13 +++++ projects/rocprofiler/src/tools/tool.cpp | 58 +++++++++++++------ 7 files changed, 96 insertions(+), 55 deletions(-) diff --git a/projects/rocprofiler/bin/att_to_out.py b/projects/rocprofiler/bin/att_to_out.py index 84b457593a..6bfcd7fdf7 100755 --- a/projects/rocprofiler/bin/att_to_out.py +++ b/projects/rocprofiler/bin/att_to_out.py @@ -38,7 +38,8 @@ in_filename = sys.argv[1] out_filename = in_filename.split(".att")[0] + ".out" in_bytes = np.fromfile(in_filename, dtype=np.uint16) -out_bytes = [map16(c) + "\n" for c in in_bytes] +offset = 4 if in_bytes[0] >= 0xC000 else 0 +out_bytes = [map16(c) + "\n" for c in in_bytes[offset:]] with open(out_filename, "w") as f: [f.write(b) for b in out_bytes] diff --git a/projects/rocprofiler/plugin/att/att.cpp b/projects/rocprofiler/plugin/att/att.cpp index caaac5ccd4..4275f8a81a 100644 --- a/projects/rocprofiler/plugin/att/att.cpp +++ b/projects/rocprofiler/plugin/att/att.cpp @@ -45,30 +45,33 @@ #include "rocprofiler_plugin.h" #include "../utils.h" #include "code_printing.hpp" - +#include "../../src/core/session/att/att.h" #define ATT_FILENAME_MAXBYTES 90 #define TEST_INVALID_KERNEL size_t(-1) namespace { - class att_plugin_t { public: - att_plugin_t() { + att_plugin_t(void* data) { std::vector mpivars = {"MPI_RANK", "OMPI_COMM_WORLD_RANK", "MV2_COMM_WORLD_RANK"}; for (const char* envvar : mpivars) - if (const char* env = getenv(envvar)) { - MPI_RANK = atoi(env); - MPI_ENABLE = true; - break; - } + if (const char* env = getenv(envvar)) { + MPI_RANK = atoi(env); + MPI_ENABLE = true; + break; + } + + header.raw = reinterpret_cast(data); + header.reserved = 0x11; } bool MPI_ENABLE = false; int MPI_RANK = 0; std::mutex writing_lock; bool is_valid_{true}; + rocprofiler::att_header_packet_t header{.raw = 0}; inline bool att_file_exists(const std::string& name) { struct stat buffer; @@ -130,12 +133,14 @@ class att_plugin_t { << '\n'; // iterate over each shader engine att trace + header.navi = !att_tracer_record->intercept_list.userdata; int se_num = att_tracer_record->shader_engine_data_count; for (int i = 0; i < se_num; i++) { if (!att_tracer_record->shader_engine_data || !att_tracer_record->shader_engine_data[i].buffer_ptr) continue; printf("--------------collecting data for shader_engine %d---------------\n", i); + header.SEID = i; rocprofiler_record_se_att_data_t* se_att_trace = &att_tracer_record->shader_engine_data[i]; char* data_buffer_ptr = reinterpret_cast(se_att_trace->buffer_ptr); @@ -145,6 +150,8 @@ class att_plugin_t { std::cerr << "ATT Failed to open file: " << outfilepath << "_se" << i << ".att\n"; return ROCPROFILER_STATUS_ERROR; } + if (header.enable) + out.write((const char*)&header, sizeof(header.raw)); out.write(data_buffer_ptr, se_att_trace->buffer_size); } @@ -153,7 +160,6 @@ class att_plugin_t { std::cerr << "Could not open ISA file: " << outfilepath << "_isa.s" << std::endl; return ROCPROFILER_STATUS_ERROR; } - uint64_t kernel_begin_addr = att_tracer_record->intercept_list.userdata; isafile << " " << kernel_name_mangled << '\n'; for (size_t i = 0; i < att_tracer_record->intercept_list.count; i++) { @@ -231,7 +237,7 @@ ROCPROFILER_EXPORT int rocprofiler_plugin_initialize(uint32_t rocprofiler_major_ if (att_plugin != nullptr) return ROCPROFILER_STATUS_ERROR; - att_plugin = new att_plugin_t(); + att_plugin = new att_plugin_t(data); if (att_plugin->IsValid()) return ROCPROFILER_STATUS_SUCCESS; // The plugin failed to initialied, destroy it and return an error. diff --git a/projects/rocprofiler/plugin/att/att.py b/projects/rocprofiler/plugin/att/att.py index 62019940fb..43f3531397 100755 --- a/projects/rocprofiler/plugin/att/att.py +++ b/projects/rocprofiler/plugin/att/att.py @@ -172,7 +172,7 @@ path_to_parser = os.path.abspath(rocprofv2_att_lib) SO = CDLL(path_to_parser) SO.AnalyseBinary.restype = ReturnInfo -SO.AnalyseBinary.argtypes = [ctypes.c_char_p, ctypes.c_int, ctypes.c_bool] +SO.AnalyseBinary.argtypes = [ctypes.c_char_p] SO.wrapped_parse_binary.argtypes = [ctypes.c_char_p, ctypes.c_char_p] SO.wrapped_parse_binary.restype = ReturnAssemblyInfo SO.FreeBinary.argtypes = [ctypes.c_uint64] @@ -217,9 +217,9 @@ def parse_binary(filename, kernel=None): return code, jumps, kernel_addr -def getWaves_binary(name, target_cu): +def getWaves_binary(name): filename = os.path.abspath(str(name)) - info = SO.AnalyseBinary(filename.encode("utf-8"), target_cu, False) + info = SO.AnalyseBinary(filename.encode("utf-8")) isValid = info.flags & 0x1 if isValid == 0: @@ -267,7 +267,10 @@ def persist(trace_file, SIMD, traces): smem_ins, smem_stalls, br_ins, br_taken_ins, br_stalls = [], [], [], [], [] for wave in SIMD: - if wave.instructions is None or traces[wave.traceid].instructions is None: + try: + if wave.instructions is None or traces[wave.traceid].instructions is None: + continue + except: continue simds.append(wave.simd) waves.append(wave.wave_id) @@ -456,21 +459,18 @@ if __name__ == "__main__": print("Skipping analysis.") quit() - with open(os.getenv("COUNTERS_PATH"), "r") as f: - lines = [l.split("//")[0] for l in f.readlines()] + if os.getenv("COUNTERS_PATH"): + with open(os.getenv("COUNTERS_PATH"), "r") as f: + lines = [l.split("//")[0] for l in f.readlines()] - EVENT_NAMES = [] - clean = lambda x: x.split("=")[1].split(" ")[0].split("\n")[0] - for line in lines: - if "PERFCOUNTER_ID=" in line: - EVENT_NAMES += ["id: " + clean(line)] - elif "att: TARGET_CU" in line: - args.target_cu = int(clean(line)) - for line in lines: - if "PERFCOUNTER=" in line: - EVENT_NAMES += [clean(line).split("SQ_")[1].lower()] - if args.target_cu is None: - args.target_cu = 1 + EVENT_NAMES = [] + clean = lambda x: x.split("=")[1].split(" ")[0].split("\n")[0] + for line in lines: + if "PERFCOUNTER_ID=" in line: + EVENT_NAMES += ["id: " + clean(line)] + for line in lines: + if "PERFCOUNTER=" in line: + EVENT_NAMES += [clean(line).split("SQ_")[1].lower()] att_kernel_list = glob.glob(args.att_kernel) @@ -520,7 +520,7 @@ if __name__ == "__main__": gc.collect() for name in filenames: - traces, waves, perfevents, occupancy, gfxv, addrs = getWaves_binary(name, args.target_cu) + traces, waves, perfevents, occupancy, gfxv, addrs = getWaves_binary(name) if gfxv is None: continue diff --git a/projects/rocprofiler/plugin/att/stitch.py b/projects/rocprofiler/plugin/att/stitch.py index fc418fe0d5..74efe39a26 100644 --- a/projects/rocprofiler/plugin/att/stitch.py +++ b/projects/rocprofiler/plugin/att/stitch.py @@ -258,7 +258,6 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto): while i < N: if insts[i].type == PCINFO: i += 1 - N -= 1 continue #print(line, i, WaveInstCategory[insts[i].type], insts[i].num_waves, insts[i].cycles) @@ -297,7 +296,6 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto): pcskip.append(i) matched = next >= 0 i += 1 - N -= 1 elif as_line[1] == SWAPPC: next = watchlist.swappc(as_line[0], line, i) matched = inst.type in [SALU, JUMP] @@ -305,7 +303,6 @@ def stitch(insts, raw_code, jumps, gfxv, bIsAuto): pcskip.append(i) matched = next >= 0 i += 1 - N -= 1 elif inst.type == as_line[1]: if line in jumps: loopCount[jumps[line] - 1] += 1 diff --git a/projects/rocprofiler/src/core/hsa/queues/queue.cpp b/projects/rocprofiler/src/core/hsa/queues/queue.cpp index ff3e152fc3..08d9451d24 100644 --- a/projects/rocprofiler/src/core/hsa/queues/queue.cpp +++ b/projects/rocprofiler/src/core/hsa/queues/queue.cpp @@ -171,7 +171,6 @@ static const kernel_descriptor_t* GetKernelCode(uint64_t kernel_object) { static uint32_t arch_vgpr_count(const std::string_view& name, const kernel_descriptor_t& kernel_code) { - std::string info_name(name.data(), name.size()); if (strcmp(name.data(), "gfx90a") == 0 || strncmp(name.data(), "gfx94", 5) == 0) return (AMD_HSA_BITS_GET(kernel_code.compute_pgm_rsrc3, AMD_COMPUTE_PGM_RSRC_THREE_ACCUM_OFFSET) + @@ -1127,9 +1126,12 @@ void Queue::WriteInterceptor(const void* packets, uint64_t pkt_count, uint64_t u dispatch_packet.completion_signal, session_id_snapshot, buffer_id, profile, kernel_properties, (uint32_t)syscall(__NR_gettid), user_pkt_index); - uint64_t off = dispatch_packet.kernel_object + - GetKernelCode(dispatch_packet.kernel_object)->kernel_code_entry_byte_offset; - codeobj_record::make_capture(rocprofiler_record_id_t{record_id}, capture_mode, off); + uint64_t userdata = HSASupport_Singleton::GetInstance() + .GetHSAAgentInfo(queue_info.GetGPUAgent().handle) + .GetDeviceInfo() + .getName() + .find("gfx9") != std::string::npos; + codeobj_record::make_capture(rocprofiler_record_id_t{record_id}, capture_mode, userdata); codeobj_record::start_capture(rocprofiler_record_id_t{record_id}); codeobj_record::stop_capture(rocprofiler_record_id_t{record_id}); diff --git a/projects/rocprofiler/src/core/session/att/att.h b/projects/rocprofiler/src/core/session/att/att.h index 337c841c3f..f0a44ca7ea 100644 --- a/projects/rocprofiler/src/core/session/att/att.h +++ b/projects/rocprofiler/src/core/session/att/att.h @@ -44,6 +44,19 @@ typedef struct { uint64_t queue_index; } att_pending_signal_t; +union att_header_packet_t { + struct { + uint64_t reserved : 14; + uint64_t navi : 1; + uint64_t enable : 1; + uint64_t DSIMDM : 4; + uint64_t DCU : 5; + uint64_t DSA : 1; + uint64_t SEID : 6; + }; + uint64_t raw; +}; + namespace att { class AttTracer { diff --git a/projects/rocprofiler/src/tools/tool.cpp b/projects/rocprofiler/src/tools/tool.cpp index 95836bf420..1917a08eea 100644 --- a/projects/rocprofiler/src/tools/tool.cpp +++ b/projects/rocprofiler/src/tools/tool.cpp @@ -59,6 +59,14 @@ #include "utils/helper.h" #include "trace_buffer.h" +#include "core/session/att/att.h" + + +struct PluginHeaderPacket +{ + std::string plugin_path; + void* userdata; +}; #define SLEEP_CYCLE_LENGTH 100l @@ -97,10 +105,10 @@ void warning(const std::string& msg) { std::cerr << msg << std::endl; } class rocprofiler_plugin_t { public: - rocprofiler_plugin_t(const std::string& plugin_path) { - plugin_handle_ = dlopen(plugin_path.c_str(), RTLD_LAZY); + rocprofiler_plugin_t(const PluginHeaderPacket& data) { + plugin_handle_ = dlopen(data.plugin_path.c_str(), RTLD_LAZY); if (plugin_handle_ == nullptr) { - warning(std::string("Warning: dlopen for ") + plugin_path + " failed: " + dlerror()); + warning(std::string("Warning: dlopen for ") + data.plugin_path + " failed: " + dlerror()); return; } @@ -117,8 +125,7 @@ class rocprofiler_plugin_t { if (auto* initialize = reinterpret_cast( dlsym(plugin_handle_, "rocprofiler_plugin_initialize")); initialize != nullptr) - valid_ = - initialize(ROCPROFILER_VERSION_MAJOR, ROCPROFILER_VERSION_MINOR, &counter_names) == 0; + valid_ = initialize(ROCPROFILER_VERSION_MAJOR, ROCPROFILER_VERSION_MINOR, data.userdata) == 0; } ~rocprofiler_plugin_t() { @@ -253,7 +260,7 @@ std::vector GetCounterNames() { } typedef std::tuple>, - std::vector, std::vector, std::vector> + std::vector, std::vector, std::vector, uint64_t> att_parsed_input_t; static int GetMpRank() { @@ -269,7 +276,7 @@ att_parsed_input_t GetATTParams() { std::vector counters_names; std::vector dispatch_ids; const char* path = getenv("COUNTERS_PATH"); - if (!path) return {{}, {}, {}, {}}; + if (!path) return {{}, {}, {}, {}, 0}; // List of parameters the user can set. Maxvalue is unused. std::unordered_map ATT_PARAM_NAMES{}; @@ -304,9 +311,12 @@ att_parsed_input_t GetATTParams() { std::ifstream trace_file(path); if (!trace_file.is_open()) { std::cout << "Unable to open att trace file." << std::endl; - return {{}, {}, {}, {}}; + return {{}, {}, {}, {}, 0}; } + rocprofiler::att_header_packet_t header{.raw = 0}; + header.enable = 1; + header.DSIMDM = default_params["SIMD_SELECT"]; int MPI_RANK = GetMpRank(); bool started_att_counters = false; @@ -332,7 +342,10 @@ att_parsed_input_t GetATTParams() { } if (!started_att_counters) continue; - if (param_name == "KERNEL") { + if (param_name == "REMOVE_HEADER_PACKET") { + header.enable = 0; + continue; + } else if (param_name == "KERNEL") { kernel_names.push_back(line); continue; } else if (param_name == "PERFCOUNTER") { @@ -371,14 +384,19 @@ att_parsed_input_t GetATTParams() { param_name.c_str(), line.c_str()); for (auto& name : ATT_PARAM_NAMES) printf("%s\n", name.first.c_str()); } + + if (param_name.find("TARGET_CU") != std::string::npos) + header.DCU = param_value; + else if (param_name.find("SIMD_SELECT") != std::string::npos) + header.DSIMDM = param_value; } - if (!started_att_counters) return {{}, {}, {}, {}}; + if (!started_att_counters) return {{}, {}, {}, {}, 0}; for (auto& param : default_params) parameters.push_back(std::make_pair(ATT_PARAM_NAMES[param.first], param.second)); - return {parameters, kernel_names, counters_names, dispatch_ids}; + return {parameters, kernel_names, counters_names, dispatch_ids, header.raw}; } void finish() { @@ -435,7 +453,7 @@ static void env_var_replace(const char* env_name) { } // load plugins -void plugins_load() { +void plugins_load(void* userdata) { // Load output plugin if (Dl_info dl_info; dladdr((void*)plugins_load, &dl_info) != 0) { const char* plugin_name = getenv("ROCPROFILER_PLUGIN_LIB"); @@ -461,7 +479,11 @@ void plugins_load() { << std::string(getenv("ROCPROFILER_COUNTERS")) << '\n'; } - if (!plugin.emplace(fs::path(dl_info.dli_fname).replace_filename(plugin_name)).is_valid()) { + PluginHeaderPacket header{ + .plugin_path = fs::path(dl_info.dli_fname).replace_filename(plugin_name), + .userdata = userdata + }; + if (!plugin.emplace(header).is_valid()) { plugin.reset(); } } @@ -683,10 +705,6 @@ ROCPROFILER_EXPORT bool OnLoad(void* table, uint64_t runtime_version, uint64_t f } } - // load the plugins - plugins_load(); - - std::vector apis_requested; if (getenv("ROCPROFILER_HIP_API_TRACE")) apis_requested.emplace_back(ACTIVITY_DOMAIN_HIP_API); @@ -703,7 +721,11 @@ ROCPROFILER_EXPORT bool OnLoad(void* table, uint64_t runtime_version, uint64_t f std::vector kernel_names; std::vector att_counters_names; std::vector dispatch_ids; - std::tie(params, kernel_names, att_counters_names, dispatch_ids) = GetATTParams(); + uint64_t attheader; + std::tie(params, kernel_names, att_counters_names, dispatch_ids, attheader) = GetATTParams(); + + // load the plugins + plugins_load(params.size() ? (void*)attheader : (void*)&counter_names); for (auto& kv_pair : params) parameters.emplace_back(rocprofiler_att_parameter_t{kv_pair.first, kv_pair.second});