From f91f0712f714f96ccdd40595bf3d305e6906afab Mon Sep 17 00:00:00 2001 From: "Baraldi, Giovanni" Date: Fri, 25 Apr 2025 18:49:16 +0200 Subject: [PATCH] SWDEV-528686: ATT fix for gfx12 s_wait_idle. Fixes for csv. Default to parse to trace. Fix for ROCR_VISIBLE_DEVICES. (#345) * Fix for gfx12 s_wait_idle. Added wait field on att.csv * Format and default to ATT to trace * Update .mds * No fatal error for invalid agent * Tidy fixes * Rename wait to idle, removed unneeded headers * Remove unused traceID * Tidy fix * Fix csv output * Formatting * Fix tests * Fix tests * Fix for visible devices * Review comment: Fix cmake * Review suggestion * Remove changelog/readme * Review comments * Review comment for CSV * Formatting --------- Co-authored-by: Giovanni Baraldi [ROCm/rocprofiler-sdk commit: a8f33970694454d8978f328e431f96c6feb447f7] --- .../rocprofiler-sdk/source/bin/rocprofv3.py | 136 +++++++------- .../source/lib/att-tool/CMakeLists.txt | 0 .../source/lib/att-tool/code.cpp | 65 ++++--- .../source/lib/att-tool/code.hpp | 18 +- .../source/lib/att-tool/dl.cpp | 1 - .../source/lib/att-tool/profile_interface.cpp | 13 +- .../source/lib/att-tool/profile_interface.hpp | 1 - .../source/lib/att-tool/waitcnt/analysis.cpp | 2 - .../source/lib/att-tool/waitcnt/analysis.hpp | 27 ++- .../source/lib/att-tool/waitcnt/gfx12.cpp | 27 ++- .../tests/att_decoder_waitcnt_test.cpp | 27 +-- .../source/lib/att-tool/wave.cpp | 5 +- .../source/lib/att-tool/wave.hpp | 3 +- .../lib/rocprofiler-sdk-tool/config.hpp | 6 +- .../rocprofiler-sdk/thread_trace/att_core.cpp | 6 +- .../advanced-thread-trace/CMakeLists.txt | 166 ++++++++---------- .../advanced-thread-trace/att_input.json | 1 - .../advanced-thread-trace/att_input.yml.in | 21 --- 18 files changed, 250 insertions(+), 275 deletions(-) mode change 100755 => 100644 projects/rocprofiler-sdk/source/lib/att-tool/CMakeLists.txt diff --git a/projects/rocprofiler-sdk/source/bin/rocprofv3.py b/projects/rocprofiler-sdk/source/bin/rocprofv3.py index 
55ed964dc8..9c104de922 100755 --- a/projects/rocprofiler-sdk/source/bin/rocprofv3.py +++ b/projects/rocprofiler-sdk/source/bin/rocprofv3.py @@ -656,89 +656,79 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins default_att_lib_path, att_support_args, att_support_inp = check_att_capability( rocp_args ) - if att_support_args or len(att_support_inp) != 0: - choice_list = [] - for keys, values in att_support_inp.items(): - choice_list.extend(values) - if att_support_args: - choice_list.extend(list(att_support_args)) - # remove duplicates - choice_list = list(set(choice_list)) + choice_list = [] + for keys, values in att_support_inp.items(): + choice_list.extend(values) + if att_support_args: + choice_list.extend(list(att_support_args)) - att_options = parser.add_argument_group("Advanced Thread Trace (ATT) options") + # remove duplicates + choice_list = list(set(choice_list)) - add_parser_bool_argument( - att_options, - "--advanced-thread-trace", - "--att", - help="Enable ATT", - ) + att_options = parser.add_argument_group("Advanced Thread Trace (ATT) options") - att_options.add_argument( - "--att-library-path", - help="Search path(s) to decoder library/libraries", - default=default_att_lib_path if not att_support_inp else None, - nargs="+", - ) + add_parser_bool_argument( + att_options, + "--advanced-thread-trace", + "--att", + help="Enable ATT", + ) - att_options.add_argument( - "--att-target-cu", - help="ATT target compute unit", - default=None, - ) + att_options.add_argument( + "--att-library-path", + help="Search path(s) to decoder library/libraries", + default=default_att_lib_path if not att_support_inp else None, + nargs="+", + ) - att_options.add_argument( - "--att-simd-select", - help="Select ATT SIMD", - default=None, - type=str, - ) + att_options.add_argument( + "--att-target-cu", + help="ATT target compute unit", + default=None, + ) - att_options.add_argument( - "--att-buffer-size", - help="Buffer Size", - default=None, - 
type=str, - ) + att_options.add_argument( + "--att-simd-select", + help="Select ATT SIMD", + default=None, + type=str, + ) - att_options.add_argument( - "--att-shader-engine-mask", - help="att shader engine mask", - default=None, - type=str, - ) + att_options.add_argument( + "--att-buffer-size", + help="Buffer Size", + default=None, + type=str, + ) - att_options.add_argument( - "--att-parse", - type=str.lower, - default=( - choice_list[0] if len(choice_list) == 1 and not att_support_inp else None - ), - help="Select ATT Parse method from the choices", - choices=set(choice_list), - ) + att_options.add_argument( + "--att-shader-engine-mask", + help="att shader engine mask", + default=None, + type=str, + ) - att_options.add_argument( - "--att-perfcounters", - help="Set performance counters, and optionally their mask", - default=None, - type=str.upper, - ) + att_options.add_argument( + "--att-perfcounters", + help="Set performance counters, and optionally their mask. gfx9 only.", + default=None, + type=str.upper, + ) - att_options.add_argument( - "--att-perfcounter-ctrl", - help="Integer in [0,32] range specifying collection period.", - default=None, - type=int, - ) + att_options.add_argument( + "--att-perfcounter-ctrl", + help="Integer in [0,32] range specifying collection period. 
gfx9 only.", + default=None, + type=int, + ) - add_parser_bool_argument( - att_options, - "--att-serialize-all", - default=False, - help="Serialize all kernels", - ) + add_parser_bool_argument( + att_options, + "--att-serialize-all", + default=False, + help="Serialize all kernels", + ) return (parser.parse_args(rocp_args), app_args, att_support_args, att_support_inp) @@ -1408,11 +1398,7 @@ def run(app_args, args, **kwargs): ): fatal_error("Advanced thread trace cannot be enabled with pc sampling") - if not args.att_parse: - fatal_error("provide the parser choice") - update_env("ROCPROF_ADVANCED_THREAD_TRACE", True, overwrite=True) - update_env("ROCPROF_ATT_CAPABILITY", args.att_parse, overwrite=True) if args.att_target_cu is not None: update_env( diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/att-tool/CMakeLists.txt old mode 100755 new mode 100644 diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/code.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/code.cpp index 101421fe1d..7d48306e70 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/code.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/code.cpp @@ -22,6 +22,7 @@ #include "code.hpp" #include +#include "lib/output/csv.hpp" #include "outputfile.hpp" #include @@ -33,7 +34,7 @@ namespace rocprofiler { namespace att_wrapper { -#define ATT_CSV_NAME "att_output.csv" +using csv_encoder = rocprofiler::tool::csv::csv_encoder<8>; // Builds a json filetree by recursively inserting "path" into the json object. 
void @@ -48,10 +49,9 @@ navigate(nlohmann::json& json, std::vector& path, const std::string navigate(j, path, filename); } -CodeFile::CodeFile(const Fspath& _dir, std::shared_ptr& _table) -: dir(_dir) -, filename(_dir / "code.json") -, table(_table) +CodeFile::CodeFile(Fspath _dir, std::shared_ptr _table) +: dir(std::move(_dir)) +, table(std::move(_table)) {} CodeFile::~CodeFile() @@ -76,20 +76,44 @@ CodeFile::~CodeFile() return a.first.marker_id < b.first.marker_id; }); - OutputFile file(dir / ATT_CSV_NAME); + std::stringstream ofs; + csv_encoder::write_row(ofs, + "CodeObj", + "Vaddr", + "Instruction", + "Hitcount", + "Latency", + "Stall", + "Idle", + "Source"); - file << "CodeObj, Vaddr, Instruction, Hitcount, Latency, Source\n"; for(auto& [pc, line] : vec) { if(kernel_names.find(pc) != kernel_names.end()) { - file << pc.marker_id << ',' << pc.addr << ",\"; " << kernel_names.at(pc).name - << "\",0,0,\"" << kernel_names.at(pc).demangled << "\"\n"; + csv_encoder::write_row(ofs, + pc.marker_id, + pc.addr, + "; " + kernel_names.at(pc).name, + 0, + 0, + 0, + 0, + kernel_names.at(pc).demangled); } - file << pc.marker_id << ',' << pc.addr << ",\"" << line->code_line->inst << "\"," - << line->hitcount << ',' << line->latency << ',' << line->code_line->comment - << '\n'; + csv_encoder::write_row(ofs, + pc.marker_id, + pc.addr, + line->code_line->inst, + line->hitcount, + line->latency, + line->stall, + line->idle, + line->code_line->comment); } + + OutputFile file(dir.parent_path() / ("stats_" + dir.filename().string() + ".csv")); + file << ofs.str(); } if(!GlobalDefs::get().has_format("json")) return; @@ -113,17 +137,17 @@ CodeFile::~CodeFile() if(kernel_names.find(line.first) != kernel_names.end()) { std::stringstream code; - code << "[\"; " << kernel_names.at(line.first).name << "\", 100, " - << (isa.line_number - 1) << ", \"" << kernel_names.at(line.first).demangled - << "\", " << line.first.marker_id << ", " << line.first.addr << ", 0, 0]"; - + code << "[\"; " << 
kernel_names.at(line.first).name << "\",0," << (isa.line_number - 1) + << ",\"" << kernel_names.at(line.first).demangled << "\"," << line.first.marker_id + << "," << line.first.addr << ",0,0,0,0]"; jcode.push_back(nlohmann::json::parse(code.str())); } std::stringstream code; - code << "[\"" << isa.code_line->inst << "\", 0, " << isa.line_number << ", \"" - << isa.code_line->comment << "\", " << line.first.marker_id << ", " << line.first.addr - << ", " << isa.hitcount << ", " << isa.latency << "]"; + code << "[\"" << isa.code_line->inst << "\",0," << isa.line_number << ",\"" + << isa.code_line->comment << "\"," << line.first.marker_id << "," << line.first.addr + << "," << isa.hitcount << "," << isa.latency << "," << isa.stall << "," << isa.idle + << "]"; jcode.push_back(nlohmann::json::parse(code.str())); @@ -139,8 +163,9 @@ CodeFile::~CodeFile() nlohmann::json json; json["code"] = jcode; json["version"] = TOOL_VERSION; + json["header"] = "ISA, _, LineNumber, Source, Codeobj, Vaddr, Hit, Latency, Stall, Idle"; - OutputFile(filename) << json; + OutputFile(dir / "code.json") << json; nlohmann::json jsnapfiletree; size_t num_snap = 0; diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/code.hpp b/projects/rocprofiler-sdk/source/lib/att-tool/code.hpp index 38f6d1e912..82775e5045 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/code.hpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/code.hpp @@ -24,7 +24,6 @@ #include "att_lib_wrapper.hpp" -#include #include #include #include @@ -39,12 +38,14 @@ struct CodeLine { using Instruction = rocprofiler::sdk::codeobj::disassembly::Instruction; - int line_number = 0; - int type = 0; - - std::atomic hitcount{0}; - std::atomic latency{0}; + int line_number{0}; + int type{0}; std::shared_ptr code_line{nullptr}; + + size_t hitcount{0}; + size_t latency{0}; + size_t stall{0}; + size_t idle{0}; }; class CodeFile @@ -53,11 +54,10 @@ class CodeFile public: CodeFile() = default; - CodeFile(const Fspath& dir, 
std::shared_ptr& table); + CodeFile(Fspath dir, std::shared_ptr table); ~CodeFile(); - Fspath dir{}; - Fspath filename{}; + const Fspath dir{}; std::unordered_map line_numbers{}; std::map> isa_map{}; std::map kernel_names{}; diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/dl.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/dl.cpp index 0788ee98d8..fc52214594 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/dl.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/dl.cpp @@ -28,7 +28,6 @@ #include #include -#include #include #include #include diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.cpp index 7ad5ea1a25..a6fde4bcc5 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.cpp @@ -33,7 +33,6 @@ #include #include #include -#include namespace rocprofiler { @@ -87,7 +86,8 @@ get_trace_data(rocprofiler_att_decoder_record_type_t trace_id, bool bInvalid = false; for(size_t wave_n = 0; wave_n < trace_size; wave_n++) { - auto& wave = reinterpret_cast(trace_events)[wave_n]; + auto& wave = reinterpret_cast(trace_events)[wave_n]; + int64_t prev_inst_time = wave.begin_time; WaveFile(tool.config, wave); @@ -102,12 +102,15 @@ get_trace_data(rocprofiler_att_decoder_record_type_t trace_id, try { auto& line = tool.get(inst.pc); - line.hitcount.fetch_add(1, std::memory_order_relaxed); - line.latency.fetch_add(inst.duration, std::memory_order_relaxed); + line.hitcount += 1; + line.latency += inst.duration; + line.stall += inst.stall; + line.idle += std::max(inst.time - prev_inst_time, 0); } catch(...) 
{ bInvalid = true; } + prev_inst_time = std::max(prev_inst_time, inst.time + inst.duration); } } if(bInvalid) ROCP_WARNING << "Could not fetch some instructions!"; @@ -191,7 +194,7 @@ ToolData::~ToolData() = default; std::string demangle(std::string_view line) { - int status; + int status{0}; char* c_name = abi::__cxa_demangle(line.data(), nullptr, nullptr, &status); if(c_name == nullptr) return ""; diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.hpp b/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.hpp index f67f21041d..c241c30c28 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.hpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/profile_interface.hpp @@ -28,7 +28,6 @@ #include #include -#include #include #include #include diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/analysis.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/analysis.cpp index 85e0ac6af8..9320c8e5d0 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/analysis.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/analysis.cpp @@ -28,8 +28,6 @@ namespace rocprofiler { namespace att_wrapper { -std::map> WaitcntList::_cache; - int64_t MemoryCounter::extract_waitcnt(const std::string& str) const { diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/analysis.hpp b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/analysis.hpp index 487619f38f..b5d14f931d 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/analysis.hpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/analysis.hpp @@ -26,7 +26,6 @@ #include "lib/att-tool/att_lib_wrapper.hpp" #include "lib/att-tool/code.hpp" -#include #include #include #include @@ -47,25 +46,16 @@ struct WaitcntList using isa_map_t = std::map>; using wave_t = att_wave_data_t; - WaitcntList() = default; - - static const WaitcntList& Get(int gfxip, const wave_t& wave, isa_map_t& isa_map) + WaitcntList(int 
gfxip, const wave_t& wave, isa_map_t& isa_map) { - auto it = _cache.find(wave.traceID); - if(it != _cache.end()) return *it->second; - - auto ptr = std::make_unique(); - if(gfxip == 9) - ptr->mem_unroll = gfx9_construct(wave, isa_map); + mem_unroll = gfx9_construct(wave, isa_map); else if(gfxip == 10 || gfxip == 11) - ptr->mem_unroll = gfx10_construct(wave, isa_map); + mem_unroll = gfx10_construct(wave, isa_map); else if(gfxip == 12) - ptr->mem_unroll = gfx12_construct(wave, isa_map); + mem_unroll = gfx12_construct(wave, isa_map); else throw std::runtime_error("Invalid gfxip: " + std::to_string(gfxip)); - - return *_cache.emplace(wave.traceID, std::move(ptr)).first->second; } static std::vector gfx9_construct(const wave_t& wave, isa_map_t& isa_map); @@ -73,9 +63,6 @@ struct WaitcntList static std::vector gfx12_construct(const wave_t& wave, isa_map_t& isa_map); std::vector mem_unroll{}; - -private: - static std::map> _cache; }; class MemoryCounter @@ -98,6 +85,12 @@ public: std::optional> handle_mem_op(const std::string& inst, std::vector& flat_list); + void clearTo(std::vector& out) + { + out.insert(out.end(), list.begin(), list.end()); + list.clear(); + }; + const std::string name; Ordering order = Ordering::MEMORY_SEQUENTIAL; std::vector list{}; diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp index d59819439b..c8f5597c7d 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/gfx12.cpp @@ -64,6 +64,7 @@ union MemoryInst int ldcnt : 1; int stcnt : 1; int sampl : 1; + int idle : 1; }; int raw = 0; }; @@ -81,6 +82,13 @@ classify(const std::string& inst) { if(inst.find("s_wait_alu") != npos) return MemoryInstType::TYPE_NOT_MEM; + if(inst.find("s_wait_idle") != npos) + { + MemoryInst type = MemoryInstType::TYPE_WAITCNT; + type.idle = true; + return type; + } + MemoryInst type = 
MemoryInstType::TYPE_WAITCNT; if(inst.find("dscnt") != npos) type.dscnt = true; if(inst.find("bvhcnt") != npos) type.bvhcn = true; @@ -233,31 +241,26 @@ WaitcntList::gfx12_construct(const wave_t& wave, isa_map_t& isa_map) if(auto joined = expcnt.handle_mem_op(inst_str, empty_list)) mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)}); } - if(type.sampl) { if(auto joined = samplecnt.handle_mem_op(inst_str, empty_list)) mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)}); } - if(type.kmcnt) { if(auto joined = kmcnt.handle_mem_op(inst_str, empty_list)) mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)}); } - if(type.stcnt) { if(auto joined = storecnt.handle_mem_op(inst_str, flat_stor)) mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)}); } - if(type.ldcnt) { if(auto joined = loadcnt.handle_mem_op(inst_str, flat_load)) mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)}); } - if(type.dscnt) { if(auto joined = dscnt.handle_mem_op(inst_str, flat_load)) @@ -271,6 +274,20 @@ WaitcntList::gfx12_construct(const wave_t& wave, isa_map_t& isa_map) mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)}); } } + + if(type.idle) + { + std::vector all{}; + loadcnt.clearTo(all); + storecnt.clearTo(all); + samplecnt.clearTo(all); + dscnt.clearTo(all); + kmcnt.clearTo(all); + expcnt.clearTo(all); + bvhcnt.clearTo(all); + + mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(all)}); + } } } diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp index ea644bf4fa..ac509a85e6 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/waitcnt/tests/att_decoder_waitcnt_test.cpp @@ -93,7 +93,7 @@ TEST(att_decoder_waitcnt_test, gfx9) wave.instructions_array = 
insts.data(); wave.instructions_size = insts.size(); - const auto& data = WaitcntList::Get(9, wave, isa_map); + auto data = WaitcntList(9, wave, isa_map); std::map> dependencies{}; @@ -183,7 +183,7 @@ TEST(att_decoder_waitcnt_test, gfx10) wave.instructions_array = insts.data(); wave.instructions_size = insts.size(); - const auto& data = WaitcntList::Get(10, wave, isa_map); + auto data = WaitcntList(10, wave, isa_map); std::map> dependencies{}; @@ -285,7 +285,11 @@ TEST(att_decoder_waitcnt_test, gfx12) append_isa(37, "s_waitcnt dscnt(1)"); append_isa(38, "s_waitcnt expcnt(0) bvhcnt(0)"); append_isa(39, "s_waitcnt dscnt(0)"); - append_isa(40, "invalid"); + + append_isa(40, "ds_store"); + append_isa(41, "global_load"); + append_isa(42, "s_wait_idle"); + append_isa(43, "invalid"); std::vector insts{}; for(size_t i = 0; i < isa_map.size(); i++) @@ -300,7 +304,7 @@ TEST(att_decoder_waitcnt_test, gfx12) wave.instructions_array = insts.data(); wave.instructions_size = insts.size(); - const auto& data = WaitcntList::Get(12, wave, isa_map); + auto data = WaitcntList(12, wave, isa_map); std::map> dependencies{}; @@ -317,7 +321,7 @@ TEST(att_decoder_waitcnt_test, gfx12) ASSERT_EQ(dependencies.at(dep).size(), set.size()); }; - ASSERT_EQ(dependencies.size(), 11); + ASSERT_EQ(dependencies.size(), 12); set_equal(6, {2, 3}); set_equal(7, {4}); set_equal(8, {5}); @@ -329,6 +333,7 @@ TEST(att_decoder_waitcnt_test, gfx12) set_equal(37, {31}); set_equal(38, {32, 33, 34, 35}); set_equal(39, {36}); + set_equal(42, {40, 41}); } TEST(att_decoder_waitcnt_test, fail_conditions) @@ -347,22 +352,18 @@ TEST(att_decoder_waitcnt_test, fail_conditions) } WaitcntList::wave_t wave{}; - wave.traceID = 4; wave.instructions_array = insts.data(); wave.instructions_size = insts.size(); // It should give warning and return - ASSERT_TRUE(WaitcntList::Get(9, wave, isa_map).mem_unroll.empty()); - wave.traceID++; - ASSERT_TRUE(WaitcntList::Get(10, wave, isa_map).mem_unroll.empty()); - wave.traceID++; - 
ASSERT_TRUE(WaitcntList::Get(12, wave, isa_map).mem_unroll.empty()); - wave.traceID++; + ASSERT_TRUE(WaitcntList(9, wave, isa_map).mem_unroll.empty()); + ASSERT_TRUE(WaitcntList(10, wave, isa_map).mem_unroll.empty()); + ASSERT_TRUE(WaitcntList(12, wave, isa_map).mem_unroll.empty()); // it cant operate on invalid gfxip try { - WaitcntList::Get(-1, wave, isa_map); + WaitcntList(-1, wave, isa_map); // fail ASSERT_TRUE(false); } catch(std::runtime_error& e) diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/wave.cpp b/projects/rocprofiler-sdk/source/lib/att-tool/wave.cpp index 2c757fea41..977795dcd1 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/wave.cpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/wave.cpp @@ -42,7 +42,7 @@ WaveFile::WaveFile(WaveConfig& config, const att_wave_data_t& wave) assert(config.filemgr); - int assigned_id = config.id_count.at(wave.simd).at(wave.wave_id).fetch_add(1); + int assigned_id = config.id_count.at(wave.simd).at(wave.wave_id)++; { std::stringstream namess; namess << "se" << config.shader_engine << "_sm" << (int) wave.simd << "_sl" @@ -87,8 +87,7 @@ WaveFile::WaveFile(WaveConfig& config, const att_wave_data_t& wave) try { - const WaitcntList& wait_list = - WaitcntList::Get(config.filemgr->gfxip, wave, config.code->isa_map); + auto wait_list = WaitcntList(config.filemgr->gfxip, wave, config.code->isa_map); for(const auto& line : wait_list.mem_unroll) if(!line.dependencies.empty()) diff --git a/projects/rocprofiler-sdk/source/lib/att-tool/wave.hpp b/projects/rocprofiler-sdk/source/lib/att-tool/wave.hpp index fba7017b08..6233a148dc 100644 --- a/projects/rocprofiler-sdk/source/lib/att-tool/wave.hpp +++ b/projects/rocprofiler-sdk/source/lib/att-tool/wave.hpp @@ -30,7 +30,6 @@ #include "att_lib_wrapper.hpp" -#include #include #include #include @@ -45,7 +44,7 @@ constexpr size_t SIMD_SIZE = 32; class WaveConfig { using WavestateArray = std::array, ATT_WAVE_STATE_LAST>; - using SIMD = std::array, SIMD_SIZE>; + using 
SIMD = std::array; public: WaveConfig(int se_id, diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp index bf7e1ad9d4..b66b750e23 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk-tool/config.hpp @@ -137,10 +137,10 @@ struct config : output_config std::string pc_sampling_method = get_env("ROCPROF_PC_SAMPLING_METHOD", "none"); std::string pc_sampling_unit = get_env("ROCPROF_PC_SAMPLING_UNIT", "none"); std::string extra_counters_contents = get_env("ROCPROF_EXTRA_COUNTERS_CONTENTS", ""); + std::string att_capability = get_env("ROCPROF_ATT_CAPABILITY", "trace"); - std::unordered_set kernel_filter_range = {}; - std::vector> counters = {}; - std::string att_capability = get_env("ROCPROF_ATT_CAPABILITY", ""); + std::unordered_set kernel_filter_range = {}; + std::vector> counters = {}; std::vector att_param_perfcounters = {}; std::queue collection_periods = {}; diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp index a8609d83a4..39bb11aa3f 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/thread_trace/att_core.cpp @@ -295,7 +295,11 @@ DispatchThreadTracer::resource_init() if(it == params.end()) continue; auto cache = rocprofiler::agent::get_hsa_agent(rocp_agent); - CHECK(cache.has_value()); + if(!cache.has_value()) + { + ROCP_CI_LOG(TRACE) << "Could not find HSA Agent for " << rocp_agent->id.handle; + continue; + } agents[*cache] = std::make_unique(it->second, rocp_agent->id); } } diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt index 
8e4acb4faf..e65ece9d67 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/CMakeLists.txt @@ -38,51 +38,52 @@ rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.p find_package(rocprofiler-sdk REQUIRED) -# hsa multiqueue dependency test +set(IS_DISABLED ON) -add_test( - NAME rocprofv3-test-hsa-multiqueue-att-cmd-env-ld-lib-path-execute - COMMAND - $ --log-level env --advanced-thread-trace - 1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD - --att-buffer-size 0x6000000 --att-simd-select 0x3 --att-parse testing1 - --att-serialize-all 1 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o - out --output-format json ${PRELOAD_ARGS} -- - $) +set(LIB_PATH_LOC "${ROCM_PATH}/lib") +set(LIB_PATH_ENV "ROCPROF_ATT_LIBRARY_PATH=${LIB_PATH_LOC}") -set_tests_properties( - rocprofv3-test-hsa-multiqueue-att-cmd-env-ld-lib-path-execute - PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}:$ENV{LD_LIBRARY_PATH}) -# hsa multiqueue dependency test +find_library( + attdecoder + HINTS ${LIB_PATH_LOC} + PATHS ${ROCM_PATH} + PATH_SUFFIXES lib + NAMES att_decoder_trace) + +if(attdecoder) + set(IS_DISABLED OFF) +endif() + +# hsa multiqueue dependency test with lib path add_test( NAME rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute COMMAND $ --log-level env --advanced-thread-trace 1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD - --att-buffer-size 0x6000000 --att-simd-select 0x3 --att-parse testing1 - --att-serialize-all 1 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o - out --output-format json ${PRELOAD_ARGS} -- + --att-buffer-size 0x6000000 --att-simd-select 0x3 --att-serialize-all 1 -d + ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o out --output-format json + ${PRELOAD_ARGS} --att-library-path 
${LIB_PATH_LOC} -- $) set_tests_properties( rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute - PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT - ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) + PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED}) -# hsa multiqueue dependency test +# hsa multiqueue dependency test with json input add_test( NAME rocprofv3-test-hsa-multiqueue-att-json-execute COMMAND $ --log-level env --att-library-path - ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} -d - ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/json_input -i + ${LIB_PATH_LOC} -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/json_input -i ${CMAKE_CURRENT_BINARY_DIR}/att_input.json ${PRELOAD_ARGS} -- $) -set_tests_properties(rocprofv3-test-hsa-multiqueue-att-json-execute - PROPERTIES TIMEOUT 45 LABELS "integration-tests") +set_tests_properties( + rocprofv3-test-hsa-multiqueue-att-json-execute + PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED} ENVIRONMENT + ${LIB_PATH_ENV}) +# validate output add_test( NAME rocprofv3-test-hsa-multiqueue-att-cmd-validate COMMAND @@ -108,22 +109,29 @@ set(MULTIQUEUE_JSON_VALIDATION_FILES set_tests_properties( rocprofv3-test-hsa-multiqueue-att-cmd-validate - PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS + PROPERTIES TIMEOUT + 45 + LABELS + "integration-tests" + DEPENDS "rocprofv3-test-hsa-multiqueue-att-cmd-ld-lib-path-execute" - FAIL_REGULAR_EXPRESSION "AssertionError") + FAIL_REGULAR_EXPRESSION + "AssertionError" + DISABLED + ${IS_DISABLED}) set_tests_properties( rocprofv3-test-hsa-multiqueue-att-json-validate - PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS - "rocprofv3-test-hsa-multiqueue-att-json-execute" FAIL_REGULAR_EXPRESSION - "AssertionError") - -if(TARGET rocprofiler-sdk::att-decoder-testing1 AND TARGET - rocprofiler-sdk::att-decoder-testing2) - set(MISSING_TEST_DECODER_LIBS OFF) -else() - set(MISSING_TEST_DECODER_LIBS ON) -endif() + PROPERTIES TIMEOUT + 45 
+ LABELS + "integration-tests" + DEPENDS + "rocprofv3-test-hsa-multiqueue-att-json-execute" + FAIL_REGULAR_EXPRESSION + "AssertionError" + DISABLED + ${IS_DISABLED}) function(configure_att_input _FILENAME _OUTDIR) set(LIBRARY_OUTPUT_DIR ${_OUTDIR}) @@ -134,74 +142,41 @@ endfunction() configure_att_input(att_input.yml "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") configure_att_input(att_input_will_fail.yml "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") -# -# Uses att_library_path in YAML input to specify ATT library path -# +# test yaml input add_test( - NAME rocprofv3-test-att-library-path-yaml-input - COMMAND $ -i - ${CMAKE_CURRENT_BINARY_DIR}/att_input.yml --log-level env --echo -- sleep 0) + NAME rocprofv3-test-att-yaml-input + COMMAND + $ -i + ${CMAKE_CURRENT_BINARY_DIR}/att_input.yml --log-level env --echo -- + $) set_tests_properties( - rocprofv3-test-att-library-path-yaml-input - PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED - "${MISSING_TEST_DECODER_LIBS}") + rocprofv3-test-att-yaml-input PROPERTIES TIMEOUT 45 LABELS "integration-tests" + DISABLED ${IS_DISABLED}) +# Invalid lib path has to fail add_test( - NAME rocprofv3-test-att-library-path-yaml-input-will-fail + NAME rocprofv3-test-att-yaml-input-will-fail COMMAND $ -i ${CMAKE_CURRENT_BINARY_DIR}/att_input_will_fail.yml --log-level env --echo -- - sleep 0) + $) set_tests_properties( - rocprofv3-test-att-library-path-yaml-input-will-fail - PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED - "${MISSING_TEST_DECODER_LIBS}") + rocprofv3-test-att-yaml-input-will-fail + PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED ${IS_DISABLED}) -# -# Uses --att-library-path to specify ATT library path -# -add_test( - NAME rocprofv3-test-att-library-path-cmd-line - COMMAND - $ --att --att-library-path - ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att --att-parse testing2 --log-level env --echo - -- sleep 0) - -set_tests_properties( - rocprofv3-test-att-library-path-cmd-line - PROPERTIES TIMEOUT 
45 LABELS "integration-tests" DISABLED - "${MISSING_TEST_DECODER_LIBS}") - -add_test( - NAME rocprofv3-test-att-library-path-cmd-line-will-fail - COMMAND - $ --att --att-library-path - ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att --att-parse testing1 --log-level env --echo - -- sleep 0) +add_test(NAME rocprofv3-test-att-library-path-cmd-line-will-fail + COMMAND $ --att --att-library-path . + --log-level env --echo -- $) set_tests_properties( rocprofv3-test-att-library-path-cmd-line-will-fail - PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED - "${MISSING_TEST_DECODER_LIBS}") - -# -# Uses ROCPROF_ATT_LIBRARY_PATH to specify ATT library path -# -add_test(NAME rocprofv3-test-att-library-path-env-var - COMMAND $ --att --att-parse testing2 - --log-level env --echo -- sleep 0) - -set_tests_properties( - rocprofv3-test-att-library-path-env-var - PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT - "ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att" DISABLED - "${MISSING_TEST_DECODER_LIBS}") + PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED ${IS_DISABLED}) add_test(NAME rocprofv3-test-att-library-path-env-var-will-fail - COMMAND $ --att --att-parse testing1 - --log-level env --echo -- sleep 0) + COMMAND $ --att --log-level env --echo + -- $) set_tests_properties( rocprofv3-test-att-library-path-env-var-will-fail @@ -210,11 +185,11 @@ set_tests_properties( LABELS "integration-tests" ENVIRONMENT - "ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att" + "ROCPROF_ATT_LIBRARY_PATH=." 
WILL_FAIL ON DISABLED - "${MISSING_TEST_DECODER_LIBS}") + ${IS_DISABLED}) # # Uses ATT and Counter Collection at the same time @@ -223,11 +198,10 @@ add_test( NAME rocprofv3-test-hsa-multiqueue-att-plus-pmc-execute COMMAND $ --log-level env --pmc SQ_WAVES - --advanced-thread-trace --att-parse testing1 -d - ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o out --output-format json - ${PRELOAD_ARGS} -- $) + --advanced-thread-trace -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o + out --output-format json ${PRELOAD_ARGS} -- $) set_tests_properties( rocprofv3-test-hsa-multiqueue-att-plus-pmc-execute - PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT - LD_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}:$ENV{LD_LIBRARY_PATH}) + PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED} ENVIRONMENT + ${LIB_PATH_ENV}) diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.json b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.json index 7cfa71d2ae..3ec2575947 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.json +++ b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.json @@ -9,7 +9,6 @@ ], "truncate_kernels": true, "advanced_thread_trace": true, - "att_parse": "testing1", "att_target_cu": 1, "att_shader_engine_mask": "0x11", "att_simd_select": "0x3", diff --git a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.yml.in b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.yml.in index fc2721831f..d80dd3e1f6 100644 --- a/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.yml.in +++ b/projects/rocprofiler-sdk/tests/rocprofv3/advanced-thread-trace/att_input.yml.in @@ -22,31 +22,10 @@ jobs: - advanced_thread_trace: True - att_parse: testing2 - att_library_path: - - @LIBRARY_OUTPUT_DIR@/att - - - advanced_thread_trace: True - att_parse: testing2 
att_library_path: - @LIBRARY_OUTPUT_DIR@/att - @LIBRARY_OUTPUT_DIR@ - advanced_thread_trace: True - att_parse: testing1 - att_library_path: - - @LIBRARY_OUTPUT_DIR@/att - - @LIBRARY_OUTPUT_DIR@ - - - advanced_thread_trace: True - att_parse: testing1 - - - advanced_thread_trace: True - att_parse: testing1 - att_library_path: - - @LIBRARY_OUTPUT_DIR@ - - - advanced_thread_trace: True - att_parse: testing1 att_library_path: - @LIBRARY_OUTPUT_DIR@