SWDEV-528686: ATT fix for gfx12 s_wait_idle. Fixes for csv. Default ATT parse to trace. Fix for ROCR_VISIBLE_DEVICES. (#345)
* Fix for gfx12 s_wait_idle. Added wait field on att.csv
* Format and default ATT to trace
* Update .mds
* No fatal error for invalid agent
* Tidy fixes
* Rename wait to idle, removed unneeded headers
* Remove unused traceID
* Tidy fix
* Fix csv output
* Formatting
* Fix tests
* Fix tests
* Fix for visible devices
* Review comment: Fix cmake
* Review suggestion
* Remove changelog/readme
* Review comments
* Review comment for CSV
* Formatting
---------
Co-authored-by: Giovanni Baraldi <gbaraldi@amd.com>
[ROCm/rocprofiler-sdk commit: a8f3397069]
This commit is contained in:
committad av
GitHub
förälder
cd106cda3c
incheckning
f91f0712f7
@@ -656,89 +656,79 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
|
||||
default_att_lib_path, att_support_args, att_support_inp = check_att_capability(
|
||||
rocp_args
|
||||
)
|
||||
if att_support_args or len(att_support_inp) != 0:
|
||||
choice_list = []
|
||||
for keys, values in att_support_inp.items():
|
||||
choice_list.extend(values)
|
||||
if att_support_args:
|
||||
choice_list.extend(list(att_support_args))
|
||||
|
||||
# remove duplicates
|
||||
choice_list = list(set(choice_list))
|
||||
choice_list = []
|
||||
for keys, values in att_support_inp.items():
|
||||
choice_list.extend(values)
|
||||
if att_support_args:
|
||||
choice_list.extend(list(att_support_args))
|
||||
|
||||
att_options = parser.add_argument_group("Advanced Thread Trace (ATT) options")
|
||||
# remove duplicates
|
||||
choice_list = list(set(choice_list))
|
||||
|
||||
add_parser_bool_argument(
|
||||
att_options,
|
||||
"--advanced-thread-trace",
|
||||
"--att",
|
||||
help="Enable ATT",
|
||||
)
|
||||
att_options = parser.add_argument_group("Advanced Thread Trace (ATT) options")
|
||||
|
||||
att_options.add_argument(
|
||||
"--att-library-path",
|
||||
help="Search path(s) to decoder library/libraries",
|
||||
default=default_att_lib_path if not att_support_inp else None,
|
||||
nargs="+",
|
||||
)
|
||||
add_parser_bool_argument(
|
||||
att_options,
|
||||
"--advanced-thread-trace",
|
||||
"--att",
|
||||
help="Enable ATT",
|
||||
)
|
||||
|
||||
att_options.add_argument(
|
||||
"--att-target-cu",
|
||||
help="ATT target compute unit",
|
||||
default=None,
|
||||
)
|
||||
att_options.add_argument(
|
||||
"--att-library-path",
|
||||
help="Search path(s) to decoder library/libraries",
|
||||
default=default_att_lib_path if not att_support_inp else None,
|
||||
nargs="+",
|
||||
)
|
||||
|
||||
att_options.add_argument(
|
||||
"--att-simd-select",
|
||||
help="Select ATT SIMD",
|
||||
default=None,
|
||||
type=str,
|
||||
)
|
||||
att_options.add_argument(
|
||||
"--att-target-cu",
|
||||
help="ATT target compute unit",
|
||||
default=None,
|
||||
)
|
||||
|
||||
att_options.add_argument(
|
||||
"--att-buffer-size",
|
||||
help="Buffer Size",
|
||||
default=None,
|
||||
type=str,
|
||||
)
|
||||
att_options.add_argument(
|
||||
"--att-simd-select",
|
||||
help="Select ATT SIMD",
|
||||
default=None,
|
||||
type=str,
|
||||
)
|
||||
|
||||
att_options.add_argument(
|
||||
"--att-shader-engine-mask",
|
||||
help="att shader engine mask",
|
||||
default=None,
|
||||
type=str,
|
||||
)
|
||||
att_options.add_argument(
|
||||
"--att-buffer-size",
|
||||
help="Buffer Size",
|
||||
default=None,
|
||||
type=str,
|
||||
)
|
||||
|
||||
att_options.add_argument(
|
||||
"--att-parse",
|
||||
type=str.lower,
|
||||
default=(
|
||||
choice_list[0] if len(choice_list) == 1 and not att_support_inp else None
|
||||
),
|
||||
help="Select ATT Parse method from the choices",
|
||||
choices=set(choice_list),
|
||||
)
|
||||
att_options.add_argument(
|
||||
"--att-shader-engine-mask",
|
||||
help="att shader engine mask",
|
||||
default=None,
|
||||
type=str,
|
||||
)
|
||||
|
||||
att_options.add_argument(
|
||||
"--att-perfcounters",
|
||||
help="Set performance counters, and optionally their mask",
|
||||
default=None,
|
||||
type=str.upper,
|
||||
)
|
||||
att_options.add_argument(
|
||||
"--att-perfcounters",
|
||||
help="Set performance counters, and optionally their mask. gfx9 only.",
|
||||
default=None,
|
||||
type=str.upper,
|
||||
)
|
||||
|
||||
att_options.add_argument(
|
||||
"--att-perfcounter-ctrl",
|
||||
help="Integer in [0,32] range specifying collection period.",
|
||||
default=None,
|
||||
type=int,
|
||||
)
|
||||
att_options.add_argument(
|
||||
"--att-perfcounter-ctrl",
|
||||
help="Integer in [0,32] range specifying collection period. gfx9 only.",
|
||||
default=None,
|
||||
type=int,
|
||||
)
|
||||
|
||||
add_parser_bool_argument(
|
||||
att_options,
|
||||
"--att-serialize-all",
|
||||
default=False,
|
||||
help="Serialize all kernels",
|
||||
)
|
||||
add_parser_bool_argument(
|
||||
att_options,
|
||||
"--att-serialize-all",
|
||||
default=False,
|
||||
help="Serialize all kernels",
|
||||
)
|
||||
|
||||
return (parser.parse_args(rocp_args), app_args, att_support_args, att_support_inp)
|
||||
|
||||
@@ -1408,11 +1398,7 @@ def run(app_args, args, **kwargs):
|
||||
):
|
||||
fatal_error("Advanced thread trace cannot be enabled with pc sampling")
|
||||
|
||||
if not args.att_parse:
|
||||
fatal_error("provide the parser choice")
|
||||
|
||||
update_env("ROCPROF_ADVANCED_THREAD_TRACE", True, overwrite=True)
|
||||
update_env("ROCPROF_ATT_CAPABILITY", args.att_parse, overwrite=True)
|
||||
|
||||
if args.att_target_cu is not None:
|
||||
update_env(
|
||||
|
||||
Executable → Regular
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "code.hpp"
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "lib/output/csv.hpp"
|
||||
#include "outputfile.hpp"
|
||||
|
||||
#include <fstream>
|
||||
@@ -33,7 +34,7 @@ namespace rocprofiler
|
||||
{
|
||||
namespace att_wrapper
|
||||
{
|
||||
#define ATT_CSV_NAME "att_output.csv"
|
||||
using csv_encoder = rocprofiler::tool::csv::csv_encoder<8>;
|
||||
|
||||
// Builds a json filetree by recursively inserting "path" into the json object.
|
||||
void
|
||||
@@ -48,10 +49,9 @@ navigate(nlohmann::json& json, std::vector<std::string>& path, const std::string
|
||||
navigate(j, path, filename);
|
||||
}
|
||||
|
||||
CodeFile::CodeFile(const Fspath& _dir, std::shared_ptr<AddressTable>& _table)
|
||||
: dir(_dir)
|
||||
, filename(_dir / "code.json")
|
||||
, table(_table)
|
||||
CodeFile::CodeFile(Fspath _dir, std::shared_ptr<AddressTable> _table)
|
||||
: dir(std::move(_dir))
|
||||
, table(std::move(_table))
|
||||
{}
|
||||
|
||||
CodeFile::~CodeFile()
|
||||
@@ -76,20 +76,44 @@ CodeFile::~CodeFile()
|
||||
return a.first.marker_id < b.first.marker_id;
|
||||
});
|
||||
|
||||
OutputFile file(dir / ATT_CSV_NAME);
|
||||
std::stringstream ofs;
|
||||
csv_encoder::write_row(ofs,
|
||||
"CodeObj",
|
||||
"Vaddr",
|
||||
"Instruction",
|
||||
"Hitcount",
|
||||
"Latency",
|
||||
"Stall",
|
||||
"Idle",
|
||||
"Source");
|
||||
|
||||
file << "CodeObj, Vaddr, Instruction, Hitcount, Latency, Source\n";
|
||||
for(auto& [pc, line] : vec)
|
||||
{
|
||||
if(kernel_names.find(pc) != kernel_names.end())
|
||||
{
|
||||
file << pc.marker_id << ',' << pc.addr << ",\"; " << kernel_names.at(pc).name
|
||||
<< "\",0,0,\"" << kernel_names.at(pc).demangled << "\"\n";
|
||||
csv_encoder::write_row(ofs,
|
||||
pc.marker_id,
|
||||
pc.addr,
|
||||
"; " + kernel_names.at(pc).name,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
kernel_names.at(pc).demangled);
|
||||
}
|
||||
file << pc.marker_id << ',' << pc.addr << ",\"" << line->code_line->inst << "\","
|
||||
<< line->hitcount << ',' << line->latency << ',' << line->code_line->comment
|
||||
<< '\n';
|
||||
csv_encoder::write_row(ofs,
|
||||
pc.marker_id,
|
||||
pc.addr,
|
||||
line->code_line->inst,
|
||||
line->hitcount,
|
||||
line->latency,
|
||||
line->stall,
|
||||
line->idle,
|
||||
line->code_line->comment);
|
||||
}
|
||||
|
||||
OutputFile file(dir.parent_path() / ("stats_" + dir.filename().string() + ".csv"));
|
||||
file << ofs.str();
|
||||
}
|
||||
|
||||
if(!GlobalDefs::get().has_format("json")) return;
|
||||
@@ -113,17 +137,17 @@ CodeFile::~CodeFile()
|
||||
if(kernel_names.find(line.first) != kernel_names.end())
|
||||
{
|
||||
std::stringstream code;
|
||||
code << "[\"; " << kernel_names.at(line.first).name << "\", 100, "
|
||||
<< (isa.line_number - 1) << ", \"" << kernel_names.at(line.first).demangled
|
||||
<< "\", " << line.first.marker_id << ", " << line.first.addr << ", 0, 0]";
|
||||
|
||||
code << "[\"; " << kernel_names.at(line.first).name << "\",0," << (isa.line_number - 1)
|
||||
<< ",\"" << kernel_names.at(line.first).demangled << "\"," << line.first.marker_id
|
||||
<< "," << line.first.addr << ",0,0,0,0]";
|
||||
jcode.push_back(nlohmann::json::parse(code.str()));
|
||||
}
|
||||
|
||||
std::stringstream code;
|
||||
code << "[\"" << isa.code_line->inst << "\", 0, " << isa.line_number << ", \""
|
||||
<< isa.code_line->comment << "\", " << line.first.marker_id << ", " << line.first.addr
|
||||
<< ", " << isa.hitcount << ", " << isa.latency << "]";
|
||||
code << "[\"" << isa.code_line->inst << "\",0," << isa.line_number << ",\""
|
||||
<< isa.code_line->comment << "\"," << line.first.marker_id << "," << line.first.addr
|
||||
<< "," << isa.hitcount << "," << isa.latency << "," << isa.stall << "," << isa.idle
|
||||
<< "]";
|
||||
|
||||
jcode.push_back(nlohmann::json::parse(code.str()));
|
||||
|
||||
@@ -139,8 +163,9 @@ CodeFile::~CodeFile()
|
||||
nlohmann::json json;
|
||||
json["code"] = jcode;
|
||||
json["version"] = TOOL_VERSION;
|
||||
json["header"] = "ISA, _, LineNumber, Source, Codeobj, Vaddr, Hit, Latency, Stall, Idle";
|
||||
|
||||
OutputFile(filename) << json;
|
||||
OutputFile(dir / "code.json") << json;
|
||||
|
||||
nlohmann::json jsnapfiletree;
|
||||
size_t num_snap = 0;
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
|
||||
#include "att_lib_wrapper.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
@@ -39,12 +38,14 @@ struct CodeLine
|
||||
{
|
||||
using Instruction = rocprofiler::sdk::codeobj::disassembly::Instruction;
|
||||
|
||||
int line_number = 0;
|
||||
int type = 0;
|
||||
|
||||
std::atomic<size_t> hitcount{0};
|
||||
std::atomic<size_t> latency{0};
|
||||
int line_number{0};
|
||||
int type{0};
|
||||
std::shared_ptr<Instruction> code_line{nullptr};
|
||||
|
||||
size_t hitcount{0};
|
||||
size_t latency{0};
|
||||
size_t stall{0};
|
||||
size_t idle{0};
|
||||
};
|
||||
|
||||
class CodeFile
|
||||
@@ -53,11 +54,10 @@ class CodeFile
|
||||
|
||||
public:
|
||||
CodeFile() = default;
|
||||
CodeFile(const Fspath& dir, std::shared_ptr<AddressTable>& table);
|
||||
CodeFile(Fspath dir, std::shared_ptr<AddressTable> table);
|
||||
~CodeFile();
|
||||
|
||||
Fspath dir{};
|
||||
Fspath filename{};
|
||||
const Fspath dir{};
|
||||
std::unordered_map<pcinfo_t, int> line_numbers{};
|
||||
std::map<pcinfo_t, std::unique_ptr<CodeLine>> isa_map{};
|
||||
std::map<pcinfo_t, KernelName> kernel_names{};
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
#include <rocprofiler-sdk/cxx/details/tokenize.hpp>
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cstdlib>
|
||||
#include <mutex>
|
||||
|
||||
@@ -33,7 +33,6 @@
|
||||
#include <cxxabi.h>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <shared_mutex>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
@@ -87,7 +86,8 @@ get_trace_data(rocprofiler_att_decoder_record_type_t trace_id,
|
||||
bool bInvalid = false;
|
||||
for(size_t wave_n = 0; wave_n < trace_size; wave_n++)
|
||||
{
|
||||
auto& wave = reinterpret_cast<att_wave_data_t*>(trace_events)[wave_n];
|
||||
auto& wave = reinterpret_cast<att_wave_data_t*>(trace_events)[wave_n];
|
||||
int64_t prev_inst_time = wave.begin_time;
|
||||
|
||||
WaveFile(tool.config, wave);
|
||||
|
||||
@@ -102,12 +102,15 @@ get_trace_data(rocprofiler_att_decoder_record_type_t trace_id,
|
||||
try
|
||||
{
|
||||
auto& line = tool.get(inst.pc);
|
||||
line.hitcount.fetch_add(1, std::memory_order_relaxed);
|
||||
line.latency.fetch_add(inst.duration, std::memory_order_relaxed);
|
||||
line.hitcount += 1;
|
||||
line.latency += inst.duration;
|
||||
line.stall += inst.stall;
|
||||
line.idle += std::max<int64_t>(inst.time - prev_inst_time, 0);
|
||||
} catch(...)
|
||||
{
|
||||
bInvalid = true;
|
||||
}
|
||||
prev_inst_time = std::max(prev_inst_time, inst.time + inst.duration);
|
||||
}
|
||||
}
|
||||
if(bInvalid) ROCP_WARNING << "Could not fetch some instructions!";
|
||||
@@ -191,7 +194,7 @@ ToolData::~ToolData() = default;
|
||||
std::string
|
||||
demangle(std::string_view line)
|
||||
{
|
||||
int status;
|
||||
int status{0};
|
||||
char* c_name = abi::__cxa_demangle(line.data(), nullptr, nullptr, &status);
|
||||
|
||||
if(c_name == nullptr) return "";
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
|
||||
#include <cxxabi.h>
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
|
||||
@@ -28,8 +28,6 @@ namespace rocprofiler
|
||||
{
|
||||
namespace att_wrapper
|
||||
{
|
||||
std::map<size_t, std::unique_ptr<WaitcntList>> WaitcntList::_cache;
|
||||
|
||||
int64_t
|
||||
MemoryCounter::extract_waitcnt(const std::string& str) const
|
||||
{
|
||||
|
||||
@@ -26,7 +26,6 @@
|
||||
#include "lib/att-tool/att_lib_wrapper.hpp"
|
||||
#include "lib/att-tool/code.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
@@ -47,25 +46,16 @@ struct WaitcntList
|
||||
using isa_map_t = std::map<pcinfo_t, std::unique_ptr<CodeLine>>;
|
||||
using wave_t = att_wave_data_t;
|
||||
|
||||
WaitcntList() = default;
|
||||
|
||||
static const WaitcntList& Get(int gfxip, const wave_t& wave, isa_map_t& isa_map)
|
||||
WaitcntList(int gfxip, const wave_t& wave, isa_map_t& isa_map)
|
||||
{
|
||||
auto it = _cache.find(wave.traceID);
|
||||
if(it != _cache.end()) return *it->second;
|
||||
|
||||
auto ptr = std::make_unique<WaitcntList>();
|
||||
|
||||
if(gfxip == 9)
|
||||
ptr->mem_unroll = gfx9_construct(wave, isa_map);
|
||||
mem_unroll = gfx9_construct(wave, isa_map);
|
||||
else if(gfxip == 10 || gfxip == 11)
|
||||
ptr->mem_unroll = gfx10_construct(wave, isa_map);
|
||||
mem_unroll = gfx10_construct(wave, isa_map);
|
||||
else if(gfxip == 12)
|
||||
ptr->mem_unroll = gfx12_construct(wave, isa_map);
|
||||
mem_unroll = gfx12_construct(wave, isa_map);
|
||||
else
|
||||
throw std::runtime_error("Invalid gfxip: " + std::to_string(gfxip));
|
||||
|
||||
return *_cache.emplace(wave.traceID, std::move(ptr)).first->second;
|
||||
}
|
||||
|
||||
static std::vector<LineWaitcnt> gfx9_construct(const wave_t& wave, isa_map_t& isa_map);
|
||||
@@ -73,9 +63,6 @@ struct WaitcntList
|
||||
static std::vector<LineWaitcnt> gfx12_construct(const wave_t& wave, isa_map_t& isa_map);
|
||||
|
||||
std::vector<LineWaitcnt> mem_unroll{};
|
||||
|
||||
private:
|
||||
static std::map<size_t, std::unique_ptr<WaitcntList>> _cache;
|
||||
};
|
||||
|
||||
class MemoryCounter
|
||||
@@ -98,6 +85,12 @@ public:
|
||||
std::optional<std::vector<int>> handle_mem_op(const std::string& inst,
|
||||
std::vector<int>& flat_list);
|
||||
|
||||
void clearTo(std::vector<int>& out)
|
||||
{
|
||||
out.insert(out.end(), list.begin(), list.end());
|
||||
list.clear();
|
||||
};
|
||||
|
||||
const std::string name;
|
||||
Ordering order = Ordering::MEMORY_SEQUENTIAL;
|
||||
std::vector<int> list{};
|
||||
|
||||
@@ -64,6 +64,7 @@ union MemoryInst
|
||||
int ldcnt : 1;
|
||||
int stcnt : 1;
|
||||
int sampl : 1;
|
||||
int idle : 1;
|
||||
};
|
||||
int raw = 0;
|
||||
};
|
||||
@@ -81,6 +82,13 @@ classify(const std::string& inst)
|
||||
{
|
||||
if(inst.find("s_wait_alu") != npos) return MemoryInstType::TYPE_NOT_MEM;
|
||||
|
||||
if(inst.find("s_wait_idle") != npos)
|
||||
{
|
||||
MemoryInst type = MemoryInstType::TYPE_WAITCNT;
|
||||
type.idle = true;
|
||||
return type;
|
||||
}
|
||||
|
||||
MemoryInst type = MemoryInstType::TYPE_WAITCNT;
|
||||
if(inst.find("dscnt") != npos) type.dscnt = true;
|
||||
if(inst.find("bvhcnt") != npos) type.bvhcn = true;
|
||||
@@ -233,31 +241,26 @@ WaitcntList::gfx12_construct(const wave_t& wave, isa_map_t& isa_map)
|
||||
if(auto joined = expcnt.handle_mem_op(inst_str, empty_list))
|
||||
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
|
||||
}
|
||||
|
||||
if(type.sampl)
|
||||
{
|
||||
if(auto joined = samplecnt.handle_mem_op(inst_str, empty_list))
|
||||
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
|
||||
}
|
||||
|
||||
if(type.kmcnt)
|
||||
{
|
||||
if(auto joined = kmcnt.handle_mem_op(inst_str, empty_list))
|
||||
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
|
||||
}
|
||||
|
||||
if(type.stcnt)
|
||||
{
|
||||
if(auto joined = storecnt.handle_mem_op(inst_str, flat_stor))
|
||||
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
|
||||
}
|
||||
|
||||
if(type.ldcnt)
|
||||
{
|
||||
if(auto joined = loadcnt.handle_mem_op(inst_str, flat_load))
|
||||
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
|
||||
}
|
||||
|
||||
if(type.dscnt)
|
||||
{
|
||||
if(auto joined = dscnt.handle_mem_op(inst_str, flat_load))
|
||||
@@ -271,6 +274,20 @@ WaitcntList::gfx12_construct(const wave_t& wave, isa_map_t& isa_map)
|
||||
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
|
||||
}
|
||||
}
|
||||
|
||||
if(type.idle)
|
||||
{
|
||||
std::vector<int> all{};
|
||||
loadcnt.clearTo(all);
|
||||
storecnt.clearTo(all);
|
||||
samplecnt.clearTo(all);
|
||||
dscnt.clearTo(all);
|
||||
kmcnt.clearTo(all);
|
||||
expcnt.clearTo(all);
|
||||
bvhcnt.clearTo(all);
|
||||
|
||||
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(all)});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+14
-13
@@ -93,7 +93,7 @@ TEST(att_decoder_waitcnt_test, gfx9)
|
||||
wave.instructions_array = insts.data();
|
||||
wave.instructions_size = insts.size();
|
||||
|
||||
const auto& data = WaitcntList::Get(9, wave, isa_map);
|
||||
auto data = WaitcntList(9, wave, isa_map);
|
||||
|
||||
std::map<int, std::set<int>> dependencies{};
|
||||
|
||||
@@ -183,7 +183,7 @@ TEST(att_decoder_waitcnt_test, gfx10)
|
||||
wave.instructions_array = insts.data();
|
||||
wave.instructions_size = insts.size();
|
||||
|
||||
const auto& data = WaitcntList::Get(10, wave, isa_map);
|
||||
auto data = WaitcntList(10, wave, isa_map);
|
||||
|
||||
std::map<int, std::set<int>> dependencies{};
|
||||
|
||||
@@ -285,7 +285,11 @@ TEST(att_decoder_waitcnt_test, gfx12)
|
||||
append_isa(37, "s_waitcnt dscnt(1)");
|
||||
append_isa(38, "s_waitcnt expcnt(0) bvhcnt(0)");
|
||||
append_isa(39, "s_waitcnt dscnt(0)");
|
||||
append_isa(40, "invalid");
|
||||
|
||||
append_isa(40, "ds_store");
|
||||
append_isa(41, "global_load");
|
||||
append_isa(42, "s_wait_idle");
|
||||
append_isa(43, "invalid");
|
||||
|
||||
std::vector<att_wave_instruction_t> insts{};
|
||||
for(size_t i = 0; i < isa_map.size(); i++)
|
||||
@@ -300,7 +304,7 @@ TEST(att_decoder_waitcnt_test, gfx12)
|
||||
wave.instructions_array = insts.data();
|
||||
wave.instructions_size = insts.size();
|
||||
|
||||
const auto& data = WaitcntList::Get(12, wave, isa_map);
|
||||
auto data = WaitcntList(12, wave, isa_map);
|
||||
|
||||
std::map<int, std::set<int>> dependencies{};
|
||||
|
||||
@@ -317,7 +321,7 @@ TEST(att_decoder_waitcnt_test, gfx12)
|
||||
ASSERT_EQ(dependencies.at(dep).size(), set.size());
|
||||
};
|
||||
|
||||
ASSERT_EQ(dependencies.size(), 11);
|
||||
ASSERT_EQ(dependencies.size(), 12);
|
||||
set_equal(6, {2, 3});
|
||||
set_equal(7, {4});
|
||||
set_equal(8, {5});
|
||||
@@ -329,6 +333,7 @@ TEST(att_decoder_waitcnt_test, gfx12)
|
||||
set_equal(37, {31});
|
||||
set_equal(38, {32, 33, 34, 35});
|
||||
set_equal(39, {36});
|
||||
set_equal(42, {40, 41});
|
||||
}
|
||||
|
||||
TEST(att_decoder_waitcnt_test, fail_conditions)
|
||||
@@ -347,22 +352,18 @@ TEST(att_decoder_waitcnt_test, fail_conditions)
|
||||
}
|
||||
|
||||
WaitcntList::wave_t wave{};
|
||||
wave.traceID = 4;
|
||||
wave.instructions_array = insts.data();
|
||||
wave.instructions_size = insts.size();
|
||||
|
||||
// It should give warning and return
|
||||
ASSERT_TRUE(WaitcntList::Get(9, wave, isa_map).mem_unroll.empty());
|
||||
wave.traceID++;
|
||||
ASSERT_TRUE(WaitcntList::Get(10, wave, isa_map).mem_unroll.empty());
|
||||
wave.traceID++;
|
||||
ASSERT_TRUE(WaitcntList::Get(12, wave, isa_map).mem_unroll.empty());
|
||||
wave.traceID++;
|
||||
ASSERT_TRUE(WaitcntList(9, wave, isa_map).mem_unroll.empty());
|
||||
ASSERT_TRUE(WaitcntList(10, wave, isa_map).mem_unroll.empty());
|
||||
ASSERT_TRUE(WaitcntList(12, wave, isa_map).mem_unroll.empty());
|
||||
|
||||
// it cant operate on invalid gfxip
|
||||
try
|
||||
{
|
||||
WaitcntList::Get(-1, wave, isa_map);
|
||||
WaitcntList(-1, wave, isa_map);
|
||||
// fail
|
||||
ASSERT_TRUE(false);
|
||||
} catch(std::runtime_error& e)
|
||||
|
||||
@@ -42,7 +42,7 @@ WaveFile::WaveFile(WaveConfig& config, const att_wave_data_t& wave)
|
||||
|
||||
assert(config.filemgr);
|
||||
|
||||
int assigned_id = config.id_count.at(wave.simd).at(wave.wave_id).fetch_add(1);
|
||||
int assigned_id = config.id_count.at(wave.simd).at(wave.wave_id)++;
|
||||
{
|
||||
std::stringstream namess;
|
||||
namess << "se" << config.shader_engine << "_sm" << (int) wave.simd << "_sl"
|
||||
@@ -87,8 +87,7 @@ WaveFile::WaveFile(WaveConfig& config, const att_wave_data_t& wave)
|
||||
|
||||
try
|
||||
{
|
||||
const WaitcntList& wait_list =
|
||||
WaitcntList::Get(config.filemgr->gfxip, wave, config.code->isa_map);
|
||||
auto wait_list = WaitcntList(config.filemgr->gfxip, wave, config.code->isa_map);
|
||||
|
||||
for(const auto& line : wait_list.mem_unroll)
|
||||
if(!line.dependencies.empty())
|
||||
|
||||
@@ -30,7 +30,6 @@
|
||||
|
||||
#include "att_lib_wrapper.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
@@ -45,7 +44,7 @@ constexpr size_t SIMD_SIZE = 32;
|
||||
class WaveConfig
|
||||
{
|
||||
using WavestateArray = std::array<std::shared_ptr<WstatesFile>, ATT_WAVE_STATE_LAST>;
|
||||
using SIMD = std::array<std::atomic<int>, SIMD_SIZE>;
|
||||
using SIMD = std::array<size_t, SIMD_SIZE>;
|
||||
|
||||
public:
|
||||
WaveConfig(int se_id,
|
||||
|
||||
@@ -137,10 +137,10 @@ struct config : output_config
|
||||
std::string pc_sampling_method = get_env("ROCPROF_PC_SAMPLING_METHOD", "none");
|
||||
std::string pc_sampling_unit = get_env("ROCPROF_PC_SAMPLING_UNIT", "none");
|
||||
std::string extra_counters_contents = get_env("ROCPROF_EXTRA_COUNTERS_CONTENTS", "");
|
||||
std::string att_capability = get_env("ROCPROF_ATT_CAPABILITY", "trace");
|
||||
|
||||
std::unordered_set<size_t> kernel_filter_range = {};
|
||||
std::vector<std::set<std::string>> counters = {};
|
||||
std::string att_capability = get_env("ROCPROF_ATT_CAPABILITY", "");
|
||||
std::unordered_set<size_t> kernel_filter_range = {};
|
||||
std::vector<std::set<std::string>> counters = {};
|
||||
std::vector<att_perfcounter> att_param_perfcounters = {};
|
||||
|
||||
std::queue<CollectionPeriod> collection_periods = {};
|
||||
|
||||
@@ -295,7 +295,11 @@ DispatchThreadTracer::resource_init()
|
||||
if(it == params.end()) continue;
|
||||
|
||||
auto cache = rocprofiler::agent::get_hsa_agent(rocp_agent);
|
||||
CHECK(cache.has_value());
|
||||
if(!cache.has_value())
|
||||
{
|
||||
ROCP_CI_LOG(TRACE) << "Could not find HSA Agent for " << rocp_agent->id.handle;
|
||||
continue;
|
||||
}
|
||||
agents[*cache] = std::make_unique<ThreadTracerQueue>(it->second, rocp_agent->id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -38,51 +38,52 @@ rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.p
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
|
||||
# hsa multiqueue dependency test
|
||||
set(IS_DISABLED ON)
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-cmd-env-ld-lib-path-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --advanced-thread-trace
|
||||
1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD
|
||||
--att-buffer-size 0x6000000 --att-simd-select 0x3 --att-parse testing1
|
||||
--att-serialize-all 1 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o
|
||||
out --output-format json ${PRELOAD_ARGS} --
|
||||
$<TARGET_FILE:hsa_code_object_testapp>)
|
||||
set(LIB_PATH_LOC "${ROCM_PATH}/lib")
|
||||
set(LIB_PATH_ENV "ROCPROF_ATT_LIBRARY_PATH=${LIB_PATH_LOC}")
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-cmd-env-ld-lib-path-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
|
||||
LD_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}:$ENV{LD_LIBRARY_PATH})
|
||||
# hsa multiqueue dependency test
|
||||
find_library(
|
||||
attdecoder
|
||||
HINTS ${LIB_PATH_LOC}
|
||||
PATHS ${ROCM_PATH}
|
||||
PATH_SUFFIXES lib
|
||||
NAMES att_decoder_trace)
|
||||
|
||||
if(attdecoder)
|
||||
set(IS_DISABLED OFF)
|
||||
endif()
|
||||
|
||||
# hsa multiqueue dependency test with lib path
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --advanced-thread-trace
|
||||
1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD
|
||||
--att-buffer-size 0x6000000 --att-simd-select 0x3 --att-parse testing1
|
||||
--att-serialize-all 1 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o
|
||||
out --output-format json ${PRELOAD_ARGS} --
|
||||
--att-buffer-size 0x6000000 --att-simd-select 0x3 --att-serialize-all 1 -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o out --output-format json
|
||||
${PRELOAD_ARGS} --att-library-path ${LIB_PATH_LOC} --
|
||||
$<TARGET_FILE:hsa_code_object_testapp>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
|
||||
ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED})
|
||||
|
||||
# hsa multiqueue dependency test
|
||||
# hsa multiqueue dependency test with json input
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-json-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --att-library-path
|
||||
${CMAKE_LIBRARY_OUTPUT_DIRECTORY} -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/json_input -i
|
||||
${LIB_PATH_LOC} -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/json_input -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/att_input.json ${PRELOAD_ARGS} --
|
||||
$<TARGET_FILE:hsa_code_object_testapp>)
|
||||
|
||||
set_tests_properties(rocprofv3-test-hsa-multiqueue-att-json-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests")
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-json-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED} ENVIRONMENT
|
||||
${LIB_PATH_ENV})
|
||||
|
||||
# validate output
|
||||
add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-cmd-validate
|
||||
COMMAND
|
||||
@@ -108,22 +109,29 @@ set(MULTIQUEUE_JSON_VALIDATION_FILES
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-cmd-validate
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DEPENDS
|
||||
"rocprofv3-test-hsa-multiqueue-att-cmd-ld-lib-path-execute"
|
||||
FAIL_REGULAR_EXPRESSION "AssertionError")
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError"
|
||||
DISABLED
|
||||
${IS_DISABLED})
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-json-validate
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
|
||||
"rocprofv3-test-hsa-multiqueue-att-json-execute" FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError")
|
||||
|
||||
if(TARGET rocprofiler-sdk::att-decoder-testing1 AND TARGET
|
||||
rocprofiler-sdk::att-decoder-testing2)
|
||||
set(MISSING_TEST_DECODER_LIBS OFF)
|
||||
else()
|
||||
set(MISSING_TEST_DECODER_LIBS ON)
|
||||
endif()
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DEPENDS
|
||||
"rocprofv3-test-hsa-multiqueue-att-json-execute"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError"
|
||||
DISABLED
|
||||
${IS_DISABLED})
|
||||
|
||||
function(configure_att_input _FILENAME _OUTDIR)
|
||||
set(LIBRARY_OUTPUT_DIR ${_OUTDIR})
|
||||
@@ -134,74 +142,41 @@ endfunction()
|
||||
configure_att_input(att_input.yml "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}")
|
||||
configure_att_input(att_input_will_fail.yml "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
|
||||
|
||||
#
|
||||
# Uses att_library_path in YAML input to specify ATT library path
|
||||
#
|
||||
# test yaml input
|
||||
add_test(
|
||||
NAME rocprofv3-test-att-library-path-yaml-input
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/att_input.yml --log-level env --echo -- sleep 0)
|
||||
NAME rocprofv3-test-att-yaml-input
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/att_input.yml --log-level env --echo --
|
||||
$<TARGET_FILE:hsa_code_object_testapp>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-library-path-yaml-input
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED
|
||||
"${MISSING_TEST_DECODER_LIBS}")
|
||||
rocprofv3-test-att-yaml-input PROPERTIES TIMEOUT 45 LABELS "integration-tests"
|
||||
DISABLED ${IS_DISABLED})
|
||||
|
||||
# Invalid lib path has to fail
|
||||
add_test(
|
||||
NAME rocprofv3-test-att-library-path-yaml-input-will-fail
|
||||
NAME rocprofv3-test-att-yaml-input-will-fail
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/att_input_will_fail.yml --log-level env --echo --
|
||||
sleep 0)
|
||||
$<TARGET_FILE:hsa_code_object_testapp>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-library-path-yaml-input-will-fail
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED
|
||||
"${MISSING_TEST_DECODER_LIBS}")
|
||||
rocprofv3-test-att-yaml-input-will-fail
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED ${IS_DISABLED})
|
||||
|
||||
#
|
||||
# Uses --att-library-path to specify ATT library path
|
||||
#
|
||||
add_test(
|
||||
NAME rocprofv3-test-att-library-path-cmd-line
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-library-path
|
||||
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att --att-parse testing2 --log-level env --echo
|
||||
-- sleep 0)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-library-path-cmd-line
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED
|
||||
"${MISSING_TEST_DECODER_LIBS}")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-att-library-path-cmd-line-will-fail
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-library-path
|
||||
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att --att-parse testing1 --log-level env --echo
|
||||
-- sleep 0)
|
||||
add_test(NAME rocprofv3-test-att-library-path-cmd-line-will-fail
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-library-path .
|
||||
--log-level env --echo -- $<TARGET_FILE:hsa_code_object_testapp>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-library-path-cmd-line-will-fail
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED
|
||||
"${MISSING_TEST_DECODER_LIBS}")
|
||||
|
||||
#
|
||||
# Uses ROCPROF_ATT_LIBRARY_PATH to specify ATT library path
|
||||
#
|
||||
add_test(NAME rocprofv3-test-att-library-path-env-var
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-parse testing2
|
||||
--log-level env --echo -- sleep 0)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-library-path-env-var
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
|
||||
"ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att" DISABLED
|
||||
"${MISSING_TEST_DECODER_LIBS}")
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED ${IS_DISABLED})
|
||||
|
||||
add_test(NAME rocprofv3-test-att-library-path-env-var-will-fail
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-parse testing1
|
||||
--log-level env --echo -- sleep 0)
|
||||
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --log-level env --echo
|
||||
-- $<TARGET_FILE:hsa_code_object_testapp>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-att-library-path-env-var-will-fail
|
||||
@@ -210,11 +185,11 @@ set_tests_properties(
|
||||
LABELS
|
||||
"integration-tests"
|
||||
ENVIRONMENT
|
||||
"ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att"
|
||||
"ROCPROF_ATT_LIBRARY_PATH=."
|
||||
WILL_FAIL
|
||||
ON
|
||||
DISABLED
|
||||
"${MISSING_TEST_DECODER_LIBS}")
|
||||
${IS_DISABLED})
|
||||
|
||||
#
|
||||
# Uses ATT and Counter Collection at the same time
|
||||
@@ -223,11 +198,10 @@ add_test(
|
||||
NAME rocprofv3-test-hsa-multiqueue-att-plus-pmc-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --pmc SQ_WAVES
|
||||
--advanced-thread-trace --att-parse testing1 -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o out --output-format json
|
||||
${PRELOAD_ARGS} -- $<TARGET_FILE:vector-ops>)
|
||||
--advanced-thread-trace -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o
|
||||
out --output-format json ${PRELOAD_ARGS} -- $<TARGET_FILE:vector-ops>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hsa-multiqueue-att-plus-pmc-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
|
||||
LD_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}:$ENV{LD_LIBRARY_PATH})
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED} ENVIRONMENT
|
||||
${LIB_PATH_ENV})
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
],
|
||||
"truncate_kernels": true,
|
||||
"advanced_thread_trace": true,
|
||||
"att_parse": "testing1",
|
||||
"att_target_cu": 1,
|
||||
"att_shader_engine_mask": "0x11",
|
||||
"att_simd_select": "0x3",
|
||||
|
||||
@@ -22,31 +22,10 @@
|
||||
|
||||
jobs:
|
||||
- advanced_thread_trace: True
|
||||
att_parse: testing2
|
||||
att_library_path:
|
||||
- @LIBRARY_OUTPUT_DIR@/att
|
||||
|
||||
- advanced_thread_trace: True
|
||||
att_parse: testing2
|
||||
att_library_path:
|
||||
- @LIBRARY_OUTPUT_DIR@/att
|
||||
- @LIBRARY_OUTPUT_DIR@
|
||||
|
||||
- advanced_thread_trace: True
|
||||
att_parse: testing1
|
||||
att_library_path:
|
||||
- @LIBRARY_OUTPUT_DIR@/att
|
||||
- @LIBRARY_OUTPUT_DIR@
|
||||
|
||||
- advanced_thread_trace: True
|
||||
att_parse: testing1
|
||||
|
||||
- advanced_thread_trace: True
|
||||
att_parse: testing1
|
||||
att_library_path:
|
||||
- @LIBRARY_OUTPUT_DIR@
|
||||
|
||||
- advanced_thread_trace: True
|
||||
att_parse: testing1
|
||||
att_library_path:
|
||||
- @LIBRARY_OUTPUT_DIR@
|
||||
|
||||
Referens i nytt ärende
Block a user