SWDEV-528686: ATT fix for gfx12 s_wait_idle. Fixes for csv. Default to parse to trace. Fix for ROCR_VISIBLE_DEVICES. (#345)

* Fix for gfx12 s_wait_idle. Added wait field on att.csv

* Format and default to ATT to trace

* Update .mds

* No fatal error for invalid agent

* Tidy fixes

* Rename wait to idle, removed unneeded headers

* Remove unused traceID

* Tidy fix

* Fix csv output

* Formatting

* Fix tests

* Fix tests

* Fix for visible devices

* Review comment: Fix cmake

* Review suggestion

* Remove changelog/readme

* Review comments

* Review comment for CSV

* Formatting

---------

Co-authored-by: Giovanni Baraldi <gbaraldi@amd.com>

[ROCm/rocprofiler-sdk commit: a8f3397069]
This commit is contained in:
Baraldi, Giovanni
2025-04-25 18:49:16 +02:00
committad av GitHub
förälder cd106cda3c
incheckning f91f0712f7
18 ändrade filer med 250 tillägg och 275 borttagningar
+61 -75
Visa fil
@@ -656,89 +656,79 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
default_att_lib_path, att_support_args, att_support_inp = check_att_capability(
rocp_args
)
if att_support_args or len(att_support_inp) != 0:
choice_list = []
for keys, values in att_support_inp.items():
choice_list.extend(values)
if att_support_args:
choice_list.extend(list(att_support_args))
# remove duplicates
choice_list = list(set(choice_list))
choice_list = []
for keys, values in att_support_inp.items():
choice_list.extend(values)
if att_support_args:
choice_list.extend(list(att_support_args))
att_options = parser.add_argument_group("Advanced Thread Trace (ATT) options")
# remove duplicates
choice_list = list(set(choice_list))
add_parser_bool_argument(
att_options,
"--advanced-thread-trace",
"--att",
help="Enable ATT",
)
att_options = parser.add_argument_group("Advanced Thread Trace (ATT) options")
att_options.add_argument(
"--att-library-path",
help="Search path(s) to decoder library/libraries",
default=default_att_lib_path if not att_support_inp else None,
nargs="+",
)
add_parser_bool_argument(
att_options,
"--advanced-thread-trace",
"--att",
help="Enable ATT",
)
att_options.add_argument(
"--att-target-cu",
help="ATT target compute unit",
default=None,
)
att_options.add_argument(
"--att-library-path",
help="Search path(s) to decoder library/libraries",
default=default_att_lib_path if not att_support_inp else None,
nargs="+",
)
att_options.add_argument(
"--att-simd-select",
help="Select ATT SIMD",
default=None,
type=str,
)
att_options.add_argument(
"--att-target-cu",
help="ATT target compute unit",
default=None,
)
att_options.add_argument(
"--att-buffer-size",
help="Buffer Size",
default=None,
type=str,
)
att_options.add_argument(
"--att-simd-select",
help="Select ATT SIMD",
default=None,
type=str,
)
att_options.add_argument(
"--att-shader-engine-mask",
help="att shader engine mask",
default=None,
type=str,
)
att_options.add_argument(
"--att-buffer-size",
help="Buffer Size",
default=None,
type=str,
)
att_options.add_argument(
"--att-parse",
type=str.lower,
default=(
choice_list[0] if len(choice_list) == 1 and not att_support_inp else None
),
help="Select ATT Parse method from the choices",
choices=set(choice_list),
)
att_options.add_argument(
"--att-shader-engine-mask",
help="att shader engine mask",
default=None,
type=str,
)
att_options.add_argument(
"--att-perfcounters",
help="Set performance counters, and optionally their mask",
default=None,
type=str.upper,
)
att_options.add_argument(
"--att-perfcounters",
help="Set performance counters, and optionally their mask. gfx9 only.",
default=None,
type=str.upper,
)
att_options.add_argument(
"--att-perfcounter-ctrl",
help="Integer in [0,32] range specifying collection period.",
default=None,
type=int,
)
att_options.add_argument(
"--att-perfcounter-ctrl",
help="Integer in [0,32] range specifying collection period. gfx9 only.",
default=None,
type=int,
)
add_parser_bool_argument(
att_options,
"--att-serialize-all",
default=False,
help="Serialize all kernels",
)
add_parser_bool_argument(
att_options,
"--att-serialize-all",
default=False,
help="Serialize all kernels",
)
return (parser.parse_args(rocp_args), app_args, att_support_args, att_support_inp)
@@ -1408,11 +1398,7 @@ def run(app_args, args, **kwargs):
):
fatal_error("Advanced thread trace cannot be enabled with pc sampling")
if not args.att_parse:
fatal_error("provide the parser choice")
update_env("ROCPROF_ADVANCED_THREAD_TRACE", True, overwrite=True)
update_env("ROCPROF_ATT_CAPABILITY", args.att_parse, overwrite=True)
if args.att_target_cu is not None:
update_env(
Visa fil
@@ -22,6 +22,7 @@
#include "code.hpp"
#include <nlohmann/json.hpp>
#include "lib/output/csv.hpp"
#include "outputfile.hpp"
#include <fstream>
@@ -33,7 +34,7 @@ namespace rocprofiler
{
namespace att_wrapper
{
#define ATT_CSV_NAME "att_output.csv"
using csv_encoder = rocprofiler::tool::csv::csv_encoder<8>;
// Builds a json filetree by recursively inserting "path" into the json object.
void
@@ -48,10 +49,9 @@ navigate(nlohmann::json& json, std::vector<std::string>& path, const std::string
navigate(j, path, filename);
}
CodeFile::CodeFile(const Fspath& _dir, std::shared_ptr<AddressTable>& _table)
: dir(_dir)
, filename(_dir / "code.json")
, table(_table)
CodeFile::CodeFile(Fspath _dir, std::shared_ptr<AddressTable> _table)
: dir(std::move(_dir))
, table(std::move(_table))
{}
CodeFile::~CodeFile()
@@ -76,20 +76,44 @@ CodeFile::~CodeFile()
return a.first.marker_id < b.first.marker_id;
});
OutputFile file(dir / ATT_CSV_NAME);
std::stringstream ofs;
csv_encoder::write_row(ofs,
"CodeObj",
"Vaddr",
"Instruction",
"Hitcount",
"Latency",
"Stall",
"Idle",
"Source");
file << "CodeObj, Vaddr, Instruction, Hitcount, Latency, Source\n";
for(auto& [pc, line] : vec)
{
if(kernel_names.find(pc) != kernel_names.end())
{
file << pc.marker_id << ',' << pc.addr << ",\"; " << kernel_names.at(pc).name
<< "\",0,0,\"" << kernel_names.at(pc).demangled << "\"\n";
csv_encoder::write_row(ofs,
pc.marker_id,
pc.addr,
"; " + kernel_names.at(pc).name,
0,
0,
0,
0,
kernel_names.at(pc).demangled);
}
file << pc.marker_id << ',' << pc.addr << ",\"" << line->code_line->inst << "\","
<< line->hitcount << ',' << line->latency << ',' << line->code_line->comment
<< '\n';
csv_encoder::write_row(ofs,
pc.marker_id,
pc.addr,
line->code_line->inst,
line->hitcount,
line->latency,
line->stall,
line->idle,
line->code_line->comment);
}
OutputFile file(dir.parent_path() / ("stats_" + dir.filename().string() + ".csv"));
file << ofs.str();
}
if(!GlobalDefs::get().has_format("json")) return;
@@ -113,17 +137,17 @@ CodeFile::~CodeFile()
if(kernel_names.find(line.first) != kernel_names.end())
{
std::stringstream code;
code << "[\"; " << kernel_names.at(line.first).name << "\", 100, "
<< (isa.line_number - 1) << ", \"" << kernel_names.at(line.first).demangled
<< "\", " << line.first.marker_id << ", " << line.first.addr << ", 0, 0]";
code << "[\"; " << kernel_names.at(line.first).name << "\",0," << (isa.line_number - 1)
<< ",\"" << kernel_names.at(line.first).demangled << "\"," << line.first.marker_id
<< "," << line.first.addr << ",0,0,0,0]";
jcode.push_back(nlohmann::json::parse(code.str()));
}
std::stringstream code;
code << "[\"" << isa.code_line->inst << "\", 0, " << isa.line_number << ", \""
<< isa.code_line->comment << "\", " << line.first.marker_id << ", " << line.first.addr
<< ", " << isa.hitcount << ", " << isa.latency << "]";
code << "[\"" << isa.code_line->inst << "\",0," << isa.line_number << ",\""
<< isa.code_line->comment << "\"," << line.first.marker_id << "," << line.first.addr
<< "," << isa.hitcount << "," << isa.latency << "," << isa.stall << "," << isa.idle
<< "]";
jcode.push_back(nlohmann::json::parse(code.str()));
@@ -139,8 +163,9 @@ CodeFile::~CodeFile()
nlohmann::json json;
json["code"] = jcode;
json["version"] = TOOL_VERSION;
json["header"] = "ISA, _, LineNumber, Source, Codeobj, Vaddr, Hit, Latency, Stall, Idle";
OutputFile(filename) << json;
OutputFile(dir / "code.json") << json;
nlohmann::json jsnapfiletree;
size_t num_snap = 0;
@@ -24,7 +24,6 @@
#include "att_lib_wrapper.hpp"
#include <atomic>
#include <map>
#include <unordered_map>
#include <vector>
@@ -39,12 +38,14 @@ struct CodeLine
{
using Instruction = rocprofiler::sdk::codeobj::disassembly::Instruction;
int line_number = 0;
int type = 0;
std::atomic<size_t> hitcount{0};
std::atomic<size_t> latency{0};
int line_number{0};
int type{0};
std::shared_ptr<Instruction> code_line{nullptr};
size_t hitcount{0};
size_t latency{0};
size_t stall{0};
size_t idle{0};
};
class CodeFile
@@ -53,11 +54,10 @@ class CodeFile
public:
CodeFile() = default;
CodeFile(const Fspath& dir, std::shared_ptr<AddressTable>& table);
CodeFile(Fspath dir, std::shared_ptr<AddressTable> table);
~CodeFile();
Fspath dir{};
Fspath filename{};
const Fspath dir{};
std::unordered_map<pcinfo_t, int> line_numbers{};
std::map<pcinfo_t, std::unique_ptr<CodeLine>> isa_map{};
std::map<pcinfo_t, KernelName> kernel_names{};
@@ -28,7 +28,6 @@
#include <rocprofiler-sdk/cxx/details/tokenize.hpp>
#include <dlfcn.h>
#include <atomic>
#include <cassert>
#include <cstdlib>
#include <mutex>
@@ -33,7 +33,6 @@
#include <cxxabi.h>
#include <cstring>
#include <fstream>
#include <shared_mutex>
namespace rocprofiler
{
@@ -87,7 +86,8 @@ get_trace_data(rocprofiler_att_decoder_record_type_t trace_id,
bool bInvalid = false;
for(size_t wave_n = 0; wave_n < trace_size; wave_n++)
{
auto& wave = reinterpret_cast<att_wave_data_t*>(trace_events)[wave_n];
auto& wave = reinterpret_cast<att_wave_data_t*>(trace_events)[wave_n];
int64_t prev_inst_time = wave.begin_time;
WaveFile(tool.config, wave);
@@ -102,12 +102,15 @@ get_trace_data(rocprofiler_att_decoder_record_type_t trace_id,
try
{
auto& line = tool.get(inst.pc);
line.hitcount.fetch_add(1, std::memory_order_relaxed);
line.latency.fetch_add(inst.duration, std::memory_order_relaxed);
line.hitcount += 1;
line.latency += inst.duration;
line.stall += inst.stall;
line.idle += std::max<int64_t>(inst.time - prev_inst_time, 0);
} catch(...)
{
bInvalid = true;
}
prev_inst_time = std::max(prev_inst_time, inst.time + inst.duration);
}
}
if(bInvalid) ROCP_WARNING << "Could not fetch some instructions!";
@@ -191,7 +194,7 @@ ToolData::~ToolData() = default;
std::string
demangle(std::string_view line)
{
int status;
int status{0};
char* c_name = abi::__cxa_demangle(line.data(), nullptr, nullptr, &status);
if(c_name == nullptr) return "";
@@ -28,7 +28,6 @@
#include <cxxabi.h>
#include <array>
#include <atomic>
#include <cstdint>
#include <iostream>
#include <map>
@@ -28,8 +28,6 @@ namespace rocprofiler
{
namespace att_wrapper
{
std::map<size_t, std::unique_ptr<WaitcntList>> WaitcntList::_cache;
int64_t
MemoryCounter::extract_waitcnt(const std::string& str) const
{
@@ -26,7 +26,6 @@
#include "lib/att-tool/att_lib_wrapper.hpp"
#include "lib/att-tool/code.hpp"
#include <atomic>
#include <map>
#include <memory>
#include <unordered_map>
@@ -47,25 +46,16 @@ struct WaitcntList
using isa_map_t = std::map<pcinfo_t, std::unique_ptr<CodeLine>>;
using wave_t = att_wave_data_t;
WaitcntList() = default;
static const WaitcntList& Get(int gfxip, const wave_t& wave, isa_map_t& isa_map)
WaitcntList(int gfxip, const wave_t& wave, isa_map_t& isa_map)
{
auto it = _cache.find(wave.traceID);
if(it != _cache.end()) return *it->second;
auto ptr = std::make_unique<WaitcntList>();
if(gfxip == 9)
ptr->mem_unroll = gfx9_construct(wave, isa_map);
mem_unroll = gfx9_construct(wave, isa_map);
else if(gfxip == 10 || gfxip == 11)
ptr->mem_unroll = gfx10_construct(wave, isa_map);
mem_unroll = gfx10_construct(wave, isa_map);
else if(gfxip == 12)
ptr->mem_unroll = gfx12_construct(wave, isa_map);
mem_unroll = gfx12_construct(wave, isa_map);
else
throw std::runtime_error("Invalid gfxip: " + std::to_string(gfxip));
return *_cache.emplace(wave.traceID, std::move(ptr)).first->second;
}
static std::vector<LineWaitcnt> gfx9_construct(const wave_t& wave, isa_map_t& isa_map);
@@ -73,9 +63,6 @@ struct WaitcntList
static std::vector<LineWaitcnt> gfx12_construct(const wave_t& wave, isa_map_t& isa_map);
std::vector<LineWaitcnt> mem_unroll{};
private:
static std::map<size_t, std::unique_ptr<WaitcntList>> _cache;
};
class MemoryCounter
@@ -98,6 +85,12 @@ public:
std::optional<std::vector<int>> handle_mem_op(const std::string& inst,
std::vector<int>& flat_list);
// Drains this counter into `out`: every entry currently held in `list` is
// appended to the end of `out` (whatever `out` already contains is kept),
// and `list` is then emptied.
void clearTo(std::vector<int>& out)
{
out.insert(out.end(), list.begin(), list.end());
list.clear();
};
const std::string name;
Ordering order = Ordering::MEMORY_SEQUENTIAL;
std::vector<int> list{};
@@ -64,6 +64,7 @@ union MemoryInst
int ldcnt : 1;
int stcnt : 1;
int sampl : 1;
int idle : 1;
};
int raw = 0;
};
@@ -81,6 +82,13 @@ classify(const std::string& inst)
{
if(inst.find("s_wait_alu") != npos) return MemoryInstType::TYPE_NOT_MEM;
if(inst.find("s_wait_idle") != npos)
{
MemoryInst type = MemoryInstType::TYPE_WAITCNT;
type.idle = true;
return type;
}
MemoryInst type = MemoryInstType::TYPE_WAITCNT;
if(inst.find("dscnt") != npos) type.dscnt = true;
if(inst.find("bvhcnt") != npos) type.bvhcn = true;
@@ -233,31 +241,26 @@ WaitcntList::gfx12_construct(const wave_t& wave, isa_map_t& isa_map)
if(auto joined = expcnt.handle_mem_op(inst_str, empty_list))
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
}
if(type.sampl)
{
if(auto joined = samplecnt.handle_mem_op(inst_str, empty_list))
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
}
if(type.kmcnt)
{
if(auto joined = kmcnt.handle_mem_op(inst_str, empty_list))
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
}
if(type.stcnt)
{
if(auto joined = storecnt.handle_mem_op(inst_str, flat_stor))
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
}
if(type.ldcnt)
{
if(auto joined = loadcnt.handle_mem_op(inst_str, flat_load))
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
}
if(type.dscnt)
{
if(auto joined = dscnt.handle_mem_op(inst_str, flat_load))
@@ -271,6 +274,20 @@ WaitcntList::gfx12_construct(const wave_t& wave, isa_map_t& isa_map)
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(*joined)});
}
}
if(type.idle)
{
std::vector<int> all{};
loadcnt.clearTo(all);
storecnt.clearTo(all);
samplecnt.clearTo(all);
dscnt.clearTo(all);
kmcnt.clearTo(all);
expcnt.clearTo(all);
bvhcnt.clearTo(all);
mem_unroll.emplace_back(LineWaitcnt{line_number, std::move(all)});
}
}
}
@@ -93,7 +93,7 @@ TEST(att_decoder_waitcnt_test, gfx9)
wave.instructions_array = insts.data();
wave.instructions_size = insts.size();
const auto& data = WaitcntList::Get(9, wave, isa_map);
auto data = WaitcntList(9, wave, isa_map);
std::map<int, std::set<int>> dependencies{};
@@ -183,7 +183,7 @@ TEST(att_decoder_waitcnt_test, gfx10)
wave.instructions_array = insts.data();
wave.instructions_size = insts.size();
const auto& data = WaitcntList::Get(10, wave, isa_map);
auto data = WaitcntList(10, wave, isa_map);
std::map<int, std::set<int>> dependencies{};
@@ -285,7 +285,11 @@ TEST(att_decoder_waitcnt_test, gfx12)
append_isa(37, "s_waitcnt dscnt(1)");
append_isa(38, "s_waitcnt expcnt(0) bvhcnt(0)");
append_isa(39, "s_waitcnt dscnt(0)");
append_isa(40, "invalid");
append_isa(40, "ds_store");
append_isa(41, "global_load");
append_isa(42, "s_wait_idle");
append_isa(43, "invalid");
std::vector<att_wave_instruction_t> insts{};
for(size_t i = 0; i < isa_map.size(); i++)
@@ -300,7 +304,7 @@ TEST(att_decoder_waitcnt_test, gfx12)
wave.instructions_array = insts.data();
wave.instructions_size = insts.size();
const auto& data = WaitcntList::Get(12, wave, isa_map);
auto data = WaitcntList(12, wave, isa_map);
std::map<int, std::set<int>> dependencies{};
@@ -317,7 +321,7 @@ TEST(att_decoder_waitcnt_test, gfx12)
ASSERT_EQ(dependencies.at(dep).size(), set.size());
};
ASSERT_EQ(dependencies.size(), 11);
ASSERT_EQ(dependencies.size(), 12);
set_equal(6, {2, 3});
set_equal(7, {4});
set_equal(8, {5});
@@ -329,6 +333,7 @@ TEST(att_decoder_waitcnt_test, gfx12)
set_equal(37, {31});
set_equal(38, {32, 33, 34, 35});
set_equal(39, {36});
set_equal(42, {40, 41});
}
TEST(att_decoder_waitcnt_test, fail_conditions)
@@ -347,22 +352,18 @@ TEST(att_decoder_waitcnt_test, fail_conditions)
}
WaitcntList::wave_t wave{};
wave.traceID = 4;
wave.instructions_array = insts.data();
wave.instructions_size = insts.size();
// It should give warning and return
ASSERT_TRUE(WaitcntList::Get(9, wave, isa_map).mem_unroll.empty());
wave.traceID++;
ASSERT_TRUE(WaitcntList::Get(10, wave, isa_map).mem_unroll.empty());
wave.traceID++;
ASSERT_TRUE(WaitcntList::Get(12, wave, isa_map).mem_unroll.empty());
wave.traceID++;
ASSERT_TRUE(WaitcntList(9, wave, isa_map).mem_unroll.empty());
ASSERT_TRUE(WaitcntList(10, wave, isa_map).mem_unroll.empty());
ASSERT_TRUE(WaitcntList(12, wave, isa_map).mem_unroll.empty());
// It can't operate on an invalid gfxip
try
{
WaitcntList::Get(-1, wave, isa_map);
WaitcntList(-1, wave, isa_map);
// fail
ASSERT_TRUE(false);
} catch(std::runtime_error& e)
@@ -42,7 +42,7 @@ WaveFile::WaveFile(WaveConfig& config, const att_wave_data_t& wave)
assert(config.filemgr);
int assigned_id = config.id_count.at(wave.simd).at(wave.wave_id).fetch_add(1);
int assigned_id = config.id_count.at(wave.simd).at(wave.wave_id)++;
{
std::stringstream namess;
namess << "se" << config.shader_engine << "_sm" << (int) wave.simd << "_sl"
@@ -87,8 +87,7 @@ WaveFile::WaveFile(WaveConfig& config, const att_wave_data_t& wave)
try
{
const WaitcntList& wait_list =
WaitcntList::Get(config.filemgr->gfxip, wave, config.code->isa_map);
auto wait_list = WaitcntList(config.filemgr->gfxip, wave, config.code->isa_map);
for(const auto& line : wait_list.mem_unroll)
if(!line.dependencies.empty())
@@ -30,7 +30,6 @@
#include "att_lib_wrapper.hpp"
#include <atomic>
#include <map>
#include <unordered_map>
#include <vector>
@@ -45,7 +44,7 @@ constexpr size_t SIMD_SIZE = 32;
class WaveConfig
{
using WavestateArray = std::array<std::shared_ptr<WstatesFile>, ATT_WAVE_STATE_LAST>;
using SIMD = std::array<std::atomic<int>, SIMD_SIZE>;
using SIMD = std::array<size_t, SIMD_SIZE>;
public:
WaveConfig(int se_id,
@@ -137,10 +137,10 @@ struct config : output_config
std::string pc_sampling_method = get_env("ROCPROF_PC_SAMPLING_METHOD", "none");
std::string pc_sampling_unit = get_env("ROCPROF_PC_SAMPLING_UNIT", "none");
std::string extra_counters_contents = get_env("ROCPROF_EXTRA_COUNTERS_CONTENTS", "");
std::string att_capability = get_env("ROCPROF_ATT_CAPABILITY", "trace");
std::unordered_set<size_t> kernel_filter_range = {};
std::vector<std::set<std::string>> counters = {};
std::string att_capability = get_env("ROCPROF_ATT_CAPABILITY", "");
std::unordered_set<size_t> kernel_filter_range = {};
std::vector<std::set<std::string>> counters = {};
std::vector<att_perfcounter> att_param_perfcounters = {};
std::queue<CollectionPeriod> collection_periods = {};
@@ -295,7 +295,11 @@ DispatchThreadTracer::resource_init()
if(it == params.end()) continue;
auto cache = rocprofiler::agent::get_hsa_agent(rocp_agent);
CHECK(cache.has_value());
if(!cache.has_value())
{
ROCP_CI_LOG(TRACE) << "Could not find HSA Agent for " << rocp_agent->id.handle;
continue;
}
agents[*cache] = std::make_unique<ThreadTracerQueue>(it->second, rocp_agent->id);
}
}
@@ -38,51 +38,52 @@ rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.p
find_package(rocprofiler-sdk REQUIRED)
# hsa multiqueue dependency test
set(IS_DISABLED ON)
add_test(
NAME rocprofv3-test-hsa-multiqueue-att-cmd-env-ld-lib-path-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --advanced-thread-trace
1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD
--att-buffer-size 0x6000000 --att-simd-select 0x3 --att-parse testing1
--att-serialize-all 1 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o
out --output-format json ${PRELOAD_ARGS} --
$<TARGET_FILE:hsa_code_object_testapp>)
set(LIB_PATH_LOC "${ROCM_PATH}/lib")
set(LIB_PATH_ENV "ROCPROF_ATT_LIBRARY_PATH=${LIB_PATH_LOC}")
set_tests_properties(
rocprofv3-test-hsa-multiqueue-att-cmd-env-ld-lib-path-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
LD_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}:$ENV{LD_LIBRARY_PATH})
# hsa multiqueue dependency test
find_library(
attdecoder
HINTS ${LIB_PATH_LOC}
PATHS ${ROCM_PATH}
PATH_SUFFIXES lib
NAMES att_decoder_trace)
if(attdecoder)
set(IS_DISABLED OFF)
endif()
# hsa multiqueue dependency test with lib path
add_test(
NAME rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --advanced-thread-trace
1 --att-target-cu 1 --att-shader-engine-mask 0x11 --kernel-include-regex copyD
--att-buffer-size 0x6000000 --att-simd-select 0x3 --att-parse testing1
--att-serialize-all 1 -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o
out --output-format json ${PRELOAD_ARGS} --
--att-buffer-size 0x6000000 --att-simd-select 0x3 --att-serialize-all 1 -d
${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o out --output-format json
${PRELOAD_ARGS} --att-library-path ${LIB_PATH_LOC} --
$<TARGET_FILE:hsa_code_object_testapp>)
set_tests_properties(
rocprofv3-test-hsa-multiqueue-att-cmd-env-att-lib-path-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED})
# hsa multiqueue dependency test
# hsa multiqueue dependency test with json input
add_test(
NAME rocprofv3-test-hsa-multiqueue-att-json-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --att-library-path
${CMAKE_LIBRARY_OUTPUT_DIRECTORY} -d
${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/json_input -i
${LIB_PATH_LOC} -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/json_input -i
${CMAKE_CURRENT_BINARY_DIR}/att_input.json ${PRELOAD_ARGS} --
$<TARGET_FILE:hsa_code_object_testapp>)
set_tests_properties(rocprofv3-test-hsa-multiqueue-att-json-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests")
set_tests_properties(
rocprofv3-test-hsa-multiqueue-att-json-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED} ENVIRONMENT
${LIB_PATH_ENV})
# validate output
add_test(
NAME rocprofv3-test-hsa-multiqueue-att-cmd-validate
COMMAND
@@ -108,22 +109,29 @@ set(MULTIQUEUE_JSON_VALIDATION_FILES
set_tests_properties(
rocprofv3-test-hsa-multiqueue-att-cmd-validate
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
PROPERTIES TIMEOUT
45
LABELS
"integration-tests"
DEPENDS
"rocprofv3-test-hsa-multiqueue-att-cmd-ld-lib-path-execute"
FAIL_REGULAR_EXPRESSION "AssertionError")
FAIL_REGULAR_EXPRESSION
"AssertionError"
DISABLED
${IS_DISABLED})
set_tests_properties(
rocprofv3-test-hsa-multiqueue-att-json-validate
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DEPENDS
"rocprofv3-test-hsa-multiqueue-att-json-execute" FAIL_REGULAR_EXPRESSION
"AssertionError")
if(TARGET rocprofiler-sdk::att-decoder-testing1 AND TARGET
rocprofiler-sdk::att-decoder-testing2)
set(MISSING_TEST_DECODER_LIBS OFF)
else()
set(MISSING_TEST_DECODER_LIBS ON)
endif()
PROPERTIES TIMEOUT
45
LABELS
"integration-tests"
DEPENDS
"rocprofv3-test-hsa-multiqueue-att-json-execute"
FAIL_REGULAR_EXPRESSION
"AssertionError"
DISABLED
${IS_DISABLED})
function(configure_att_input _FILENAME _OUTDIR)
set(LIBRARY_OUTPUT_DIR ${_OUTDIR})
@@ -134,74 +142,41 @@ endfunction()
configure_att_input(att_input.yml "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}")
configure_att_input(att_input_will_fail.yml "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
#
# Uses att_library_path in YAML input to specify ATT library path
#
# test yaml input
add_test(
NAME rocprofv3-test-att-library-path-yaml-input
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
${CMAKE_CURRENT_BINARY_DIR}/att_input.yml --log-level env --echo -- sleep 0)
NAME rocprofv3-test-att-yaml-input
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
${CMAKE_CURRENT_BINARY_DIR}/att_input.yml --log-level env --echo --
$<TARGET_FILE:hsa_code_object_testapp>)
set_tests_properties(
rocprofv3-test-att-library-path-yaml-input
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED
"${MISSING_TEST_DECODER_LIBS}")
rocprofv3-test-att-yaml-input PROPERTIES TIMEOUT 45 LABELS "integration-tests"
DISABLED ${IS_DISABLED})
# Invalid lib path has to fail
add_test(
NAME rocprofv3-test-att-library-path-yaml-input-will-fail
NAME rocprofv3-test-att-yaml-input-will-fail
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
${CMAKE_CURRENT_BINARY_DIR}/att_input_will_fail.yml --log-level env --echo --
sleep 0)
$<TARGET_FILE:hsa_code_object_testapp>)
set_tests_properties(
rocprofv3-test-att-library-path-yaml-input-will-fail
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED
"${MISSING_TEST_DECODER_LIBS}")
rocprofv3-test-att-yaml-input-will-fail
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED ${IS_DISABLED})
#
# Uses --att-library-path to specify ATT library path
#
add_test(
NAME rocprofv3-test-att-library-path-cmd-line
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-library-path
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att --att-parse testing2 --log-level env --echo
-- sleep 0)
set_tests_properties(
rocprofv3-test-att-library-path-cmd-line
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED
"${MISSING_TEST_DECODER_LIBS}")
add_test(
NAME rocprofv3-test-att-library-path-cmd-line-will-fail
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-library-path
${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att --att-parse testing1 --log-level env --echo
-- sleep 0)
add_test(NAME rocprofv3-test-att-library-path-cmd-line-will-fail
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-library-path .
--log-level env --echo -- $<TARGET_FILE:hsa_code_object_testapp>)
set_tests_properties(
rocprofv3-test-att-library-path-cmd-line-will-fail
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED
"${MISSING_TEST_DECODER_LIBS}")
#
# Uses ROCPROF_ATT_LIBRARY_PATH to specify ATT library path
#
add_test(NAME rocprofv3-test-att-library-path-env-var
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-parse testing2
--log-level env --echo -- sleep 0)
set_tests_properties(
rocprofv3-test-att-library-path-env-var
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
"ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att" DISABLED
"${MISSING_TEST_DECODER_LIBS}")
PROPERTIES TIMEOUT 45 LABELS "integration-tests" WILL_FAIL ON DISABLED ${IS_DISABLED})
add_test(NAME rocprofv3-test-att-library-path-env-var-will-fail
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --att-parse testing1
--log-level env --echo -- sleep 0)
COMMAND $<TARGET_FILE:rocprofiler-sdk::rocprofv3> --att --log-level env --echo
-- $<TARGET_FILE:hsa_code_object_testapp>)
set_tests_properties(
rocprofv3-test-att-library-path-env-var-will-fail
@@ -210,11 +185,11 @@ set_tests_properties(
LABELS
"integration-tests"
ENVIRONMENT
"ROCPROF_ATT_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/att"
"ROCPROF_ATT_LIBRARY_PATH=."
WILL_FAIL
ON
DISABLED
"${MISSING_TEST_DECODER_LIBS}")
${IS_DISABLED})
#
# Uses ATT and Counter Collection at the same time
@@ -223,11 +198,10 @@ add_test(
NAME rocprofv3-test-hsa-multiqueue-att-plus-pmc-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --log-level env --pmc SQ_WAVES
--advanced-thread-trace --att-parse testing1 -d
${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o out --output-format json
${PRELOAD_ARGS} -- $<TARGET_FILE:vector-ops>)
--advanced-thread-trace -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace/cmd_input -o
out --output-format json ${PRELOAD_ARGS} -- $<TARGET_FILE:vector-ops>)
set_tests_properties(
rocprofv3-test-hsa-multiqueue-att-plus-pmc-execute
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT
LD_LIBRARY_PATH=${CMAKE_LIBRARY_OUTPUT_DIRECTORY}:$ENV{LD_LIBRARY_PATH})
PROPERTIES TIMEOUT 45 LABELS "integration-tests" DISABLED ${IS_DISABLED} ENVIRONMENT
${LIB_PATH_ENV})
@@ -9,7 +9,6 @@
],
"truncate_kernels": true,
"advanced_thread_trace": true,
"att_parse": "testing1",
"att_target_cu": 1,
"att_shader_engine_mask": "0x11",
"att_simd_select": "0x3",
@@ -22,31 +22,10 @@
jobs:
- advanced_thread_trace: True
att_parse: testing2
att_library_path:
- @LIBRARY_OUTPUT_DIR@/att
- advanced_thread_trace: True
att_parse: testing2
att_library_path:
- @LIBRARY_OUTPUT_DIR@/att
- @LIBRARY_OUTPUT_DIR@
- advanced_thread_trace: True
att_parse: testing1
att_library_path:
- @LIBRARY_OUTPUT_DIR@/att
- @LIBRARY_OUTPUT_DIR@
- advanced_thread_trace: True
att_parse: testing1
- advanced_thread_trace: True
att_parse: testing1
att_library_path:
- @LIBRARY_OUTPUT_DIR@
- advanced_thread_trace: True
att_parse: testing1
att_library_path:
- @LIBRARY_OUTPUT_DIR@