2fa95e6d6d
* Enable PC sampling to be run alongside ATT. Add ATT to changelog. * Fix tests * Review comments --------- Co-authored-by: Giovanni Baraldi <gbaraldi@amd.com>
377 строки
11 KiB
C++
377 строки
11 KiB
C++
// MIT License
|
|
//
|
|
// Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
// copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
|
|
#include "lib/att-tool/waitcnt/analysis.hpp"
|
|
#include "lib/common/logging.hpp"
|
|
#include "lib/rocprofiler-sdk/registration.hpp"
|
|
|
|
#include <gtest/gtest.h>
|
|
#include <iostream>
|
|
|
|
namespace rocprofiler
|
|
{
|
|
namespace att_wrapper
|
|
{
|
|
// Offset added to every line number when it is turned into a pc address, so
// that the first line (line number 0) does not get skipped for having vaddr == 0.
constexpr uint64_t LINE_OFFSET = 1;
|
|
|
|
// Feeds a synthetic gfx9 instruction stream through WaitcntList and checks
// which memory-operation lines each s_waitcnt line is reported to depend on.
TEST(att_decoder_waitcnt_test, gfx9)
{
    registration::init_logging();

    // Number of times the whole instruction stream is replayed in the wave.
    constexpr size_t LOOP_CNT = 4;

    WaitcntList::isa_map_t isa_map{};

    // Registers one ISA line; its pc address is the line number shifted by LINE_OFFSET.
    auto append_isa = [&](size_t line_number, const char* line) {
        pcinfo_t pc{};
        pc.addr      = line_number + LINE_OFFSET;
        pc.marker_id = 0;

        auto code             = std::make_unique<CodeLine>();
        code->code_line       = std::make_shared<CodeLine::Instruction>();
        code->code_line->inst = line;
        code->line_number     = line_number;

        isa_map.emplace(pc, std::move(code));
    };

    append_isa(0, "s_nop 0");
    append_isa(1, "v_add_ 0");
    append_isa(2, "s_waitcnt vmcnt(0) lgkmcnt(0)");
    append_isa(3, "s_add_ 0");

    append_isa(4, "global_load_");
    append_isa(5, "buffer_store_");
    append_isa(6, "scratch_load_");
    // NOTE(review): "lkgmcnt" is spelled differently from the "lgkmcnt" used on the
    // other lines - confirm whether this is a deliberate malformed-input case.
    append_isa(7, "s_waitcnt lkgmcnt(0)");
    append_isa(8, "s_waitcnt vmcnt(2)");
    append_isa(9, "s_waitcnt vmcnt(1)");
    append_isa(10, "s_load_");
    append_isa(11, "s_store_");
    append_isa(12, "s_sendmsg ");
    append_isa(13, "s_waitcnt vmcnt(0) lgkmcnt(0)");
    append_isa(14, "ds_load_");
    append_isa(15, "ds_store_");
    append_isa(16, "ds_load_");
    append_isa(17, "s_waitcnt lgkmcnt(2)");
    append_isa(18, "s_waitcnt lgkmcnt(1)");
    append_isa(19, "flat_load_");
    append_isa(20, "s_waitcnt vmcnt( 0) lgkmcnt(0x0)");  // some weird strings
    append_isa(21, "invalid");

    // The wave visits every registered line in order, LOOP_CNT times over.
    const size_t num_lines = isa_map.size();

    std::vector<wave_instruction_t> insts{};
    insts.reserve(LOOP_CNT * num_lines);

    for(size_t j = 0; j < LOOP_CNT; j++)
    {
        for(size_t i = 0; i < num_lines; i++)
        {
            wave_instruction_t inst{};
            inst.pc.addr = i + LINE_OFFSET;
            insts.push_back(inst);
        }
    }

    WaitcntList::wave_t wave{};
    wave.instructions_array = insts.data();
    wave.instructions_size  = insts.size();

    auto data = WaitcntList(9, wave, isa_map);

    // Collapse mem_unroll into "destination line -> set of source lines".
    // Using a set removes duplicates among the reported sources.
    std::map<int, std::set<int>> dependencies{};

    for(const auto& [dst, src] : data.mem_unroll)
    {
        auto& dep = dependencies[dst];
        for(const auto& p : src)
            dep.insert(p);
    }

    // Checks that `waitcnt_line` has exactly the `expected` source lines.
    // A missing entry is reported as a regular test failure instead of letting
    // std::map::at throw out of the helper.
    auto expect_deps = [&](int waitcnt_line, const std::set<int>& expected) {
        const auto it = dependencies.find(waitcnt_line);
        if(it == dependencies.end())
        {
            ADD_FAILURE() << "no dependencies recorded for line " << waitcnt_line;
            return;
        }
        EXPECT_EQ(it->second, expected) << "dependency mismatch for line " << waitcnt_line;
    };

    // size_t on both sides avoids a signed/unsigned comparison inside gtest.
    ASSERT_EQ(dependencies.size(), size_t{6});
    expect_deps(8, {4});
    expect_deps(9, {5});
    expect_deps(13, {6, 10, 11, 12});
    expect_deps(17, {14});
    expect_deps(18, {15});
    expect_deps(20, {16, 19});
}
|
|
|
|
// Feeds a synthetic gfx10 instruction stream through WaitcntList and checks
// which memory-operation lines each s_waitcnt line is reported to depend on.
TEST(att_decoder_waitcnt_test, gfx10)
{
    registration::init_logging();

    WaitcntList::isa_map_t isa_map{};

    // Registers one ISA line; its pc address is the line number shifted by LINE_OFFSET.
    auto append_isa = [&](size_t line_number, const char* line) {
        pcinfo_t pc{};
        pc.addr      = line_number + LINE_OFFSET;
        pc.marker_id = 0;

        auto code             = std::make_unique<CodeLine>();
        code->code_line       = std::make_shared<CodeLine::Instruction>();
        code->code_line->inst = line;
        code->line_number     = line_number;

        isa_map.emplace(pc, std::move(code));
    };

    append_isa(0, "buffer_load_");
    append_isa(1, "global_load_");
    append_isa(2, "v_add_ 0");
    append_isa(3, "s_add_ 0");
    append_isa(4, "buffer_store_");
    append_isa(5, "s_waitcnt vmcnt(1)");

    append_isa(6, "scratch_load_");
    append_isa(7, "scratch_store_");
    append_isa(8, "s_wait_alu ");
    append_isa(9, "s_waitcnt vmcnt 0x2");
    append_isa(10, "s_waitcnt vmcnt(1)");
    append_isa(11, "s_waitcnt vscnt(1)");
    append_isa(12, "s_waitcnt vmcnt(0)");
    append_isa(13, "s_waitcnt vscnt(0)");

    append_isa(14, "s_load");
    append_isa(15, "s_store");
    append_isa(16, "s_waitcnt lgkmcnt 0");
    append_isa(17, "s_sendmsg");
    append_isa(18, "s_sendmsg_rtn");
    // NOTE(review): the original note below says "sendmsg_rtn", but the assertion
    // for line 19 expects line 17 (s_sendmsg) - confirm the intended wording.
    append_isa(19, "s_waitcnt lgkmcnt 0x2");  // waits on sendmsg_rtn
    append_isa(20, "flat_load_");
    append_isa(21, "flat_store_");
    append_isa(22, "s_waitcnt vmcnt(0) lgkmcnt(0) vscnt(0)");

    append_isa(23, "ds_load");
    append_isa(24, "ds_store");
    append_isa(25, "s_waitcnt lgkmcnt 0x1");
    append_isa(26, "s_waitcnt lgkmcnt 0");
    append_isa(27, "invalid");

    // The wave visits every registered line exactly once, in order.
    const size_t num_lines = isa_map.size();

    std::vector<wave_instruction_t> insts{};
    insts.reserve(num_lines);

    for(size_t i = 0; i < num_lines; i++)
    {
        wave_instruction_t inst{};
        inst.pc.addr = i + LINE_OFFSET;
        insts.push_back(inst);
    }

    WaitcntList::wave_t wave{};
    wave.instructions_array = insts.data();
    wave.instructions_size  = insts.size();

    auto data = WaitcntList(10, wave, isa_map);

    // Collapse mem_unroll into "destination line -> set of source lines".
    std::map<int, std::set<int>> dependencies{};

    for(const auto& [dst, src] : data.mem_unroll)
    {
        auto& dep = dependencies[dst];
        for(const auto& p : src)
            dep.insert(p);
    }

    // Checks that `waitcnt_line` has exactly the `expected` source lines.
    // A missing entry is reported as a regular test failure instead of letting
    // std::map::at throw out of the helper.
    auto expect_deps = [&](int waitcnt_line, const std::set<int>& expected) {
        const auto it = dependencies.find(waitcnt_line);
        if(it == dependencies.end())
        {
            ADD_FAILURE() << "no dependencies recorded for line " << waitcnt_line;
            return;
        }
        EXPECT_EQ(it->second, expected) << "dependency mismatch for line " << waitcnt_line;
    };

    // size_t on both sides avoids a signed/unsigned comparison inside gtest.
    ASSERT_EQ(dependencies.size(), size_t{10});
    expect_deps(5, {0});
    expect_deps(10, {1});
    expect_deps(11, {4});
    expect_deps(12, {6});
    expect_deps(13, {7});
    expect_deps(16, {14, 15});
    expect_deps(19, {17});
    expect_deps(22, {18, 20, 21});
    expect_deps(25, {23});
    expect_deps(26, {24});
}
|
|
|
|
// Feeds a synthetic gfx12 instruction stream through WaitcntList and checks
// which memory-operation lines each wait instruction is reported to depend on.
TEST(att_decoder_waitcnt_test, gfx12)
{
    registration::init_logging();

    WaitcntList::isa_map_t isa_map{};

    // Registers one ISA line; its pc address is the line number shifted by LINE_OFFSET.
    auto append_isa = [&](size_t line_number, const char* line) {
        pcinfo_t pc{};
        pc.addr      = line_number + LINE_OFFSET;
        pc.marker_id = 0;

        auto code             = std::make_unique<CodeLine>();
        code->code_line       = std::make_shared<CodeLine::Instruction>();
        code->code_line->inst = line;
        code->line_number     = line_number;

        isa_map.emplace(pc, std::move(code));
    };

    // messages
    append_isa(0, "s_wait_alu ");
    append_isa(1, "s_waitcnt samplecnt(0)");
    append_isa(2, "s_sendmsg ");
    append_isa(3, "s_sendmsg ");
    append_isa(4, "s_sendmsg_rtn");
    append_isa(5, "s_sendmsg_rtn");
    append_isa(6, "s_waitcnt kmcnt(4)");
    append_isa(7, "s_waitcnt kmcnt(2)");
    append_isa(8, "s_waitcnt kmcnt(0)");

    // scalar
    append_isa(9, "s_load_");
    append_isa(10, "s_store_");
    append_isa(11, "s_sendmsg ");
    append_isa(12, "s_waitcnt kmcnt(0)");

    // flat
    append_isa(13, "flat_load_");
    append_isa(14, "flat_store_");
    append_isa(15, "global_load_");
    append_isa(16, "ds_load");
    append_isa(17,
               "s_waitcnt bvhcnt(0) expcnt(0) kmcnt(0) kmcnt(0) loadcnt(0) storecnt(0) "
               "samplecnt(0) dscnt(0)");

    // load/store
    append_isa(18, "global_load");
    append_isa(19, "buffer_load");
    append_isa(20, "global_store");
    append_isa(21, "global_wb");
    append_isa(22, "buffer_store");
    append_isa(23, "scratch_load");
    append_isa(24, "scratch_store");
    append_isa(25, "s_waitcnt loadcnt(2)");
    append_isa(26, "s_waitcnt storecnt(2)");
    append_isa(27, "s_waitcnt storecnt(0) loadcnt(0)");

    // skipped
    append_isa(28, "s_wait_alu");
    append_isa(29, "s_mul ");
    append_isa(30, "v_mul ");

    // ds vs exp vs bvh
    append_isa(31, "ds_store");
    append_isa(32, "_bvh_");
    append_isa(33, "_bvh_");
    append_isa(34, "ds_param_load");
    append_isa(35, "ds_direct");
    append_isa(36, "ds_load");
    append_isa(37, "s_waitcnt dscnt(1)");
    append_isa(38, "s_waitcnt expcnt(0) bvhcnt(0)");
    append_isa(39, "s_waitcnt dscnt(0)");

    append_isa(40, "ds_store");
    append_isa(41, "global_load");
    append_isa(42, "s_wait_idle");
    append_isa(43, "invalid");

    // The wave visits every registered line exactly once, in order.
    const size_t num_lines = isa_map.size();

    std::vector<wave_instruction_t> insts{};
    insts.reserve(num_lines);

    for(size_t i = 0; i < num_lines; i++)
    {
        wave_instruction_t inst{};
        inst.pc.addr = i + LINE_OFFSET;
        insts.push_back(inst);
    }

    WaitcntList::wave_t wave{};
    wave.instructions_array = insts.data();
    wave.instructions_size  = insts.size();

    auto data = WaitcntList(12, wave, isa_map);

    // Collapse mem_unroll into "destination line -> set of source lines".
    std::map<int, std::set<int>> dependencies{};

    for(const auto& [dst, src] : data.mem_unroll)
    {
        auto& dep = dependencies[dst];
        for(const auto& p : src)
            dep.insert(p);
    }

    // Checks that `waitcnt_line` has exactly the `expected` source lines.
    // A missing entry is reported as a regular test failure instead of letting
    // std::map::at throw out of the helper.
    auto expect_deps = [&](int waitcnt_line, const std::set<int>& expected) {
        const auto it = dependencies.find(waitcnt_line);
        if(it == dependencies.end())
        {
            ADD_FAILURE() << "no dependencies recorded for line " << waitcnt_line;
            return;
        }
        EXPECT_EQ(it->second, expected) << "dependency mismatch for line " << waitcnt_line;
    };

    // size_t on both sides avoids a signed/unsigned comparison inside gtest.
    ASSERT_EQ(dependencies.size(), size_t{12});
    expect_deps(6, {2, 3});
    expect_deps(7, {4});
    expect_deps(8, {5});
    expect_deps(12, {9, 10, 11});
    expect_deps(17, {13, 14, 15, 16});
    expect_deps(25, {18});
    expect_deps(26, {20, 21});
    expect_deps(27, {19, 22, 23, 24});
    expect_deps(37, {31});
    expect_deps(38, {32, 33, 34, 35});
    expect_deps(39, {36});
    expect_deps(42, {40, 41});
}
|
|
|
|
// Exercises WaitcntList with inputs it cannot analyse: a wave whose pc addresses
// have no ISA entry, and an invalid gfxip value.
TEST(att_decoder_waitcnt_test, fail_conditions)
{
    registration::init_logging();

    // Left empty on purpose: none of the wave's pc addresses has an ISA entry.
    WaitcntList::isa_map_t isa_map{};

    constexpr size_t NUM_INSTS = 10;

    std::vector<wave_instruction_t> insts{};
    insts.reserve(NUM_INSTS);

    for(size_t i = 0; i < NUM_INSTS; i++)
    {
        wave_instruction_t inst{};
        inst.pc.addr = i + LINE_OFFSET;
        insts.push_back(inst);
    }

    WaitcntList::wave_t wave{};
    wave.instructions_array = insts.data();
    wave.instructions_size  = insts.size();

    // It should give a warning and return, leaving mem_unroll empty
    ASSERT_TRUE(WaitcntList(9, wave, isa_map).mem_unroll.empty());
    ASSERT_TRUE(WaitcntList(10, wave, isa_map).mem_unroll.empty());
    ASSERT_TRUE(WaitcntList(12, wave, isa_map).mem_unroll.empty());

    // It cannot operate on an invalid gfxip: construction must throw
    // std::runtime_error (or a type derived from it).
    ASSERT_THROW(WaitcntList(-1, wave, isa_map), std::runtime_error);
}
|
|
|
|
}; // namespace att_wrapper
|
|
}; // namespace rocprofiler
|