[SDK][rocprofv3] MI300 Stochastic PC sampling (#92)
* MI300 Stochastic PC sampling SDK API implementation
* ROCProfV3: Stochastic PC sampling Support (#94)
* ROCProfV3: MI300 Stochastic PC sampling initial draft
* ROCProfV3: Initial Stochastic PC sampling Tests (#95)
ROCProfV3: Initial Stochastic PC sampling tests
* Update rocprofiler_pc_sampling_record_stochastic_v0_t
- update doxygen docs for members
- replace rocprofiler_correlation_id_t with rocprofiler_async_correlation_id_t
* Relax the check in JSON tests
* drain PC sampling buffer during finalize_rocprofv3
* Increase timeout for "Test Install Build" step
- 10 minutes -> 20 minutes
- "Test Installed Packages" has 20 minutes, so "Test Install Build" should have the same limit
---------
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
[ROCm/rocprofiler-sdk commit: 49ce79a5b5]
This commit is contained in:
committed by
GitHub
orang tua
70c1c58c79
melakukan
0ca07105a3
@@ -32,7 +32,7 @@ env:
|
||||
navi4_EXCLUDE_TESTS_REGEX: "^(test-page-migration-(execute|validate))$"
|
||||
navi3_EXCLUDE_LABEL_REGEX: "^(pc-sampling|pc_sampling|openmp-target)$"
|
||||
vega20_EXCLUDE_LABEL_REGEX: "^(pc-sampling|pc_sampling|openmp-target)$"
|
||||
mi200_EXCLUDE_LABEL_REGEX: ""
|
||||
mi200_EXCLUDE_LABEL_REGEX: "^(stochastic)$"
|
||||
mi300_EXCLUDE_LABEL_REGEX: "^(pc-sampling|pc_sampling)$"
|
||||
mi300a_EXCLUDE_LABEL_REGEX: ""
|
||||
mi325_EXCLUDE_LABEL_REGEX: "^(pc-sampling|pc_sampling)$"
|
||||
@@ -124,7 +124,7 @@ jobs:
|
||||
|
||||
- name: Test Install Build
|
||||
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
|
||||
timeout-minutes: 10
|
||||
timeout-minutes: 20
|
||||
shell: bash
|
||||
run: |
|
||||
CMAKE_PREFIX_PATH=/opt/rocprofiler-sdk cmake -B build-samples samples
|
||||
@@ -249,7 +249,7 @@ jobs:
|
||||
|
||||
- name: Test Install Build
|
||||
if: ${{ contains(matrix.runner, env.CORE_EXT_RUNNER) }}
|
||||
timeout-minutes: 10
|
||||
timeout-minutes: 20
|
||||
shell: bash
|
||||
run: |
|
||||
CMAKE_PREFIX_PATH=/opt/rocprofiler-sdk cmake -B build-samples samples
|
||||
|
||||
@@ -43,7 +43,7 @@ namespace pcs
|
||||
// TODO: Since this is used only within the `tool_init`,
|
||||
// we are safe using static constructor.
|
||||
// It would be nice to make this consistent with the `buffer_ids`.
|
||||
tool_agent_info_vec_t gpu_agents;
|
||||
tool_agent_info_vec_t gpu_agents = {};
|
||||
// The reason for using raw pointers is the following.
|
||||
// Sometimes, statically created objects of the client::pcs
|
||||
// namespace might be freed prior to the `tool_fini`,
|
||||
@@ -55,6 +55,12 @@ tool_agent_info_vec_t gpu_agents;
|
||||
// `pcs` namespace and export functions for registering/flushing/destroying buffers.
|
||||
pc_sampling_buffer_id_vec_t* buffer_ids = nullptr;
|
||||
|
||||
namespace
{
/// Default interval requested for Host-Trap sampling (10'000, i.e. 10 ms when expressed in
/// micro seconds).
constexpr uint64_t host_trap_interval = uint64_t{10} * 1000;

/// Default interval requested for stochastic sampling: 2^20 clock-cycles.
constexpr uint64_t stochastic_interval = uint64_t{1} << 20;
}  // namespace
|
||||
|
||||
void
|
||||
init()
|
||||
{
|
||||
@@ -67,6 +73,7 @@ fini()
|
||||
// Clear the data
|
||||
buffer_ids->clear();
|
||||
delete buffer_ids;
|
||||
buffer_ids = nullptr;
|
||||
}
|
||||
|
||||
pc_sampling_buffer_id_vec_t*
|
||||
@@ -112,7 +119,7 @@ find_all_gpu_agents_supporting_pc_sampling_impl(rocprofiler_agent_version_t vers
|
||||
<< "type=" << _agents[i]->type << "\n";
|
||||
}
|
||||
|
||||
*utils::get_output_stream() << ss.str() << std::endl;
|
||||
*utils::get_output_stream() << ss.str() << "\n";
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -161,8 +168,8 @@ query_avail_configs_for_agent(tool_agent_info* agent_info)
|
||||
// The query operation failed, so consider the PC sampling is unsupported at the agent.
|
||||
// This can happen if the PC sampling service is invoked within the ROCgdb.
|
||||
ss << "Querying PC sampling capabilities failed with status=" << status
|
||||
<< " :: " << rocprofiler_get_status_string(status) << std::endl;
|
||||
*utils::get_output_stream() << ss.str() << std::endl;
|
||||
<< " :: " << rocprofiler_get_status_string(status) << "\n";
|
||||
*utils::get_output_stream() << ss.str() << "\n";
|
||||
return false;
|
||||
}
|
||||
else if(agent_info->avail_configs->empty())
|
||||
@@ -172,7 +179,8 @@ query_avail_configs_for_agent(tool_agent_info* agent_info)
|
||||
}
|
||||
|
||||
ss << "The agent with the id: " << agent_info->agent_id.handle << " supports the "
|
||||
<< agent_info->avail_configs->size() << " configurations: " << std::endl;
|
||||
<< agent_info->avail_configs->size() << " configurations: "
|
||||
<< "\n";
|
||||
size_t ind = 0;
|
||||
for(auto& cfg : *agent_info->avail_configs)
|
||||
{
|
||||
@@ -181,7 +189,11 @@ query_avail_configs_for_agent(tool_agent_info* agent_info)
|
||||
<< "unit: " << cfg.unit << ", "
|
||||
<< "min_interval: " << cfg.min_interval << ", "
|
||||
<< "max_interval: " << cfg.max_interval << ", "
|
||||
<< "flags: " << std::hex << cfg.flags << std::dec << std::endl;
|
||||
<< "flags: " << std::hex << cfg.flags << std::dec
|
||||
<< ((cfg.flags == ROCPROFILER_PC_SAMPLING_CONFIGURATION_FLAGS_INTERVAL_POW2)
|
||||
? " (an interval value must be power of 2)"
|
||||
: "")
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
*utils::get_output_stream() << ss.str() << std::flush;
|
||||
@@ -194,8 +206,9 @@ configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info,
|
||||
rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id)
|
||||
{
|
||||
int failures = 10;
|
||||
size_t interval = 0;
|
||||
auto stochastic_picked = false;
|
||||
int failures = 10;
|
||||
size_t interval = 0;
|
||||
do
|
||||
{
|
||||
// Update the list of available configurations
|
||||
@@ -216,9 +229,9 @@ configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info,
|
||||
{
|
||||
if(cfg.method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
|
||||
{
|
||||
// Temporarily disable stochastic sampling as it's not fully supported.
|
||||
// first_stochastic_config = &cfg;
|
||||
// break;
|
||||
first_stochastic_config = &cfg;
|
||||
stochastic_picked = true;
|
||||
break;
|
||||
}
|
||||
else if(!first_host_trap_config &&
|
||||
cfg.method == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP)
|
||||
@@ -238,7 +251,7 @@ configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info,
|
||||
}
|
||||
else
|
||||
{
|
||||
interval = 10000;
|
||||
interval = stochastic_picked ? stochastic_interval : host_trap_interval;
|
||||
}
|
||||
|
||||
auto status = rocprofiler_configure_pc_sampling_service(context_id,
|
||||
@@ -251,8 +264,10 @@ configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info,
|
||||
if(status == ROCPROFILER_STATUS_SUCCESS)
|
||||
{
|
||||
*utils::get_output_stream()
|
||||
<< ">>> We chose PC sampling interval: " << interval
|
||||
<< " on the agent: " << agent_info->agent->id.handle << std::endl;
|
||||
<< ">>> We chose " << (stochastic_picked ? "stochastic" : "Host-Trap")
|
||||
<< " PC sampling with the interval: " << interval << " "
|
||||
<< (stochastic_picked ? "clock-cycles" : "micro seconds")
|
||||
<< " on the agent: " << agent_info->agent->id.handle << "\n";
|
||||
return;
|
||||
}
|
||||
else if(status != ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE)
|
||||
@@ -279,6 +294,87 @@ configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info,
|
||||
ROCPROFILER_CHECK(ROCPROFILER_STATUS_ERROR);
|
||||
}
|
||||
|
||||
/**
 * @brief Streams the fields shared by the PC sampling record kinds to @p os.
 *
 * The text is written on one line and ends with ", " (no line break), so the caller
 * can append record-specific information.
 *
 * @tparam PcSamplingRecordT record type exposing pc, timestamp, exec_mask, workgroup_id,
 * wave_in_group, hw_id.chiplet, dispatch_id and correlation_id.
 * @param [in,out] os stream receiving the text; it is left in decimal mode.
 * @param [in] pc_sample record to print; dereferenced unconditionally.
 */
template <typename PcSamplingRecordT>
void
print_sample_common_fields(std::ostream& os, const PcSamplingRecordT* pc_sample)
{
    // Narrow members are widened first so that they are streamed as numbers, not characters.
    const auto wave_index    = static_cast<unsigned int>(pc_sample->wave_in_group);
    const auto chiplet_index = static_cast<unsigned int>(pc_sample->hw_id.chiplet);

    // Program counter: the offset is shown in hexadecimal.
    os << "(code_obj_id, offset): (" << pc_sample->pc.code_object_id << ", 0x";
    os << std::hex << pc_sample->pc.code_object_offset << "), ";

    os << "timestamp: " << std::dec << pc_sample->timestamp << ", ";

    // The execution mask is shown in hexadecimal, padded to 16 characters.
    os << "exec: " << std::hex << std::setw(16) << pc_sample->exec_mask << ", ";

    // Everything that follows is printed in decimal.
    os << "workgroup_id_(x=" << std::dec << std::setw(5) << pc_sample->workgroup_id.x << ", ";
    os << "y=" << std::setw(5) << pc_sample->workgroup_id.y << ", ";
    os << "z=" << std::setw(5) << pc_sample->workgroup_id.z << "), ";

    os << "wave_in_group: " << std::setw(2) << wave_index << ", ";
    os << "chiplet: " << std::setw(2) << chiplet_index << ", ";
    os << "dispatch_id: " << std::setw(7) << pc_sample->dispatch_id << ",";

    os << "correlation: {internal=" << std::setw(7) << pc_sample->correlation_id.internal << ", ";
    os << "external=" << std::setw(5) << pc_sample->correlation_id.external.value << "}, ";
}
|
||||
|
||||
void
|
||||
print_sample(std::ostream& os, const rocprofiler_pc_sampling_record_host_trap_v0_t* sample)
|
||||
{
|
||||
print_sample_common_fields(os, sample);
|
||||
os << "\n";
|
||||
}
|
||||
|
||||
void
|
||||
print_sample(std::ostream& os, const rocprofiler_pc_sampling_record_stochastic_v0_t* sample)
|
||||
{
|
||||
print_sample_common_fields(os, sample);
|
||||
|
||||
if(sample->wave_issued)
|
||||
{
|
||||
auto* inst_c_str = rocprofiler_get_pc_sampling_instruction_type_name(
|
||||
static_cast<rocprofiler_pc_sampling_instruction_type_t>(sample->inst_type));
|
||||
utils::pcs_assert(inst_c_str != nullptr, "Invalid instruction type");
|
||||
os << "wave issued " << std::string(inst_c_str) << " instruction, ";
|
||||
}
|
||||
else
|
||||
{
|
||||
auto* reason_c_str = rocprofiler_get_pc_sampling_instruction_not_issued_reason_name(
|
||||
static_cast<rocprofiler_pc_sampling_instruction_not_issued_reason_t>(
|
||||
sample->snapshot.reason_not_issued));
|
||||
utils::pcs_assert(reason_c_str != nullptr, "Invalid not issued reason");
|
||||
os << "wave is stalled due to: " << std::string(reason_c_str) << " reason, ";
|
||||
}
|
||||
|
||||
auto snapshot = sample->snapshot;
|
||||
os << "two VALU instructions issued: " << static_cast<unsigned int>(snapshot.dual_issue_valu)
|
||||
<< ", ";
|
||||
|
||||
os << "arbiter state: {pipe issued: ("
|
||||
<< "VALU: " << static_cast<unsigned int>(snapshot.arb_state_issue_valu) << ", "
|
||||
<< "MATRIX: " << static_cast<unsigned int>(snapshot.arb_state_issue_matrix) << ", "
|
||||
<< "LDS: " << static_cast<unsigned int>(snapshot.arb_state_issue_lds) << ", "
|
||||
<< "LDS_DIRECT: " << static_cast<unsigned int>(snapshot.arb_state_issue_lds_direct) << ", "
|
||||
<< "SCALAR: " << static_cast<unsigned int>(snapshot.arb_state_issue_scalar) << ", "
|
||||
<< "TEX: " << static_cast<unsigned int>(snapshot.arb_state_issue_vmem_tex) << ", "
|
||||
<< "FLAT: " << static_cast<unsigned int>(snapshot.arb_state_issue_flat) << ", "
|
||||
<< "EXPORT: " << static_cast<unsigned int>(snapshot.arb_state_issue_exp) << ", "
|
||||
<< "MISC: " << static_cast<unsigned int>(snapshot.arb_state_issue_misc) << "), "
|
||||
<< "pipe stalled: ("
|
||||
<< "VALU: " << static_cast<unsigned int>(snapshot.arb_state_stall_valu) << ", "
|
||||
<< "MATRIX: " << static_cast<unsigned int>(snapshot.arb_state_stall_matrix) << ", "
|
||||
<< "LDS: " << static_cast<unsigned int>(snapshot.arb_state_stall_lds) << ", "
|
||||
<< "LDS_DIRECT: " << static_cast<unsigned int>(snapshot.arb_state_stall_lds_direct) << ", "
|
||||
<< "SCALAR: " << static_cast<unsigned int>(snapshot.arb_state_stall_scalar) << ", "
|
||||
<< "TEX: " << static_cast<unsigned int>(snapshot.arb_state_stall_vmem_tex) << ", "
|
||||
<< "FLAT: " << static_cast<unsigned int>(snapshot.arb_state_stall_flat) << ", "
|
||||
<< "EXPORT: " << static_cast<unsigned int>(snapshot.arb_state_stall_exp) << ", "
|
||||
<< "MISC: " << static_cast<unsigned int>(snapshot.arb_state_stall_misc) << ")}";
|
||||
|
||||
os << "\n";
|
||||
}
|
||||
|
||||
void
|
||||
print_sample(std::ostream& os, const rocprofiler_pc_sampling_record_invalid_t* /*sample*/)
|
||||
{
|
||||
os << "Invalid sample detected.\n";
|
||||
}
|
||||
|
||||
void
|
||||
rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
||||
rocprofiler_buffer_id_t /*buffer_id*/,
|
||||
@@ -289,7 +385,7 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "The number of delivered samples is: " << num_headers << ", "
|
||||
<< "while the number of dropped samples is: " << drop_count << std::endl;
|
||||
<< "while the number of dropped samples is: " << drop_count << "\n";
|
||||
|
||||
for(size_t i = 0; i < num_headers; i++)
|
||||
{
|
||||
@@ -312,23 +408,21 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
||||
auto* pc_sample = static_cast<rocprofiler_pc_sampling_record_host_trap_v0_t*>(
|
||||
cur_header->payload);
|
||||
|
||||
ss << "(code_obj_id, offset): (" << pc_sample->pc.code_object_id << ", 0x"
|
||||
<< std::hex << pc_sample->pc.code_object_offset << "), "
|
||||
<< "timestamp: " << std::dec << pc_sample->timestamp << ", "
|
||||
<< "exec: " << std::hex << std::setw(16) << pc_sample->exec_mask << ", "
|
||||
<< "workgroup_id_(x=" << std::dec << std::setw(5) << pc_sample->workgroup_id.x
|
||||
<< ", "
|
||||
<< "y=" << std::setw(5) << pc_sample->workgroup_id.y << ", "
|
||||
<< "z=" << std::setw(5) << pc_sample->workgroup_id.z << "), "
|
||||
<< "wave_in_group: " << std::setw(2)
|
||||
<< static_cast<unsigned int>(pc_sample->wave_in_group) << ", "
|
||||
<< "chiplet: " << std::setw(2)
|
||||
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet) << ", "
|
||||
<< "dispatch_id: " << std::setw(7) << pc_sample->dispatch_id << ","
|
||||
<< "correlation: {internal=" << std::setw(7)
|
||||
<< pc_sample->correlation_id.internal << ", "
|
||||
<< "external=" << std::setw(5) << pc_sample->correlation_id.external.value << "}"
|
||||
<< std::endl;
|
||||
print_sample(ss, pc_sample);
|
||||
}
|
||||
else if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_STOCHASTIC_V0_SAMPLE)
|
||||
{
|
||||
auto* pc_sample = static_cast<rocprofiler_pc_sampling_record_stochastic_v0_t*>(
|
||||
cur_header->payload);
|
||||
|
||||
print_sample(ss, pc_sample);
|
||||
}
|
||||
else if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_INVALID_SAMPLE)
|
||||
{
|
||||
auto* pc_sample =
|
||||
static_cast<rocprofiler_pc_sampling_record_invalid_t*>(cur_header->payload);
|
||||
|
||||
print_sample(ss, pc_sample);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -341,7 +435,7 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
||||
}
|
||||
}
|
||||
|
||||
*utils::get_output_stream() << ss.str() << std::endl;
|
||||
*utils::get_output_stream() << ss.str() << "\n";
|
||||
}
|
||||
} // namespace pcs
|
||||
} // namespace client
|
||||
|
||||
@@ -33,5 +33,19 @@ get_output_stream()
|
||||
static std::ostream* _v = nullptr;
|
||||
return _v;
|
||||
}
|
||||
|
||||
/**
 * @brief Verifies @p condition; when it is false, writes @p error_msg to the standard error
 * stream and aborts the process.
 *
 * @param [in] condition value expected to be true.
 * @param [in] error_msg text printed after the "PC Sampling Assertion Error: " prefix.
 */
void
pcs_assert(bool condition, std::string_view error_msg)
{
    // Nothing to report when the checked condition holds.
    if(condition) return;

    std::cerr << "PC Sampling Assertion Error: " << error_msg << "\n";
    abort();
}
|
||||
} // namespace utils
|
||||
} // namespace client
|
||||
|
||||
@@ -32,5 +32,8 @@ namespace utils
|
||||
{
|
||||
std::ostream*&
|
||||
get_output_stream();
|
||||
}
|
||||
|
||||
void
|
||||
pcs_assert(bool condition, std::string_view error_msg);
|
||||
} // namespace utils
|
||||
} // namespace client
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include <rocprofiler-sdk/external_correlation.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/internal_threading.h>
|
||||
#include <rocprofiler-sdk/pc_sampling.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
#include <rocprofiler-sdk/cxx/name_info.hpp>
|
||||
#include <rocprofiler-sdk/cxx/perfetto.hpp>
|
||||
@@ -930,6 +931,93 @@ save(ArchiveT& ar, rocprofiler_pc_sampling_record_host_trap_v0_t data)
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("wave_in_grp", wave_in_group);
|
||||
}
|
||||
|
||||
// Serializes the header flags of a stochastic PC sampling record into the archive.
// NOTE(review): ROCP_SDK_SAVE_DATA_BITFIELD is defined outside this view; presumably it stores
// data.has_memory_counter under the "has_mem_cnt" key -- confirm against the macro definition.
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_pc_sampling_record_stochastic_header_t data)
|
||||
{
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("has_mem_cnt", has_memory_counter);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save_pc_sampling_inst_type(ArchiveT& ar, rocprofiler_pc_sampling_instruction_type_t inst_type)
|
||||
{
|
||||
ar(make_nvp("inst_type",
|
||||
std::string(rocprofiler_get_pc_sampling_instruction_type_name(inst_type))));
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save_pc_sampling_stall_reason(ArchiveT& ar,
|
||||
rocprofiler_pc_sampling_instruction_not_issued_reason_t stall_reason)
|
||||
{
|
||||
ar(make_nvp(
|
||||
"stall_reason",
|
||||
std::string(rocprofiler_get_pc_sampling_instruction_not_issued_reason_name(stall_reason))));
|
||||
}
|
||||
|
||||
// Serializes a stochastic sampling snapshot: first the not-issued reason (by name, through
// save_pc_sampling_stall_reason), then the dual-issue bit, then the arbiter "issue" bits and
// finally the arbiter "stall" bits.
// NOTE(review): ROCP_SDK_SAVE_DATA_BITFIELD is defined outside this view; presumably each
// invocation stores the named member of `data` under the given key -- confirm.
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_pc_sampling_snapshot_v0_t data)
|
||||
{
|
||||
// reason_not_issued is a raw bit-field; it is converted to the enum for the name lookup.
save_pc_sampling_stall_reason(
|
||||
ar,
|
||||
static_cast<rocprofiler_pc_sampling_instruction_not_issued_reason_t>(
|
||||
data.reason_not_issued));
|
||||
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("dual_issue_valu", dual_issue_valu);
|
||||
|
||||
// Arb state (pipe issued)
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_issue_valu", arb_state_issue_valu);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_issue_matrix", arb_state_issue_matrix);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_issue_lds", arb_state_issue_lds);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_issue_lds_direct", arb_state_issue_lds_direct);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_issue_scalar", arb_state_issue_scalar);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_issue_vmem_tex", arb_state_issue_vmem_tex);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_issue_flat", arb_state_issue_flat);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_issue_exp", arb_state_issue_exp);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_issue_misc", arb_state_issue_misc);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_issue_brmsg", arb_state_issue_brmsg);
|
||||
// Arb state (pipe stalled)
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_stall_valu", arb_state_stall_valu);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_stall_matrix", arb_state_stall_matrix);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_stall_lds", arb_state_stall_lds);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_stall_lds_direct", arb_state_stall_lds_direct);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_stall_scalar", arb_state_stall_scalar);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_stall_vmem_tex", arb_state_stall_vmem_tex);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_stall_flat", arb_state_stall_flat);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_stall_exp", arb_state_stall_exp);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_stall_misc", arb_state_stall_misc);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("arb_state_stall_brmsg", arb_state_stall_brmsg);
|
||||
}
|
||||
|
||||
// Serializes a stochastic PC sampling record: the stochastic header flags, the fields it shares
// with the host-trap record, and then the stochastic-only fields (wave_issued, the instruction
// type by name, wave_count and the snapshot). Memory counters are not written yet (see TODO).
// NOTE(review): the ROCP_SDK_SAVE_DATA_* macros are defined outside this view; presumably each
// one stores the named member of `data` in `ar` -- confirm against their definitions.
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_pc_sampling_record_stochastic_v0_t data)
|
||||
{
|
||||
// flags specific for stochastic sampling
|
||||
ROCP_SDK_SAVE_DATA_FIELD(flags);
|
||||
|
||||
// Common for host-trap and stochastic
|
||||
ROCP_SDK_SAVE_DATA_FIELD(hw_id);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(pc);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(exec_mask);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(timestamp);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(dispatch_id);
|
||||
ROCP_SDK_SAVE_DATA_VALUE("corr_id", correlation_id);
|
||||
ROCP_SDK_SAVE_DATA_VALUE("wrkgrp_id", workgroup_id);
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("wave_in_grp", wave_in_group);
|
||||
|
||||
// fields specific for stochastic
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("wave_issued", wave_issued);
|
||||
// inst_type is a raw bit-field; it is converted to the enum and written by name.
save_pc_sampling_inst_type(
|
||||
ar, static_cast<rocprofiler_pc_sampling_instruction_type_t>(data.inst_type));
|
||||
ROCP_SDK_SAVE_DATA_BITFIELD("wave_cnt", wave_count);
|
||||
ROCP_SDK_SAVE_DATA_FIELD(snapshot);
|
||||
|
||||
// TODO: add memory counters
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_agent_io_link_t data)
|
||||
|
||||
@@ -475,8 +475,9 @@ typedef enum
|
||||
typedef enum
|
||||
{
|
||||
ROCPROFILER_PC_SAMPLING_RECORD_NONE = 0,
|
||||
ROCPROFILER_PC_SAMPLING_RECORD_INVALID_SAMPLE, ///< ::rocprofiler_pc_sampling_record_invalid_t
|
||||
ROCPROFILER_PC_SAMPLING_RECORD_HOST_TRAP_V0_SAMPLE, ///< ::rocprofiler_pc_sampling_record_host_trap_v0_t
|
||||
ROCPROFILER_PC_SAMPLING_RECORD_STOCHASTIC_V0_SAMPLE, ///< for the future use
|
||||
ROCPROFILER_PC_SAMPLING_RECORD_STOCHASTIC_V0_SAMPLE, ///< ::rocprofiler_pc_sampling_record_stochastic_v0_t
|
||||
ROCPROFILER_PC_SAMPLING_RECORD_LAST,
|
||||
} rocprofiler_pc_sampling_record_kind_t;
|
||||
|
||||
|
||||
@@ -106,11 +106,13 @@ ROCPROFILER_EXTERN_C_INIT
|
||||
* 1. PC sampling is already configured with configuration different than requested,
|
||||
* 2. PC sampling is requested from a process that runs within the ROCgdb.
|
||||
* 3. HSA runtime does not support PC sampling.
|
||||
* 4. GPU device does not support requested PC sampling method.
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL the amdgpu driver installed on the system
|
||||
* does not support the PC sampling feature
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR a general error caused by the amdgpu driver
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_CONTEXT_CONFLICT counter collection service already
|
||||
* setup in the context
|
||||
* @retval ::ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT function invoked with an invalid argument
|
||||
*/
|
||||
rocprofiler_status_t
|
||||
rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t context_id,
|
||||
@@ -121,6 +123,19 @@ rocprofiler_configure_pc_sampling_service(rocprofiler_context_id_t conte
|
||||
rocprofiler_buffer_id_t buffer_id,
|
||||
int flags) ROCPROFILER_API;
|
||||
|
||||
/**
|
||||
* @brief Enumeration describing values of flags of ::rocprofiler_pc_sampling_configuration_t.
|
||||
*/
|
||||
// The enumerators are consecutive values (0, 1, 2), not independent bit masks.
typedef enum rocprofiler_pc_sampling_configuration_flags_t
|
||||
{
|
||||
ROCPROFILER_PC_SAMPLING_CONFIGURATION_FLAGS_NONE = 0, ///< no flag is set
|
||||
ROCPROFILER_PC_SAMPLING_CONFIGURATION_FLAGS_INTERVAL_POW2,
|
||||
ROCPROFILER_PC_SAMPLING_CONFIGURATION_FLAGS_LAST
|
||||
|
||||
/// @var ROCPROFILER_PC_SAMPLING_CONFIGURATION_FLAGS_INTERVAL_POW2
|
||||
/// @brief The interval value must be a power of 2.
|
||||
} rocprofiler_pc_sampling_configuration_flags_t;
|
||||
|
||||
/**
|
||||
* @brief PC sampling configuration supported by a GPU agent.
|
||||
*/
|
||||
@@ -131,7 +146,7 @@ typedef struct
|
||||
rocprofiler_pc_sampling_unit_t unit;
|
||||
size_t min_interval;
|
||||
size_t max_interval;
|
||||
uint64_t flags; /// for future use
|
||||
uint64_t flags; ///< take values from ::rocprofiler_pc_sampling_configuration_flags_t
|
||||
|
||||
/// @var method
|
||||
/// @brief Sampling method supported by the GPU agent.
|
||||
@@ -202,12 +217,11 @@ rocprofiler_query_pc_sampling_agent_configurations(
|
||||
*/
|
||||
typedef struct rocprofiler_pc_sampling_hw_id_v0_t
|
||||
{
|
||||
uint64_t chiplet : 6; ///< chiplet index (3 bits allocated by the ROCr runtime)
|
||||
uint64_t wave_id : 7; ///< wave slot index
|
||||
uint64_t simd_id : 2; ///< SIMD index
|
||||
uint64_t pipe_id : 4; ///< pipe index
|
||||
uint64_t cu_or_wgp_id : 4; ///< Index of compute unit on GFX9 or workgroup processer on other
|
||||
///< architectures
|
||||
uint64_t chiplet : 6; ///< chiplet index (3 bits allocated by the ROCr runtime)
|
||||
uint64_t wave_id : 7; ///< wave slot index
|
||||
uint64_t simd_id : 2; ///< SIMD index
|
||||
uint64_t pipe_id : 4; ///< pipe index
|
||||
uint64_t cu_or_wgp_id : 4;
|
||||
uint64_t shader_array_id : 1; ///< Shared array index
|
||||
uint64_t shader_engine_id : 5; ///< shared engine index
|
||||
uint64_t workgroup_id : 7; ///< thread_group index on GFX9, and workgroup index on GFX10+
|
||||
@@ -215,6 +229,9 @@ typedef struct rocprofiler_pc_sampling_hw_id_v0_t
|
||||
uint64_t queue_id : 4; ///< queue id
|
||||
uint64_t microengine_id : 2; ///< ACE (microengine) index
|
||||
uint64_t reserved0 : 16; ///< Reserved for the future use
|
||||
|
||||
/// @var cu_or_wgp_id
|
||||
/// @brief Compute unit index on GFX9 or workgroup processor index on GFX10+.
|
||||
} rocprofiler_pc_sampling_hw_id_v0_t;
|
||||
|
||||
/**
|
||||
@@ -242,7 +259,6 @@ typedef struct
|
||||
/// @ref code_object_id is equal to the ::ROCPROFILER_CODE_OBJECT_ID_NONE.
|
||||
} rocprofiler_pc_t;
|
||||
|
||||
// TODO: The definition of this struct might change over time.
|
||||
/**
|
||||
* @brief ROCProfiler Host-Trap PC Sampling Record.
|
||||
*/
|
||||
@@ -263,6 +279,239 @@ typedef struct rocprofiler_pc_sampling_record_host_trap_v0_t
|
||||
/// @brief API launch call id that matches dispatch ID
|
||||
} rocprofiler_pc_sampling_record_host_trap_v0_t;
|
||||
|
||||
/**
|
||||
* @brief The header of the @ref rocprofiler_pc_sampling_record_stochastic_v0_t, indicating
|
||||
* what fields of the @ref rocprofiler_pc_sampling_record_stochastic_v0_t instance are meaningful
|
||||
* for the sample.
|
||||
*/
|
||||
// One byte of flags: 1 bit in use plus 7 remaining bits.
typedef struct rocprofiler_pc_sampling_record_stochastic_header_t
|
||||
{
|
||||
uint8_t has_memory_counter : 1; ///< pc sample provides memory counters information
|
||||
///< via ::rocprofiler_pc_sampling_memory_counters_t
|
||||
uint8_t reserved_type : 7; ///< remaining bits of the byte (presumably reserved -- confirm)
|
||||
} rocprofiler_pc_sampling_record_stochastic_header_t;
|
||||
|
||||
/**
|
||||
* @brief Enumeration describing type of sampled issued instruction.
|
||||
*/
|
||||
// The enumerators are consecutive, starting at 0 (NONE) and ending at 17 (LAST).
typedef enum rocprofiler_pc_sampling_instruction_type_t
|
||||
{
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NONE = 0,
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU, ///< vector ALU instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_MATRIX, ///< matrix instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_SCALAR, ///< scalar (memory) instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_TEX, ///< texture memory instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_LDS, ///< LDS memory instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_LDS_DIRECT, ///< LDS direct memory instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_FLAT, ///< flat memory instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_EXPORT, ///< export instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_MESSAGE, ///< message instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BARRIER, ///< barrier instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BRANCH_NOT_TAKEN,
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BRANCH_TAKEN,
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_JUMP, ///< jump instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_OTHER, ///< other types of instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST, ///< no instruction issued
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_DUAL_VALU, ///< dual VALU instruction
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_LAST
|
||||
|
||||
/// @var ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BRANCH_NOT_TAKEN
|
||||
/// @brief Instruction representing a branch not being taken.
|
||||
/// @var ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BRANCH_TAKEN
|
||||
/// @brief Instruction representing a taken branch.
|
||||
} rocprofiler_pc_sampling_instruction_type_t;
|
||||
|
||||
/**
|
||||
* @brief Enumeration describing reason for not issuing an instruction.
|
||||
*/
|
||||
// The enumerators are consecutive, starting at 0 (NONE) and ending at 10 (LAST).
typedef enum rocprofiler_pc_sampling_instruction_not_issued_reason_t
|
||||
{
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NONE = 0,
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE,
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY,
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT, ///< waitcnt dependency
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_INTERNAL_INSTRUCTION,
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_BARRIER_WAIT, ///< waiting on a barrier
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN,
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL,
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT,
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_SLEEP_WAIT, ///< wave was sleeping
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_LAST
|
||||
|
||||
/// @var ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE
|
||||
/// @brief No instruction available in the instruction cache.
|
||||
/// @var ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY
|
||||
/// @brief ALU dependency not resolved.
|
||||
/// @var ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_INTERNAL_INSTRUCTION
|
||||
/// @brief Wave executes an internal instruction.
|
||||
/// @var ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN
|
||||
/// @brief The instruction did not win the arbiter.
|
||||
/// @var ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL
|
||||
/// @brief Arbiter issued an instruction, but the execution pipe pushed it back from execution.
|
||||
/// @var ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT
|
||||
/// @brief Other types of wait (e.g., wait for XNACK acknowledgment).
|
||||
|
||||
} rocprofiler_pc_sampling_instruction_not_issued_reason_t;
|
||||
|
||||
/**
|
||||
* @brief Data provided by stochastic sampling hardware.
|
||||
*
|
||||
*/
|
||||
// Bit-field widths add up to 32: 4 (reason) + 1 (reserved0) + 11 (arbiter issue bits)
// + 11 (arbiter stall bits) + 1 (dual_issue_valu) + 1 (reserved1) + 3 (reserved2).
typedef struct rocprofiler_pc_sampling_snapshot_v0_t
|
||||
{
|
||||
uint32_t reason_not_issued : 4;
|
||||
uint32_t reserved0 : 1; ///< reserved for future use
|
||||
uint32_t arb_state_issue_valu : 1; ///< arbiter issued a VALU instruction
|
||||
uint32_t arb_state_issue_matrix : 1; ///< arbiter issued a matrix instruction
|
||||
uint32_t arb_state_issue_lds : 1; ///< arbiter issued a LDS instruction
|
||||
uint32_t arb_state_issue_lds_direct : 1; ///< arbiter issued a LDS direct instruction
|
||||
uint32_t arb_state_issue_scalar : 1; ///< arbiter issued a scalar (SALU/SMEM) instruction
|
||||
uint32_t arb_state_issue_vmem_tex : 1; ///< arbiter issued a texture instruction
|
||||
uint32_t arb_state_issue_flat : 1; ///< arbiter issued a FLAT instruction
|
||||
uint32_t arb_state_issue_exp : 1; ///< arbiter issued a export instruction
|
||||
uint32_t arb_state_issue_misc : 1; ///< arbiter issued a miscellaneous instruction
|
||||
uint32_t arb_state_issue_brmsg : 1; ///< arbiter issued a branch/message instruction
|
||||
uint32_t arb_state_issue_reserved : 1; ///< reserved for the future use
|
||||
uint32_t arb_state_stall_valu : 1;
|
||||
uint32_t arb_state_stall_matrix : 1; ///< matrix instruction was stalled
|
||||
uint32_t arb_state_stall_lds : 1; ///< LDS instruction was stalled
|
||||
uint32_t arb_state_stall_lds_direct : 1; ///< LDS direct instruction was stalled
|
||||
uint32_t arb_state_stall_scalar : 1; ///< Scalar (SALU/SMEM) instruction was stalled
|
||||
uint32_t arb_state_stall_vmem_tex : 1; ///< texture instruction was stalled
|
||||
uint32_t arb_state_stall_flat : 1; ///< flat instruction was stalled
|
||||
uint32_t arb_state_stall_exp : 1; ///< export instruction was stalled
|
||||
uint32_t arb_state_stall_misc : 1; ///< miscellaneous instruction was stalled
|
||||
uint32_t arb_state_stall_brmsg : 1; ///< branch/message instruction was stalled
|
||||
// NOTE(review): the name below looks like a typo of "arb_state_stall_reserved"; renaming it
// would change the public struct, so it is only flagged here.
uint32_t arb_state_state_reserved : 1; ///< reserved for the future use
|
||||
// The remaining 5 bits: dual_issue_valu followed by two reserved fields (1 + 3 bits)
|
||||
uint32_t dual_issue_valu : 1;
|
||||
uint32_t reserved1 : 1; ///< reserved for the future use
|
||||
uint32_t reserved2 : 3; ///< reserved for the future use
|
||||
|
||||
/// @var reason_not_issued
|
||||
/// @brief The reason for not issuing an instruction. The field takes one of the value defined
|
||||
/// in @ref ::rocprofiler_pc_sampling_instruction_not_issued_reason_t
|
||||
/// @var arb_state_stall_valu
|
||||
/// @brief VALU instruction was stalled when a sample was generated
|
||||
/// @var dual_issue_valu
|
||||
/// @brief Two VALU instructions were issued for coexecution (MI3xx specific)
|
||||
} rocprofiler_pc_sampling_snapshot_v0_t;
|
||||
|
||||
/**
 * @brief Counters of issued but not yet completed instructions.
 *
 * The six bit-fields below occupy 6 + 6 + 3 + 6 + 6 + 5 = 32 bits in total.
 */
typedef struct rocprofiler_pc_sampling_memory_counters_t
{
    uint32_t load_cnt   : 6;  ///< number of VMEM load instructions issued but not yet completed
    uint32_t store_cnt  : 6;  ///< number of VMEM store instructions issued but not yet completed
    uint32_t bvh_cnt    : 3;  ///< number of VMEM BVH instructions issued but not yet completed
    uint32_t sample_cnt : 6;  ///< number of VMEM sample instructions issued but not yet completed
    uint32_t ds_cnt     : 6;  ///< number of LDS instructions issued but not yet completed
    uint32_t km_cnt     : 5;  ///< number of scalar memory reads and memory instructions issued
                              ///< but not yet completed
} rocprofiler_pc_sampling_memory_counters_t;
|
||||
|
||||
/**
 * @brief ROCProfiler Stochastic PC Sampling Record.
 *
 * One record describes a single stochastic PC sample. Use @ref flags to find out which of the
 * remaining fields are meaningful for a given sample.
 */
typedef struct rocprofiler_pc_sampling_record_stochastic_v0_t
{
    /// Size of this struct
    uint64_t size;
    /// Defines what fields are meaningful for the sample
    rocprofiler_pc_sampling_record_stochastic_header_t flags;
    /// Wave position within the workgroup (0-15)
    uint8_t wave_in_group;
    /// Wave issued the instruction represented with the PC
    uint8_t wave_issued : 1;
    /// Instruction type, takes a value defined in
    /// @ref ::rocprofiler_pc_sampling_instruction_type_t
    uint8_t inst_type : 5;
    /// Reserved 2 bits, must be zero
    uint8_t reserved : 2;
    /// Hardware identifiers, @see ::rocprofiler_pc_sampling_hw_id_v0_t
    rocprofiler_pc_sampling_hw_id_v0_t hw_id;
    /// Information about the sampled program counter
    rocprofiler_pc_t pc;
    /// Active SIMD lanes at the moment of sampling
    uint64_t exec_mask;
    /// Wave coordinates within the workgroup.
    /// NOTE(review): the field name suggests these are the workgroup's own coordinates - confirm.
    rocprofiler_dim3_t workgroup_id;
    /// Active waves on the CU at the moment of sampling
    uint32_t wave_count;
    /// Timestamp when the sample is generated
    uint64_t timestamp;
    /// Originating kernel dispatch ID
    uint64_t dispatch_id;
    /// API launch call id that matches the dispatch ID
    rocprofiler_async_correlation_id_t correlation_id;
    /// Data provided by stochastic sampling hardware,
    /// @see ::rocprofiler_pc_sampling_snapshot_v0_t
    rocprofiler_pc_sampling_snapshot_v0_t snapshot;
    /// Counters of issued but not yet completed instructions,
    /// @see ::rocprofiler_pc_sampling_memory_counters_t
    rocprofiler_pc_sampling_memory_counters_t memory_counters;
} rocprofiler_pc_sampling_record_stochastic_v0_t;
|
||||
|
||||
/**
 * @brief Record representing an invalid PC Sampling Record.
 *
 * It carries no sample payload, only its own size.
 */
typedef struct rocprofiler_pc_sampling_record_invalid_t
{
    /// Size of the struct
    uint64_t size;
} rocprofiler_pc_sampling_record_invalid_t;
|
||||
|
||||
/**
|
||||
* @fn C compatible string representation of the PC sampling instruction type
|
||||
* @brief Return the string encoding of @ref rocprofiler_pc_sampling_instruction_type_t value
|
||||
* @param [in] instruction_type instruction type enum value
|
||||
* @return Will return a nullptr if invalid/unsupported @ref
|
||||
* rocprofiler_pc_sampling_instruction_type_t value is provided.
|
||||
*/
|
||||
const char*
|
||||
rocprofiler_get_pc_sampling_instruction_type_name(
|
||||
rocprofiler_pc_sampling_instruction_type_t instruction_type) ROCPROFILER_API;
|
||||
|
||||
/**
|
||||
* @fn C compatible string representation of reason for not issuing an instruction
|
||||
* @brief Return the string encoding of @ref rocprofiler_pc_sampling_instruction_not_issued_reason_t
|
||||
* value
|
||||
* @param [in] not_issued_reason no issue reason enum value
|
||||
* @return Will return a nullptr if invalid/unsupported @ref
|
||||
* rocprofiler_pc_sampling_instruction_not_issued_reason_t value is provided.
|
||||
*/
|
||||
const char*
|
||||
rocprofiler_get_pc_sampling_instruction_not_issued_reason_name(
|
||||
rocprofiler_pc_sampling_instruction_not_issued_reason_t not_issued_reason) ROCPROFILER_API;
|
||||
|
||||
/** @} */
|
||||
|
||||
ROCPROFILER_EXTERN_C_FINI
|
||||
|
||||
@@ -168,5 +168,8 @@ using kernel_dispatch_buffered_output_with_stream_t =
|
||||
domain_type::KERNEL_DISPATCH>;
|
||||
using memory_copy_buffered_output_with_stream_t =
|
||||
buffered_output<tool_buffer_tracing_memory_copy_with_stream_record_t, domain_type::MEMORY_COPY>;
|
||||
using pc_sampling_stochastic_buffered_output_t =
|
||||
buffered_output<rocprofiler::tool::rocprofiler_tool_pc_sampling_stochastic_record_t,
|
||||
domain_type::PC_SAMPLING_STOCHASTIC>;
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -111,6 +111,7 @@ using stats_csv_encoder = csv_encoder<8>;
|
||||
using pc_sampling_host_trap_csv_encoder = csv_encoder<6>;
|
||||
using kernel_trace_with_stream_csv_encoder = csv_encoder<19>;
|
||||
using memory_copy_with_stream_csv_encoder = csv_encoder<8>;
|
||||
using pc_sampling_stochastic_csv_encoder = csv_encoder<10>;
|
||||
} // namespace csv
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -63,6 +63,10 @@ DEFINE_BUFFER_TYPE_NAME(PC_SAMPLING_HOST_TRAP,
|
||||
"pc_sampling_host_trap_stats")
|
||||
DEFINE_BUFFER_TYPE_NAME(ROCDECODE, "ROCDECODE_API", "rocdecode_api_trace", "rocdecode_api_stats")
|
||||
DEFINE_BUFFER_TYPE_NAME(ROCJPEG, "ROCJPEG_API", "rocjpeg_api_trace", "rocjpeg_api_stats")
|
||||
DEFINE_BUFFER_TYPE_NAME(PC_SAMPLING_STOCHASTIC,
|
||||
"PC_SAMPLING_STOCHASTIC",
|
||||
"pc_sampling_stochastic",
|
||||
"pc_sampling_stochastic_stats")
|
||||
|
||||
#undef DEFINE_BUFFER_TYPE_NAME
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@ enum class domain_type
|
||||
PC_SAMPLING_HOST_TRAP,
|
||||
ROCDECODE,
|
||||
ROCJPEG,
|
||||
PC_SAMPLING_STOCHASTIC,
|
||||
LAST,
|
||||
};
|
||||
|
||||
|
||||
@@ -877,6 +877,79 @@ generate_csv(const output_config& c
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_tool_pc_sampling_stochastic_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
if(cfg.stats && stats)
|
||||
write_stats(get_stats_output_file(cfg, domain_type::PC_SAMPLING_STOCHASTIC), stats.entries);
|
||||
|
||||
auto ofs = tool::csv_output_file{cfg,
|
||||
domain_type::PC_SAMPLING_STOCHASTIC,
|
||||
tool::csv::pc_sampling_stochastic_csv_encoder{},
|
||||
{
|
||||
"Sample_Timestamp",
|
||||
"Exec_Mask",
|
||||
"Dispatch_Id",
|
||||
"Instruction",
|
||||
"Instruction_Comment",
|
||||
"Correlation_Id",
|
||||
"Wave_Issued_Instruction",
|
||||
"Instruction_Type",
|
||||
"Stall_Reason",
|
||||
"Wave_Count",
|
||||
}};
|
||||
for(auto ditr : data)
|
||||
{
|
||||
for(const auto& record : data.get(ditr))
|
||||
{
|
||||
std::string inst;
|
||||
std::string inst_comment;
|
||||
if(record.inst_index == -1)
|
||||
{
|
||||
// A sample originates from a blit kernel or self-modifying code,
|
||||
// so instruction cannot be decoded
|
||||
inst_comment = "Unrecognized code object id, physical virtual address of PC:" +
|
||||
std::to_string(record.pc_sample_record.pc.code_object_offset);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Provide decoded instruction and comment
|
||||
inst = tool_metadata.get_instruction(record.inst_index);
|
||||
inst_comment = tool_metadata.get_comment(record.inst_index);
|
||||
}
|
||||
|
||||
auto row_ss = std::stringstream{};
|
||||
rocprofiler::tool::csv::pc_sampling_stochastic_csv_encoder::write_row(
|
||||
row_ss,
|
||||
record.pc_sample_record.timestamp,
|
||||
record.pc_sample_record.exec_mask,
|
||||
record.pc_sample_record.dispatch_id,
|
||||
inst,
|
||||
inst_comment,
|
||||
record.pc_sample_record.correlation_id.internal,
|
||||
// As wave_issued is uint8_t of size 1, it can be dumped as char.
|
||||
// To prevent that, explicitly cast it to integer, so that CSV output
|
||||
// shows human-readable 0/1 values.
|
||||
static_cast<unsigned int>(record.pc_sample_record.wave_issued),
|
||||
std::string(rocprofiler_get_pc_sampling_instruction_type_name(
|
||||
static_cast<rocprofiler_pc_sampling_instruction_type_t>(
|
||||
record.pc_sample_record.inst_type))),
|
||||
std::string(rocprofiler_get_pc_sampling_instruction_not_issued_reason_name(
|
||||
static_cast<rocprofiler_pc_sampling_instruction_not_issued_reason_t>(
|
||||
record.pc_sample_record.snapshot.reason_not_issued))),
|
||||
// Similar reasoning as for wave_issued.
|
||||
static_cast<unsigned int>(record.pc_sample_record.wave_count));
|
||||
|
||||
ofs << row_ss.str();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& /*tool_metadata*/,
|
||||
|
||||
@@ -110,6 +110,12 @@ generate_csv(const output_config& c
|
||||
const generator<rocprofiler_tool_pc_sampling_host_trap_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_tool_pc_sampling_stochastic_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
|
||||
@@ -196,10 +196,10 @@ write_json(json_output& json_ar,
|
||||
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
|
||||
generator<rocprofiler_buffer_tracing_rccl_api_record_t> rccl_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_memory_allocation_record_t> memory_allocation_gen,
|
||||
generator<rocprofiler_tool_pc_sampling_host_trap_record_t> pc_sampling_gen,
|
||||
generator<rocprofiler_buffer_tracing_rocdecode_api_record_t> rocdecode_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_rocjpeg_api_record_t> rocjpeg_api_gen)
|
||||
|
||||
generator<rocprofiler_buffer_tracing_rocjpeg_api_record_t> rocjpeg_api_gen,
|
||||
generator<rocprofiler_tool_pc_sampling_host_trap_record_t> pc_sampling_host_trap_gen,
|
||||
generator<rocprofiler_tool_pc_sampling_stochastic_record_t> pc_sampling_stochastic_gen)
|
||||
{
|
||||
// summary
|
||||
{
|
||||
@@ -239,9 +239,10 @@ write_json(json_output& json_ar,
|
||||
json_ar(cereal::make_nvp("memory_copy", memory_copy_gen));
|
||||
json_ar(cereal::make_nvp("memory_allocation", memory_allocation_gen));
|
||||
json_ar(cereal::make_nvp("scratch_memory", scratch_memory_gen));
|
||||
json_ar(cereal::make_nvp("pc_sample_host_trap", pc_sampling_gen));
|
||||
json_ar(cereal::make_nvp("rocdecode_api", rocdecode_api_gen));
|
||||
json_ar(cereal::make_nvp("rocjpeg_api", rocjpeg_api_gen));
|
||||
json_ar(cereal::make_nvp("pc_sample_host_trap", pc_sampling_host_trap_gen));
|
||||
json_ar(cereal::make_nvp("pc_sample_stochastic", pc_sampling_stochastic_gen));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -94,9 +94,9 @@ write_json(json_output& j
|
||||
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
|
||||
generator<rocprofiler_buffer_tracing_rccl_api_record_t> rccl_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_memory_allocation_record_t> memory_allocation_gen,
|
||||
generator<rocprofiler_tool_pc_sampling_host_trap_record_t> pc_sampling_gen,
|
||||
generator<rocprofiler_buffer_tracing_rocdecode_api_record_t> rocdecode_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_rocjpeg_api_record_t> rocjpeg_api_gen);
|
||||
|
||||
generator<rocprofiler_buffer_tracing_rocjpeg_api_record_t> rocjpeg_api_gen,
|
||||
generator<rocprofiler_tool_pc_sampling_host_trap_record_t> pc_sampling_host_trap_gen,
|
||||
generator<rocprofiler_tool_pc_sampling_stochastic_record_t> pc_sampling_stochastic_gen);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -453,7 +453,23 @@ generate_stats(const output_config& /* cfg*/,
|
||||
const metadata& /*tool_metadata*/,
|
||||
const generator<rocprofiler_tool_pc_sampling_host_trap_record_t>& /*data*/)
|
||||
{
|
||||
// TODO:
|
||||
// 1. Implement serialization for PC sampling stats.
|
||||
// The format differs significantly from tracing stats.
|
||||
// 2. Decide what is going to be part of the stats.
|
||||
// Some basic information is already available in the tool_metadata.pc_sampling_stats.
|
||||
// This contains the total number of valid VS invalid samples.
|
||||
return stats_entry_t{};
|
||||
}
|
||||
|
||||
/// Stats for stochastic PC sampling records. Not implemented yet: every argument is ignored
/// and an empty stats entry is returned.
stats_entry_t
generate_stats(const output_config& /*cfg*/,
               const metadata& /*tool_metadata*/,
               const generator<rocprofiler_tool_pc_sampling_stochastic_record_t>& /*data*/)
{
    // TODO: the same TODOs as in the host-trap overload above apply here.
    return stats_entry_t{};
}
|
||||
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -90,6 +90,12 @@ stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_tool_pc_sampling_host_trap_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<rocprofiler_tool_pc_sampling_stochastic_record_t>& data);
|
||||
|
||||
void
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
|
||||
@@ -87,6 +87,9 @@ using att_filenames_map_t = std::unordered_map<rocprofiler_dispatch_id_t, att_
|
||||
using code_object_load_info_vec_t = std::vector<rocprofiler::att_wrapper::CodeobjLoadInfo>;
|
||||
template <typename Tp>
|
||||
using synced_map = common::Synchronized<Tp, true>;
|
||||
template <typename Tp>
|
||||
using synced_obj = common::Synchronized<Tp, true>;
|
||||
using pc_sampling_stats_t = rocprofiler_tool_pc_sampling_stats;
|
||||
|
||||
enum class agent_indexing
|
||||
{
|
||||
@@ -133,6 +136,7 @@ struct metadata
|
||||
synced_map<host_function_info_map_t> host_functions = {};
|
||||
synced_map<code_object_load_info_vec_t> code_object_load = {};
|
||||
att_filenames_map_t att_filenames = {};
|
||||
synced_obj<pc_sampling_stats_t> pc_sampling_stats = {};
|
||||
|
||||
metadata() = default;
|
||||
metadata(inprocess);
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/pc_sampling.h>
|
||||
#include <rocprofiler-sdk/cxx/codeobj/code_printing.hpp>
|
||||
#include <rocprofiler-sdk/cxx/serialization.hpp>
|
||||
|
||||
@@ -79,5 +80,32 @@ struct rocprofiler_tool_pc_sampling_host_trap_record_t
|
||||
}
|
||||
};
|
||||
|
||||
// TODO:: Check if we can template this structure
|
||||
struct rocprofiler_tool_pc_sampling_stochastic_record_t
|
||||
{
|
||||
rocprofiler_pc_sampling_record_stochastic_v0_t pc_sample_record;
|
||||
int64_t inst_index;
|
||||
|
||||
rocprofiler_tool_pc_sampling_stochastic_record_t(
|
||||
rocprofiler_pc_sampling_record_stochastic_v0_t record,
|
||||
int64_t index)
|
||||
: pc_sample_record(record)
|
||||
, inst_index(index)
|
||||
{}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void save(ArchiveT& ar) const
|
||||
{
|
||||
ar(cereal::make_nvp("record", pc_sample_record));
|
||||
ar(cereal::make_nvp("inst_index", inst_index));
|
||||
}
|
||||
};
|
||||
|
||||
/// Totals of valid and invalid PC samples; both counters start at zero.
struct rocprofiler_tool_pc_sampling_stats
{
    uint64_t valid_samples{0};    ///< number of valid samples
    uint64_t invalid_samples{0};  ///< number of invalid samples
};
|
||||
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -285,10 +285,28 @@ config::config()
|
||||
{"stochastic", ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC},
|
||||
{"host_trap", ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP}};
|
||||
|
||||
pc_sampling_method_value = pc_sampling_method_map.at(pc_sampling_method);
|
||||
try
|
||||
{
|
||||
pc_sampling_method_value = pc_sampling_method_map.at(pc_sampling_method);
|
||||
} catch(...)
|
||||
{
|
||||
ROCP_FATAL << "Invalid value for ROCPROF_PC_SAMPLING_METHOD: " << pc_sampling_method << "."
|
||||
<< "Valid choices are stochastic and host_trap\n";
|
||||
}
|
||||
|
||||
if(pc_sampling_method_value == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP)
|
||||
pc_sampling_host_trap = true;
|
||||
pc_sampling_unit_value = pc_sampling_unit_map.at(pc_sampling_unit);
|
||||
else if(pc_sampling_method_value == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
|
||||
pc_sampling_stochastic = true;
|
||||
|
||||
try
|
||||
{
|
||||
pc_sampling_unit_value = pc_sampling_unit_map.at(pc_sampling_unit);
|
||||
} catch(...)
|
||||
{
|
||||
ROCP_FATAL << "Invalid value for ROCPROF_PC_SAMPLING_UNIT: " << pc_sampling_unit << "."
|
||||
<< "Valid choices are instructions, cycles and time\n";
|
||||
}
|
||||
|
||||
if(auto _collection_period = get_env("ROCPROF_COLLECTION_PERIOD", "");
|
||||
!_collection_period.empty())
|
||||
|
||||
@@ -116,6 +116,7 @@ struct config : output_config
|
||||
bool list_metrics_output_file = get_env("ROCPROF_OUTPUT_LIST_METRICS_FILE", false);
|
||||
bool pc_sampling_host_trap = false;
|
||||
bool advanced_thread_trace = get_env("ROCPROF_ADVANCED_THREAD_TRACE", false);
|
||||
bool pc_sampling_stochastic = false;
|
||||
size_t pc_sampling_interval = get_env("ROCPROF_PC_SAMPLING_INTERVAL", 1);
|
||||
bool att_serialize_all = get_env("ROCPROF_ATT_PARAM_SERIALIZE_ALL", false);
|
||||
rocprofiler_pc_sampling_method_t pc_sampling_method_value = ROCPROFILER_PC_SAMPLING_METHOD_NONE;
|
||||
|
||||
@@ -147,10 +147,11 @@ struct buffer_ids
|
||||
rocprofiler_buffer_id_t pc_sampling_host_trap = {};
|
||||
rocprofiler_buffer_id_t rocdecode_api_trace = {};
|
||||
rocprofiler_buffer_id_t rocjpeg_api_trace = {};
|
||||
rocprofiler_buffer_id_t pc_sampling_stochastic = {};
|
||||
|
||||
auto as_array() const
|
||||
{
|
||||
return std::array<rocprofiler_buffer_id_t, 11>{hsa_api_trace,
|
||||
return std::array<rocprofiler_buffer_id_t, 12>{hsa_api_trace,
|
||||
hip_api_trace,
|
||||
kernel_trace,
|
||||
memory_copy_trace,
|
||||
@@ -160,7 +161,8 @@ struct buffer_ids
|
||||
rccl_api_trace,
|
||||
pc_sampling_host_trap,
|
||||
rocdecode_api_trace,
|
||||
rocjpeg_api_trace};
|
||||
rocjpeg_api_trace,
|
||||
pc_sampling_stochastic};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -726,7 +728,8 @@ code_object_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
auto* obj_data = static_cast<tool::rocprofiler_code_object_info_t*>(record.payload);
|
||||
|
||||
CHECK_NOTNULL(tool_metadata)->add_code_object(*obj_data);
|
||||
if(tool::get_config().pc_sampling_host_trap)
|
||||
if(tool::get_config().pc_sampling_host_trap ||
|
||||
tool::get_config().pc_sampling_stochastic)
|
||||
{
|
||||
CHECK_NOTNULL(tool_metadata)->add_decoder(obj_data);
|
||||
}
|
||||
@@ -1178,6 +1181,10 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /* context_id*/,
|
||||
{
|
||||
if(!headers) return;
|
||||
|
||||
// count number of valid VS invalid samples delivered by this callback
|
||||
uint64_t valid_samples_cnt = 0;
|
||||
uint64_t invalid_samples_cnt = 0;
|
||||
|
||||
for(size_t i = 0; i < num_headers; i++)
|
||||
{
|
||||
auto* cur_header = headers[i];
|
||||
@@ -1202,6 +1209,25 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /* context_id*/,
|
||||
|
||||
rocprofiler::tool::write_ring_buffer(pc_sample_tool_record,
|
||||
domain_type::PC_SAMPLING_HOST_TRAP);
|
||||
|
||||
valid_samples_cnt++;
|
||||
}
|
||||
else if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_STOCHASTIC_V0_SAMPLE)
|
||||
{
|
||||
auto* pc_sample = static_cast<rocprofiler_pc_sampling_record_stochastic_v0_t*>(
|
||||
cur_header->payload);
|
||||
|
||||
auto pc_sample_tool_record =
|
||||
rocprofiler::tool::rocprofiler_tool_pc_sampling_stochastic_record_t(
|
||||
*pc_sample, get_instruction_index(pc_sample->pc));
|
||||
|
||||
rocprofiler::tool::write_ring_buffer(pc_sample_tool_record,
|
||||
domain_type::PC_SAMPLING_STOCHASTIC);
|
||||
valid_samples_cnt++;
|
||||
}
|
||||
else if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_INVALID_SAMPLE)
|
||||
{
|
||||
invalid_samples_cnt++;
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -1209,6 +1235,13 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /* context_id*/,
|
||||
ROCP_FATAL << "unexpected rocprofiler_record_header_t category + kind";
|
||||
}
|
||||
}
|
||||
|
||||
// sum up number of valid/invalid samples for pc sampling stats
|
||||
tool_metadata->pc_sampling_stats.wlock(
|
||||
[valid_samples_cnt, invalid_samples_cnt](auto& pc_sampling_stats) {
|
||||
pc_sampling_stats.valid_samples += valid_samples_cnt;
|
||||
pc_sampling_stats.invalid_samples += invalid_samples_cnt;
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1377,6 +1410,52 @@ if_pc_sample_config_match(rocprofiler_agent_id_t agent_id,
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
configure_pc_sampling_on_all_agents(uint64_t buffer_size,
|
||||
uint64_t buffer_watermark,
|
||||
void* tool_data)
|
||||
{
|
||||
auto method = tool::get_config().pc_sampling_method_value;
|
||||
auto unit = tool::get_config().pc_sampling_unit_value;
|
||||
|
||||
// Find the proper buffer_id based on the method
|
||||
auto* buffer_id = (method == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP)
|
||||
? &get_buffers().pc_sampling_host_trap
|
||||
: &get_buffers().pc_sampling_stochastic;
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
||||
buffer_size,
|
||||
buffer_watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
rocprofiler_pc_sampling_callback,
|
||||
tool_data,
|
||||
buffer_id),
|
||||
"buffer creation");
|
||||
|
||||
bool config_match_found = false;
|
||||
auto agent_ptr_vec = get_gpu_agents();
|
||||
for(auto& itr : agent_ptr_vec)
|
||||
{
|
||||
if(if_pc_sample_config_match(
|
||||
itr->id, method, unit, tool::get_config().pc_sampling_interval))
|
||||
{
|
||||
config_match_found = true;
|
||||
int flags = 0;
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_pc_sampling_service(get_client_ctx(),
|
||||
itr->id,
|
||||
method,
|
||||
unit,
|
||||
tool::get_config().pc_sampling_interval,
|
||||
*buffer_id,
|
||||
flags),
|
||||
"configure PC sampling");
|
||||
}
|
||||
}
|
||||
if(!config_match_found)
|
||||
ROCP_FATAL << "Given PC sampling configuration is not supported on any of the agents";
|
||||
}
|
||||
|
||||
int
|
||||
tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
{
|
||||
@@ -1745,38 +1824,11 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
|
||||
if(tool::get_config().pc_sampling_host_trap)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(get_client_ctx(),
|
||||
buffer_size,
|
||||
buffer_watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
rocprofiler_pc_sampling_callback,
|
||||
tool_data,
|
||||
&get_buffers().pc_sampling_host_trap),
|
||||
"buffer creation");
|
||||
bool config_match_found = false;
|
||||
auto agent_ptr_vec = get_gpu_agents();
|
||||
for(auto& itr : agent_ptr_vec)
|
||||
{
|
||||
auto method = tool::get_config().pc_sampling_method_value;
|
||||
auto unit = tool::get_config().pc_sampling_unit_value;
|
||||
if(if_pc_sample_config_match(
|
||||
itr->id, method, unit, tool::get_config().pc_sampling_interval))
|
||||
{
|
||||
config_match_found = true;
|
||||
int flags = 0;
|
||||
ROCPROFILER_CALL(rocprofiler_configure_pc_sampling_service(
|
||||
get_client_ctx(),
|
||||
itr->id,
|
||||
method,
|
||||
unit,
|
||||
tool::get_config().pc_sampling_interval,
|
||||
get_buffers().pc_sampling_host_trap,
|
||||
flags),
|
||||
"configure PC sampling");
|
||||
}
|
||||
}
|
||||
if(!config_match_found)
|
||||
ROCP_FATAL << "Given PC sampling configuration is not supported on any of the agents";
|
||||
configure_pc_sampling_on_all_agents(buffer_size, buffer_watermark, tool_data);
|
||||
}
|
||||
else if(tool::get_config().pc_sampling_stochastic)
|
||||
{
|
||||
configure_pc_sampling_on_all_agents(buffer_size, buffer_watermark, tool_data);
|
||||
}
|
||||
|
||||
for(auto itr : get_buffers().as_array())
|
||||
@@ -1897,6 +1949,8 @@ tool_fini(void* /*tool_data*/)
|
||||
auto rocdecode_output =
|
||||
tool::rocdecode_buffered_output_t{tool::get_config().rocdecode_api_trace};
|
||||
auto rocjpeg_output = tool::rocjpeg_buffered_output_t{tool::get_config().rocjpeg_api_trace};
|
||||
auto pc_sampling_stochastic_output =
|
||||
tool::pc_sampling_stochastic_buffered_output_t{tool::get_config().pc_sampling_stochastic};
|
||||
|
||||
auto node_id_sort = [](const auto& lhs, const auto& rhs) { return lhs.node_id < rhs.node_id; };
|
||||
auto agents_output = CHECK_NOTNULL(tool_metadata)->agents;
|
||||
@@ -1917,6 +1971,7 @@ tool_fini(void* /*tool_data*/)
|
||||
generate_output(rocdecode_output, num_output, contributions);
|
||||
generate_output(pc_sampling_host_trap_output, num_output, contributions);
|
||||
generate_output(rocjpeg_output, num_output, contributions);
|
||||
generate_output(pc_sampling_stochastic_output, num_output, contributions);
|
||||
|
||||
if(tool::get_config().advanced_thread_trace && !tool::get_config().att_capability.empty() &&
|
||||
!tool_metadata->att_filenames.empty())
|
||||
@@ -1955,9 +2010,10 @@ tool_fini(void* /*tool_data*/)
|
||||
scratch_memory_output.get_generator(),
|
||||
rccl_output.get_generator(),
|
||||
memory_allocation_output.get_generator(),
|
||||
pc_sampling_host_trap_output.get_generator(),
|
||||
rocdecode_output.get_generator(),
|
||||
rocjpeg_output.get_generator());
|
||||
rocjpeg_output.get_generator(),
|
||||
pc_sampling_host_trap_output.get_generator(),
|
||||
pc_sampling_stochastic_output.get_generator());
|
||||
json_ar.finish_process();
|
||||
|
||||
tool::close_json(json_ar);
|
||||
@@ -2074,6 +2130,7 @@ tool_fini(void* /*tool_data*/)
|
||||
destroy_output(pc_sampling_host_trap_output);
|
||||
destroy_output(rocdecode_output);
|
||||
destroy_output(rocjpeg_output);
|
||||
destroy_output(pc_sampling_stochastic_output);
|
||||
|
||||
if(kernel_rename_and_stream_display_pair_dtors != nullptr)
|
||||
{
|
||||
|
||||
@@ -35,6 +35,87 @@
|
||||
|
||||
namespace
|
||||
{
|
||||
#define ROCPROFILER_INSTRUCTION_TYPE_STRING(CODE) \
|
||||
template <> \
|
||||
struct instruction_type_string<CODE> \
|
||||
{ \
|
||||
static constexpr auto name = #CODE; \
|
||||
};
|
||||
|
||||
#define ROCPROFILER_NO_ISSUE_REASON_STRING(CODE) \
|
||||
template <> \
|
||||
struct no_issue_reason_string<CODE> \
|
||||
{ \
|
||||
static constexpr auto name = #CODE; \
|
||||
};
|
||||
|
||||
template <size_t Idx>
|
||||
struct instruction_type_string;
|
||||
|
||||
template <size_t Idx>
|
||||
struct no_issue_reason_string;
|
||||
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NONE);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_MATRIX);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_SCALAR);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_TEX);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_LDS);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_LDS_DIRECT);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_FLAT);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_EXPORT);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_MESSAGE);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BARRIER);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BRANCH_NOT_TAKEN);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BRANCH_TAKEN);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_JUMP);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_OTHER);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_NO_INST);
|
||||
ROCPROFILER_INSTRUCTION_TYPE_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_DUAL_VALU);
|
||||
|
||||
ROCPROFILER_NO_ISSUE_REASON_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NONE);
|
||||
ROCPROFILER_NO_ISSUE_REASON_STRING(
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE);
|
||||
ROCPROFILER_NO_ISSUE_REASON_STRING(
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY);
|
||||
ROCPROFILER_NO_ISSUE_REASON_STRING(ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT);
|
||||
ROCPROFILER_NO_ISSUE_REASON_STRING(
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_INTERNAL_INSTRUCTION);
|
||||
ROCPROFILER_NO_ISSUE_REASON_STRING(
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_BARRIER_WAIT);
|
||||
ROCPROFILER_NO_ISSUE_REASON_STRING(
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN);
|
||||
ROCPROFILER_NO_ISSUE_REASON_STRING(
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL);
|
||||
ROCPROFILER_NO_ISSUE_REASON_STRING(
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_OTHER_WAIT);
|
||||
ROCPROFILER_NO_ISSUE_REASON_STRING(
|
||||
ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_SLEEP_WAIT);
|
||||
|
||||
template <size_t Idx, size_t... Tail>
|
||||
const char*
|
||||
get_instruction_type_name(rocprofiler_pc_sampling_instruction_type_t instruction_type,
|
||||
std::index_sequence<Idx, Tail...>)
|
||||
{
|
||||
if(instruction_type == Idx) return instruction_type_string<Idx>::name;
|
||||
// recursion until tail empty
|
||||
if constexpr(sizeof...(Tail) > 0)
|
||||
return get_instruction_type_name(instruction_type, std::index_sequence<Tail...>{});
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <size_t Idx, size_t... Tail>
|
||||
const char*
|
||||
get_no_issue_reason_name(rocprofiler_pc_sampling_instruction_not_issued_reason_t no_issue_reason,
|
||||
std::index_sequence<Idx, Tail...>)
|
||||
{
|
||||
if(no_issue_reason == Idx) return no_issue_reason_string<Idx>::name;
|
||||
// recursion until tail empty
|
||||
if constexpr(sizeof...(Tail) > 0)
|
||||
return get_no_issue_reason_name(no_issue_reason, std::index_sequence<Tail...>{});
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief The functions checks if the `ROCPROFILER_PC_SAMPLING_BETA_ENABLED` is set.
|
||||
* If so, it will enable PC sampling API. Otherwise, the API is reported
|
||||
@@ -130,4 +211,22 @@ rocprofiler_query_pc_sampling_agent_configurations(
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE;
|
||||
#endif
|
||||
}
|
||||
|
||||
const char*
|
||||
rocprofiler_get_pc_sampling_instruction_type_name(
|
||||
rocprofiler_pc_sampling_instruction_type_t instruction_type)
|
||||
{
|
||||
return get_instruction_type_name(
|
||||
instruction_type,
|
||||
std::make_index_sequence<ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_LAST>{});
|
||||
}
|
||||
|
||||
const char*
|
||||
rocprofiler_get_pc_sampling_instruction_not_issued_reason_name(
|
||||
rocprofiler_pc_sampling_instruction_not_issued_reason_t not_issued_reason)
|
||||
{
|
||||
return get_no_issue_reason_name(
|
||||
not_issued_reason,
|
||||
std::make_index_sequence<ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_LAST>{});
|
||||
}
|
||||
}
|
||||
|
||||
+198
-77
@@ -46,17 +46,46 @@ namespace
|
||||
{
|
||||
#define PC_SAMPLING_IOCTL_BITMASK 0xFFFF
|
||||
|
||||
/**
|
||||
* @brief Used to determine the version of PC sampling
|
||||
* IOCTL implementation in the driver.
|
||||
*
|
||||
* @todo Remove this once the KFD IOCTL is upstreamed
|
||||
*/
|
||||
struct pc_sampling_ioctl_version_t
|
||||
#define PC_SAMPLING_IOCTL_COMPUTE_VERSION(major, minor) ROCPROFILER_COMPUTE_VERSION(major, minor, 0)
|
||||
|
||||
using pcs_ioctl_version_t = uint32_t;
|
||||
|
||||
#define KFD_ROCP_PCS_METHOD_PAIR(KFD_ENUM_VAL, ROCP_ENUM_VAL) \
|
||||
template <> \
|
||||
struct pcs_method_pair<KFD_ENUM_VAL> \
|
||||
{ \
|
||||
static constexpr auto rocp_enum_val = ROCP_ENUM_VAL; \
|
||||
};
|
||||
|
||||
template <size_t Idx>
|
||||
struct pcs_method_pair;
|
||||
|
||||
KFD_ROCP_PCS_METHOD_PAIR(ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND_NONE,
|
||||
ROCPROFILER_PC_SAMPLING_METHOD_NONE);
|
||||
KFD_ROCP_PCS_METHOD_PAIR(ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND_HOSTTRAP_V1,
|
||||
ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP);
|
||||
KFD_ROCP_PCS_METHOD_PAIR(ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND_STOCHASTIC_V1,
|
||||
ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC);
|
||||
|
||||
template <size_t Idx, size_t... Tail>
|
||||
rocprofiler_pc_sampling_method_t
|
||||
get_rocp_pcs_method(rocprofiler_ioctl_pc_sampling_method_kind_t kfd_method,
|
||||
std::index_sequence<Idx, Tail...>)
|
||||
{
|
||||
uint32_t major_version; /// PC sampling IOCTL major version
|
||||
uint32_t minor_version; /// PC sampling IOCTL minor version
|
||||
};
|
||||
if(kfd_method == Idx) return pcs_method_pair<Idx>::rocp_enum_val;
|
||||
// recursion until tail empty
|
||||
if constexpr(sizeof...(Tail) > 0)
|
||||
return get_rocp_pcs_method(kfd_method, std::index_sequence<Tail...>{});
|
||||
// Return none value if matching fails
|
||||
return ROCPROFILER_PC_SAMPLING_METHOD_NONE;
|
||||
}
|
||||
|
||||
// Translates a KFD IOCTL PC sampling method kind into the matching
// rocprofiler PC sampling method by searching the indices
// 0 .. ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND_LAST - 1.
rocprofiler_pc_sampling_method_t
get_rocp_pcs_method_from_kfd(rocprofiler_ioctl_pc_sampling_method_kind_t kfd_method)
{
    using kfd_method_indices_t =
        std::make_index_sequence<ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND_LAST>;

    return get_rocp_pcs_method(kfd_method, kfd_method_indices_t{});
}
|
||||
|
||||
int
|
||||
kfd_open()
|
||||
@@ -137,7 +166,7 @@ get_ioctl_version(rocprofiler_ioctl_version_info_t& ioctl_version)
|
||||
* @return ::rocprofiler_status_t
|
||||
*/
|
||||
rocprofiler_status_t
|
||||
get_pc_sampling_ioctl_version(uint32_t kfd_gpu_id, pc_sampling_ioctl_version_t& pcs_ioctl_version)
|
||||
get_pc_sampling_ioctl_version(uint32_t kfd_gpu_id, pcs_ioctl_version_t* pcs_ioctl_version)
|
||||
{
|
||||
struct kfd_ioctl_pc_sample_args args;
|
||||
args.op = KFD_IOCTL_PCS_OP_QUERY_CAPABILITIES;
|
||||
@@ -172,29 +201,27 @@ get_pc_sampling_ioctl_version(uint32_t kfd_gpu_id, pc_sampling_ioctl_version_t&
|
||||
// `version` field contains PC Sampling IOCTL version
|
||||
auto version = args.version;
|
||||
// Lower 16 bits represent minor version
|
||||
pcs_ioctl_version.minor_version = version & PC_SAMPLING_IOCTL_BITMASK;
|
||||
auto minor_version = version & PC_SAMPLING_IOCTL_BITMASK;
|
||||
// Upper 16 bits represent major version
|
||||
pcs_ioctl_version.major_version = (version >> 16) & PC_SAMPLING_IOCTL_BITMASK;
|
||||
auto major_version = (version >> 16) & PC_SAMPLING_IOCTL_BITMASK;
|
||||
// finally, compute the version
|
||||
*pcs_ioctl_version = PC_SAMPLING_IOCTL_COMPUTE_VERSION(major_version, minor_version);
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Check if PC sampling is supported on the device with @p kfd_gpu_id.
|
||||
* @brief Check if PC sampling feature is supported in KFD.
|
||||
*
|
||||
* Starting from KFD IOCTL 1.16, KFD delivers beta implementation of the PC sampling.
|
||||
* Furthermore, ROCProfiler-SDK expects PC sampling IOCTL 0.1 version.
|
||||
* @todo: Once KFD is upstreamed, ROCProfiler-SDK will rely only on KFD IOCTL version.
|
||||
*
|
||||
* @return ::rocprofiler_status_t
|
||||
* @retval ::ROCPROFILER_STATUS_SUCCESS PC sampling is supported in the driver.
|
||||
* Other values inform users about the reason why PC sampling is not supported.
|
||||
*/
|
||||
rocprofiler_status_t
|
||||
is_pc_sampling_supported(const rocprofiler_agent_t* agent)
|
||||
is_pc_sampling_supported()
|
||||
{
|
||||
auto kfd_gpu_id = agent->gpu_id;
|
||||
std::string_view agent_name = agent->name;
|
||||
// Verify KFD 1.16 version
|
||||
rocprofiler_ioctl_version_info_t ioctl_version = {.major_version = 0, .minor_version = 0};
|
||||
auto status = get_ioctl_version(ioctl_version);
|
||||
@@ -208,58 +235,131 @@ is_pc_sampling_supported(const rocprofiler_agent_t* agent)
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
|
||||
}
|
||||
|
||||
// TODO: remove once KFD is upstreamed
|
||||
// Verify PC sampling IOCTL version
|
||||
pc_sampling_ioctl_version_t pcs_ioctl_version = {.major_version = 0, .minor_version = 0};
|
||||
status = get_pc_sampling_ioctl_version(kfd_gpu_id, pcs_ioctl_version);
|
||||
if(status != ROCPROFILER_STATUS_SUCCESS)
|
||||
// PC Sampling feature is supported in the driver.
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Check if PC sampling method is supported on the agent.
|
||||
*
|
||||
* The function complements the @ref is_pc_sampling_supported function.
|
||||
* It introduces a strict check against the PC sampling IOCTL version
|
||||
* that tells us whether a certain PC sampling method is safe to be used
|
||||
* on the specific device architecture.
|
||||
*
|
||||
* @param method - PC sampling method to be checked
|
||||
* @param agent - The agent to be checked
|
||||
* @param pcs_ioctl_version - The PC sampling IOCTL version
|
||||
* @return ::rocprofiler_status_t
|
||||
* @retval ::ROCPROFILER_STATUS_SUCCESS - The method is supported
|
||||
* Other values inform users about the reason why the method is not supported.
|
||||
*/
|
||||
rocprofiler_status_t
|
||||
is_pc_sampling_method_supported(rocprofiler_pc_sampling_method_t method,
|
||||
const rocprofiler_agent_t* agent,
|
||||
pcs_ioctl_version_t pcs_ioctl_version)
|
||||
{
|
||||
std::string_view agent_name = agent->name;
|
||||
if(method == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP)
|
||||
{
|
||||
// The reason for not emitting the "PC sampling unavailable" message is the following.
|
||||
// Assume that all devices except one support PC sampling on the system.
|
||||
// By emitting the message for that one device that doesn't support PC sampling,
|
||||
// all tests and samples are skipped. Instead, tests and samples will ignore
|
||||
// that one problematic device and continue using PC sampling on other devices
|
||||
// that support this feature.
|
||||
return status;
|
||||
if(agent_name == "gfx90a")
|
||||
{
|
||||
// 0.1 version enables host-trap PC sampling on gfx90a
|
||||
if(pcs_ioctl_version >= PC_SAMPLING_IOCTL_COMPUTE_VERSION(0, 1))
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
else
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
|
||||
}
|
||||
else if(agent_name.find("gfx94") == 0)
|
||||
{
|
||||
// 0.3 version enables host-trap PC sampling on gfx940, gfx941, gfx942, etc.
|
||||
if(pcs_ioctl_version >= PC_SAMPLING_IOCTL_COMPUTE_VERSION(0, 3))
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
else
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
|
||||
}
|
||||
else if(agent_name.find("gfx95") == 0)
|
||||
{
|
||||
// 1.2 version enables host-trap PC sampling on gfx950
|
||||
if(pcs_ioctl_version >= PC_SAMPLING_IOCTL_COMPUTE_VERSION(1, 2))
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
else
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
|
||||
}
|
||||
}
|
||||
else if(agent_name == "gfx90a")
|
||||
else if(method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
|
||||
{
|
||||
// For gfx90a, we expect PC sampling IOCTL to be at least 0.1.
|
||||
if(pcs_ioctl_version.major_version > 0 || pcs_ioctl_version.minor_version >= 1)
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
else
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
|
||||
}
|
||||
else if(agent_name.find("gfx94") == 0)
|
||||
{
|
||||
// We expect PC sampling IOCTL to be at least 0.3 for gfx940, gfx941, gfx942, etc.
|
||||
if(pcs_ioctl_version.major_version > 0 || pcs_ioctl_version.minor_version >= 3)
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
else
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
|
||||
}
|
||||
else if(agent_name.find("gfx95") == 0)
|
||||
{
|
||||
// As I am not sure if the PCS IOCTL is going to be bumped for gfx950,
|
||||
// I introduced a separate branch for it.
|
||||
// We expect PC sampling IOCTL to be at least 0.3 for gfx950.
|
||||
if(pcs_ioctl_version.major_version > 0 || pcs_ioctl_version.minor_version >= 3)
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
else
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
|
||||
if(agent_name == "gfx90a")
|
||||
{
|
||||
// gfx90a doesn't support stochastic PC sampling
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE;
|
||||
}
|
||||
else if(agent_name.find("gfx94") == 0)
|
||||
{
|
||||
// 1.3 version enables stochastic PC sampling on gfx940, gfx941, gfx942, etc.
|
||||
if(pcs_ioctl_version >= PC_SAMPLING_IOCTL_COMPUTE_VERSION(1, 3))
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
else
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
|
||||
}
|
||||
else if(agent_name.find("gfx95") == 0)
|
||||
{
|
||||
// 1.4 version enables stochastic PC sampling on gfx950
|
||||
if(pcs_ioctl_version >= PC_SAMPLING_IOCTL_COMPUTE_VERSION(1, 4))
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
else
|
||||
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// The agent does not support PC sampling.
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE;
|
||||
return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
// Other architectures do not support the PC sampling method.
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Returns the PC sampling IOCTL version if the PC sampling feature is supported in the
|
||||
* driver.
|
||||
*
|
||||
* First, check the minimal driver version via @ref is_pc_sampling_supported.
|
||||
* Then, determines the PC sampling IOCTL version via @ref get_pc_sampling_ioctl_version.
|
||||
*
|
||||
* @param [in] kfd_gpu_id - The KFD GPU identifier
|
||||
* @param [out] pcs_ioctl_version - The PC sampling IOCTL version
|
||||
* @return ::rocprofiler_status_t
|
||||
*/
|
||||
rocprofiler_status_t
get_pcs_ioctl_version_if_kfd_supports(uint32_t kfd_gpu_id, pcs_ioctl_version_t* pcs_ioctl_version)
{
    // Bail out early when the driver does not provide the PC sampling feature.
    if(auto kfd_status = is_pc_sampling_supported(); kfd_status != ROCPROFILER_STATUS_SUCCESS)
        return kfd_status;

    // The feature is present: the outcome of the version query is the final status.
    return get_pc_sampling_ioctl_version(kfd_gpu_id, pcs_ioctl_version);
}
|
||||
|
||||
/**
|
||||
* @brief Same as @ref is_pc_sampling_method_supported.
|
||||
*/
|
||||
rocprofiler_status_t
is_pc_sampling_method_supported(rocprofiler_ioctl_pc_sampling_method_kind_t ioctl_method,
                                const rocprofiler_agent_t*                  agent,
                                pcs_ioctl_version_t                         pcs_ioctl_version)
{
    // Convert the KFD method kind to the rocprofiler method and defer to the
    // overload that takes a rocprofiler_pc_sampling_method_t.
    return is_pc_sampling_method_supported(
        get_rocp_pcs_method_from_kfd(ioctl_method), agent, pcs_ioctl_version);
}
|
||||
|
||||
/**
|
||||
* @kfd_gpu_id represents the gpu identifier read from the content of the
|
||||
* /sys/class/kfd/kfd/topology/nodes/<node-id>/gpu_id.
|
||||
*/
|
||||
ROCPROFILER_IOCTL_STATUS
|
||||
rocprofiler_ioctl_status_t
|
||||
ioctl_query_pc_sampling_capabilities(uint32_t kfd_gpu_id,
|
||||
void* sample_info,
|
||||
uint32_t sample_info_sz,
|
||||
@@ -366,8 +466,9 @@ get_kfd_fd()
|
||||
rocprofiler_status_t
|
||||
ioctl_query_pcs_configs(const rocprofiler_agent_t* agent, rocp_pcs_cfgs_vec_t& rocp_configs)
|
||||
{
|
||||
if(auto status = is_pc_sampling_supported(agent); status != ROCPROFILER_STATUS_SUCCESS)
|
||||
return status;
|
||||
pcs_ioctl_version_t pcs_ioctl_version = 0;
|
||||
auto status = get_pcs_ioctl_version_if_kfd_supports(agent->gpu_id, &pcs_ioctl_version);
|
||||
if(status != ROCPROFILER_STATUS_SUCCESS) return status;
|
||||
|
||||
uint32_t kfd_gpu_id = agent->gpu_id;
|
||||
|
||||
@@ -400,8 +501,15 @@ ioctl_query_pcs_configs(const rocprofiler_agent_t* agent, rocp_pcs_cfgs_vec_t& r
|
||||
{
|
||||
// FIXME: Why this happens?
|
||||
if(ioctl_cfg.method == 0) continue;
|
||||
// Skip showing stochastic sampling until it's fully supported.
|
||||
if(ioctl_cfg.method == ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND_STOCHASTIC_V1) continue;
|
||||
|
||||
// Strict check whether the driver version (safely) supports the sampling method for
|
||||
// this specific device architecture.
|
||||
// If not, skip showing this configuration to the user, as it's not safe to use this
|
||||
// sampling method on this device.
|
||||
if(is_pc_sampling_method_supported(ioctl_cfg.method, agent, pcs_ioctl_version) !=
|
||||
ROCPROFILER_STATUS_SUCCESS)
|
||||
continue;
|
||||
|
||||
auto rocp_cfg = common::init_public_api_struct(rocprofiler_pc_sampling_configuration_t{});
|
||||
auto rocp_ret = convert_ioctl_pcs_config_to_rocp(ioctl_cfg, rocp_cfg);
|
||||
if(rocp_ret != ROCPROFILER_STATUS_SUCCESS)
|
||||
@@ -470,12 +578,15 @@ ioctl_pcs_create(const rocprofiler_agent_t* agent,
|
||||
uint64_t interval,
|
||||
uint32_t* ioctl_pcs_id)
|
||||
{
|
||||
if(auto status = is_pc_sampling_supported(agent); status != ROCPROFILER_STATUS_SUCCESS)
|
||||
return status;
|
||||
pcs_ioctl_version_t pcs_ioctl_version = 0;
|
||||
auto status = get_pcs_ioctl_version_if_kfd_supports(agent->gpu_id, &pcs_ioctl_version);
|
||||
if(status != ROCPROFILER_STATUS_SUCCESS) return status;
|
||||
|
||||
// Block configuring stochastic sampling until it's fully supported.
|
||||
if(method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
|
||||
return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
// Strict check: whether the driver version (safely) supports the sampling method for
|
||||
// this specific device architecture. If not, return an error and prevent the user from
|
||||
// using this sampling method on this device.
|
||||
status = is_pc_sampling_method_supported(method, agent, pcs_ioctl_version);
|
||||
if(status != ROCPROFILER_STATUS_SUCCESS) return status;
|
||||
|
||||
rocprofiler_ioctl_pc_sampling_info_t ioctl_cfg;
|
||||
auto ret = create_ioctl_pcs_config_from_rocp(ioctl_cfg, method, unit, interval);
|
||||
@@ -501,17 +612,27 @@ ioctl_pcs_create(const rocprofiler_agent_t* agent,
|
||||
auto ioctl_ret = ioctl(get_kfd_fd(), AMDKFD_IOC_PC_SAMPLE, &args);
|
||||
*ioctl_pcs_id = args.trace_id;
|
||||
|
||||
if(ioctl_ret != 0 && (errno == EBUSY || errno == EEXIST))
|
||||
if(ioctl_ret != 0)
|
||||
{
|
||||
// Currently, KFD uses EBUSY when e.g., PC sampling create is requested from
|
||||
// within ROCgdb.
|
||||
// On the other hand, EEXIST is used when one tries to create a PC sampling
|
||||
// with a configuration different than the one already active.
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE;
|
||||
}
|
||||
else if(ioctl_ret != 0)
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
if(errno == EBUSY || errno == EEXIST)
|
||||
{
|
||||
// Currently, KFD uses EBUSY when e.g., PC sampling create is requested from
|
||||
// within ROCgdb.
|
||||
// On the other hand, EEXIST is used when one tries to create a PC sampling
|
||||
// with a configuration different than the one already active.
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE;
|
||||
}
|
||||
else if(errno == EINVAL)
|
||||
{
|
||||
// invalid argument (e.g., interval must be power of 2, but a value that's
|
||||
// not power of 2 is provided)
|
||||
return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
else
|
||||
{
|
||||
// generic error
|
||||
return ROCPROFILER_STATUS_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
|
||||
+14
-12
@@ -43,7 +43,7 @@ namespace ioctl
|
||||
// 4. ROCPROFILER_IOCTL_STATUS_UNAVAILABLE
|
||||
// We might replace 1, 2, and 4 with rocprofiler_status_t, but still lacking a counterpart
|
||||
// for the ROCPROFILER_IOCTL_STATUS_BUFFER_TOO_SMALL
|
||||
typedef enum _ROCPROFILER_IOCTL_STATUS
|
||||
typedef enum rocprofiler_ioctl_status_t
|
||||
{
|
||||
ROCPROFILER_IOCTL_STATUS_SUCCESS = 0, /// Operation successful // USED
|
||||
ROCPROFILER_IOCTL_STATUS_ERROR = 1, /// General error return if not otherwise specified // USED
|
||||
@@ -72,7 +72,7 @@ typedef enum _ROCPROFILER_IOCTL_STATUS
|
||||
ROCPROFILER_IOCTL_STATUS_MEMORY_ALREADY_REGISTERED = 35, /// Memory buffer already registered
|
||||
ROCPROFILER_IOCTL_STATUS_MEMORY_NOT_REGISTERED = 36, /// Memory buffer not registered
|
||||
ROCPROFILER_IOCTL_STATUS_MEMORY_ALIGNMENT = 37, /// Memory parameter not aligned
|
||||
} ROCPROFILER_IOCTL_STATUS;
|
||||
} rocprofiler_ioctl_status_t;
|
||||
|
||||
typedef struct rocprofiler_ioctl_version_info_s
|
||||
{
|
||||
@@ -80,27 +80,29 @@ typedef struct rocprofiler_ioctl_version_info_s
|
||||
uint32_t minor_version; /// supported IOCTL interface minor version
|
||||
} rocprofiler_ioctl_version_info_t;
|
||||
|
||||
typedef enum _ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND
|
||||
typedef enum rocprofiler_ioctl_pc_sampling_method_kind_t
|
||||
{
|
||||
ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND_NONE = 0,
|
||||
ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND_HOSTTRAP_V1 = 1,
|
||||
ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND_STOCHASTIC_V1,
|
||||
} ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND;
|
||||
ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND_LAST,
|
||||
} rocprofiler_ioctl_pc_sampling_method_kind_t;
|
||||
|
||||
typedef enum _ROCPROFILER_IOCTL_PC_SAMPLING_UNITS
|
||||
typedef enum rocprofiler_ioctl_pc_sampling_unit_interval_t
|
||||
{
|
||||
ROCPROFILER_IOCTL_PC_SAMPLING_UNIT_INTERVAL_MICROSECONDS,
|
||||
ROCPROFILER_IOCTL_PC_SAMPLING_UNIT_INTERVAL_CYCLES,
|
||||
ROCPROFILER_IOCTL_PC_SAMPLING_UNIT_INTERVAL_INSTRUCTIONS,
|
||||
} ROCPROFILER_IOCTL_PC_SAMPLING_UNIT_INTERVAL;
|
||||
} rocprofiler_ioctl_pc_sampling_unit_interval_t;
|
||||
|
||||
typedef struct rocprofiler_ioctl_pc_sampling_info_s
|
||||
{
|
||||
uint64_t interval;
|
||||
uint64_t interval_min;
|
||||
uint64_t interval_max;
|
||||
uint64_t flags;
|
||||
ROCPROFILER_IOCTL_PC_SAMPLING_METHOD_KIND method;
|
||||
ROCPROFILER_IOCTL_PC_SAMPLING_UNIT_INTERVAL units;
|
||||
uint64_t interval;
|
||||
uint64_t interval_min;
|
||||
uint64_t interval_max;
|
||||
uint64_t flags;
|
||||
rocprofiler_ioctl_pc_sampling_method_kind_t method;
|
||||
rocprofiler_ioctl_pc_sampling_unit_interval_t units;
|
||||
} rocprofiler_ioctl_pc_sampling_info_t;
|
||||
|
||||
} // namespace ioctl
|
||||
|
||||
+1
-1
@@ -1,7 +1,7 @@
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_SOURCES pc_record_interface.cpp)
|
||||
set(ROCPROFILER_LIB_PC_SAMPLING_PARSER_HEADERS
|
||||
correlation.hpp gfx9.hpp gfx11.hpp parser_types.hpp pc_record_interface.hpp rocr.h
|
||||
stochastic_records.h translation.hpp)
|
||||
translation.hpp)
|
||||
|
||||
target_sources(
|
||||
rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_PC_SAMPLING_PARSER_SOURCES}
|
||||
|
||||
+2
@@ -241,6 +241,8 @@ add_upcoming_samples(const device_handle device,
|
||||
|
||||
auto& pc_sample = samples[p];
|
||||
pc_sample = copySample<GFXIP, PcSamplingRecordT>((const void*) (buffer + p));
|
||||
// skip invalid samples
|
||||
if(pc_sample.size == 0) continue;
|
||||
|
||||
// Convert PC -> (loaded code object id containing PC, offset within code object)
|
||||
if(!cache_addr_range.inrange(snap->pc))
|
||||
|
||||
@@ -32,7 +32,7 @@ public:
|
||||
TYPE_TEX,
|
||||
TYPE_LDS,
|
||||
TYPE_LDS_DIRECT,
|
||||
TYPE_EXP,
|
||||
TYPE_EXPORT,
|
||||
TYPE_MESSAGE,
|
||||
TYPE_BARRIER,
|
||||
TYPE_BRANCH_NOT_TAKEN,
|
||||
@@ -47,15 +47,15 @@ public:
|
||||
|
||||
enum reason_not_issued
|
||||
{
|
||||
REASON_NOT_AVAILABLE = 0,
|
||||
REASON_ALU,
|
||||
REASON_NO_INSTRUCTION_AVAILABLE = 0,
|
||||
REASON_ALU_DEPENDENCY,
|
||||
REASON_WAITCNT,
|
||||
REASON_ARBITER,
|
||||
REASON_SLEEP,
|
||||
REASON_BARRIER,
|
||||
REASON_ARBITER_NOT_WIN,
|
||||
REASON_SLEEP_WAIT,
|
||||
REASON_BARRIER_WAIT,
|
||||
REASON_OTHER_WAIT,
|
||||
REASON_INTERNAL = 31,
|
||||
REASON_EX_STALL = 31,
|
||||
REASON_INTERNAL_INSTRUCTION = 31,
|
||||
REASON_ARBITER_WIN_EX_STALL = 31,
|
||||
};
|
||||
|
||||
enum arb_state
|
||||
|
||||
+18
-15
@@ -25,6 +25,7 @@
|
||||
class GFX9
|
||||
{
|
||||
public:
|
||||
// matches values specified in perf_snapshot_data register
|
||||
enum inst_type_issued
|
||||
{
|
||||
TYPE_VALU = 0,
|
||||
@@ -33,7 +34,7 @@ public:
|
||||
TYPE_TEX,
|
||||
TYPE_LDS,
|
||||
TYPE_FLAT,
|
||||
TYPE_EXP,
|
||||
TYPE_EXPORT,
|
||||
TYPE_MESSAGE,
|
||||
TYPE_BARRIER,
|
||||
TYPE_BRANCH_NOT_TAKEN,
|
||||
@@ -46,30 +47,32 @@ public:
|
||||
TYPE_LDS_DIRECT = 31
|
||||
};
|
||||
|
||||
// matches values specified in perf_snapshot_data register
|
||||
enum reason_not_issued
|
||||
{
|
||||
REASON_NOT_AVAILABLE = 0,
|
||||
REASON_ALU,
|
||||
REASON_NO_INSTRUCTION_AVAILABLE = 0,
|
||||
REASON_ALU_DEPENDENCY,
|
||||
REASON_WAITCNT,
|
||||
REASON_INTERNAL,
|
||||
REASON_BARRIER,
|
||||
REASON_ARBITER,
|
||||
REASON_EX_STALL,
|
||||
REASON_INTERNAL_INSTRUCTION,
|
||||
REASON_BARRIER_WAIT,
|
||||
REASON_ARBITER_NOT_WIN,
|
||||
REASON_ARBITER_WIN_EX_STALL,
|
||||
REASON_OTHER_WAIT,
|
||||
REASON_LAST,
|
||||
REASON_SLEEP = 31
|
||||
REASON_SLEEP_WAIT = 31
|
||||
};
|
||||
|
||||
// matches the order of arb_state bits in perf_snapshot_data register
|
||||
enum arb_state
|
||||
{
|
||||
ISSUE_VALU = 0,
|
||||
ISSUE_MATRIX,
|
||||
ISSUE_SCALAR,
|
||||
ISSUE_VMEM_TEX,
|
||||
ISSUE_LDS,
|
||||
ISSUE_FLAT,
|
||||
ISSUE_MISC = 0,
|
||||
ISSUE_EXP,
|
||||
ISSUE_MISC,
|
||||
ISSUE_FLAT,
|
||||
ISSUE_LDS,
|
||||
ISSUE_VMEM_TEX,
|
||||
ISSUE_SCALAR,
|
||||
ISSUE_MATRIX,
|
||||
ISSUE_VALU,
|
||||
ISSUE_LAST,
|
||||
ISSUE_LDS_DIRECT = 31,
|
||||
ISSUE_BRMSG = 31,
|
||||
|
||||
+40
-2
@@ -22,6 +22,8 @@
|
||||
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/pc_record_interface.hpp"
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
template <>
|
||||
uint64_t
|
||||
PCSamplingParserContext::alloc<rocprofiler_pc_sampling_record_host_trap_v0_t>(
|
||||
@@ -127,6 +129,43 @@ PCSamplingParserContext::shouldFlipRocrBuffer(const dispatch_pkt_id_t& pkt) cons
|
||||
return corr_map->checkDispatch(pkt);
|
||||
}
|
||||
|
||||
template <typename PcSamplingRecordKindT>
|
||||
inline void
|
||||
emplace_records_in_buffer(rocprofiler::buffer::instance* buff,
|
||||
const PcSamplingRecordKindT* samples,
|
||||
size_t num_samples,
|
||||
rocprofiler_pc_sampling_record_kind_t record_kind)
|
||||
{
|
||||
for(size_t i = 0; i < num_samples; i++)
|
||||
buff->emplace(ROCPROFILER_BUFFER_CATEGORY_PC_SAMPLING, record_kind, samples[i]);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline void
|
||||
emplace_records_in_buffer<rocprofiler_pc_sampling_record_stochastic_v0_t>(
|
||||
rocprofiler::buffer::instance* buff,
|
||||
const rocprofiler_pc_sampling_record_stochastic_v0_t* samples,
|
||||
size_t num_samples,
|
||||
rocprofiler_pc_sampling_record_kind_t record_kind)
|
||||
{
|
||||
for(size_t i = 0; i < num_samples; i++)
|
||||
{
|
||||
if(samples[i].size == 0)
|
||||
{
|
||||
// `size == 0` internally means invalid sample, so generate it.
|
||||
auto invalid_sample = rocprofiler::common::init_public_api_struct(
|
||||
rocprofiler_pc_sampling_record_invalid_t{});
|
||||
buff->emplace(ROCPROFILER_BUFFER_CATEGORY_PC_SAMPLING,
|
||||
ROCPROFILER_PC_SAMPLING_RECORD_INVALID_SAMPLE,
|
||||
invalid_sample);
|
||||
}
|
||||
else
|
||||
{
|
||||
buff->emplace(ROCPROFILER_BUFFER_CATEGORY_PC_SAMPLING, record_kind, samples[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename PcSamplingRecordKindT>
|
||||
void
|
||||
PCSamplingParserContext::generate_upcoming_pc_record(
|
||||
@@ -141,8 +180,7 @@ PCSamplingParserContext::generate_upcoming_pc_record(
|
||||
if(!buff)
|
||||
throw std::runtime_error(fmt::format("Buffer with id: {} does not exists", buff_id.handle));
|
||||
|
||||
for(size_t i = 0; i < num_samples; i++)
|
||||
buff->emplace(ROCPROFILER_BUFFER_CATEGORY_PC_SAMPLING, record_kind, samples[i]);
|
||||
emplace_records_in_buffer(buff, samples, num_samples, record_kind);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
+1
-1
@@ -25,9 +25,9 @@
|
||||
#include "lib/rocprofiler-sdk/buffer.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/correlation.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/parser_types.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/stochastic_records.h"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/pc_sampling.h>
|
||||
#include <rocprofiler-sdk/cxx/hash.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
|
||||
|
||||
+311
-232
@@ -30,75 +30,113 @@
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
|
||||
#define GFXIP_MAJOR 9
|
||||
|
||||
#define TYPECHECK(x) \
|
||||
snapshots.push_back(rocprofiler_pc_sampling_snapshot_v1_t{.dual_issue_valu = 0, \
|
||||
.inst_type = ::PCSAMPLE::x, \
|
||||
.reason_not_issued = 0, \
|
||||
.arb_state_issue = 0, \
|
||||
.arb_state_stall = 0});
|
||||
#define UNROLL_TYPECHECK() \
|
||||
TYPECHECK(TYPE_VALU); \
|
||||
TYPECHECK(TYPE_MATRIX); \
|
||||
TYPECHECK(TYPE_SCALAR); \
|
||||
TYPECHECK(TYPE_TEX); \
|
||||
TYPECHECK(TYPE_LDS); \
|
||||
TYPECHECK(TYPE_FLAT); \
|
||||
TYPECHECK(TYPE_EXP); \
|
||||
TYPECHECK(TYPE_MESSAGE); \
|
||||
TYPECHECK(TYPE_BARRIER); \
|
||||
TYPECHECK(TYPE_BRANCH_NOT_TAKEN); \
|
||||
TYPECHECK(TYPE_BRANCH_TAKEN); \
|
||||
TYPECHECK(TYPE_JUMP); \
|
||||
TYPECHECK(TYPE_OTHER); \
|
||||
TYPECHECK(TYPE_NO_INST);
|
||||
#define RECORD_INST_TYPE(x) \
|
||||
{ \
|
||||
PcSamplingRecordT sample{}; \
|
||||
sample.inst_type = ROCPROFILER_PC_SAMPLING_INSTRUCTION##_##x; \
|
||||
snapshots.push_back(sample); \
|
||||
}
|
||||
|
||||
#define REASONCHECK(x) \
|
||||
snapshots.push_back(rocprofiler_pc_sampling_snapshot_v1_t{.dual_issue_valu = 0, \
|
||||
.inst_type = 0, \
|
||||
.reason_not_issued = ::PCSAMPLE::x, \
|
||||
.arb_state_issue = 0, \
|
||||
.arb_state_stall = 0});
|
||||
#define UNROLL_REASONCHECK(x) \
|
||||
REASONCHECK(REASON_NOT_AVAILABLE); \
|
||||
REASONCHECK(REASON_ALU); \
|
||||
REASONCHECK(REASON_WAITCNT); \
|
||||
REASONCHECK(REASON_INTERNAL); \
|
||||
REASONCHECK(REASON_BARRIER); \
|
||||
REASONCHECK(REASON_ARBITER); \
|
||||
REASONCHECK(REASON_EX_STALL); \
|
||||
REASONCHECK(REASON_OTHER_WAIT);
|
||||
#define GENERATE_RECORDS_INST_TYPE() \
|
||||
RECORD_INST_TYPE(TYPE_VALU); \
|
||||
RECORD_INST_TYPE(TYPE_MATRIX); \
|
||||
RECORD_INST_TYPE(TYPE_SCALAR); \
|
||||
RECORD_INST_TYPE(TYPE_TEX); \
|
||||
RECORD_INST_TYPE(TYPE_LDS); \
|
||||
RECORD_INST_TYPE(TYPE_FLAT); \
|
||||
RECORD_INST_TYPE(TYPE_EXPORT); \
|
||||
RECORD_INST_TYPE(TYPE_MESSAGE); \
|
||||
RECORD_INST_TYPE(TYPE_BARRIER); \
|
||||
RECORD_INST_TYPE(TYPE_BRANCH_NOT_TAKEN); \
|
||||
RECORD_INST_TYPE(TYPE_BRANCH_TAKEN); \
|
||||
RECORD_INST_TYPE(TYPE_JUMP); \
|
||||
RECORD_INST_TYPE(TYPE_OTHER); \
|
||||
RECORD_INST_TYPE(TYPE_NO_INST);
|
||||
|
||||
#define ARBCHECK1(x, y) \
|
||||
snapshots.push_back( \
|
||||
rocprofiler_pc_sampling_snapshot_v1_t{.dual_issue_valu = 0, \
|
||||
.inst_type = 0, \
|
||||
.reason_not_issued = 0, \
|
||||
.arb_state_issue = 1 << ::PCSAMPLE::x, \
|
||||
.arb_state_stall = 1 << ::PCSAMPLE::y});
|
||||
#define ARBCHECK2(x) \
|
||||
ARBCHECK1(x, ISSUE_VALU); \
|
||||
ARBCHECK1(x, ISSUE_MATRIX); \
|
||||
ARBCHECK1(x, ISSUE_SCALAR); \
|
||||
ARBCHECK1(x, ISSUE_VMEM_TEX); \
|
||||
ARBCHECK1(x, ISSUE_LDS); \
|
||||
ARBCHECK1(x, ISSUE_FLAT); \
|
||||
ARBCHECK1(x, ISSUE_EXP); \
|
||||
ARBCHECK1(x, ISSUE_MISC);
|
||||
#define RECORD_NOT_ISSUED_REASON(x) \
|
||||
{ \
|
||||
PcSamplingRecordT sample{}; \
|
||||
sample.snapshot.reason_not_issued = ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED##_##x; \
|
||||
snapshots.push_back(sample); \
|
||||
}
|
||||
|
||||
#define UNROLL_ARBCHECK() \
|
||||
ARBCHECK2(ISSUE_VALU); \
|
||||
ARBCHECK2(ISSUE_MATRIX); \
|
||||
ARBCHECK2(ISSUE_SCALAR); \
|
||||
ARBCHECK2(ISSUE_VMEM_TEX); \
|
||||
ARBCHECK2(ISSUE_LDS); \
|
||||
ARBCHECK2(ISSUE_FLAT); \
|
||||
ARBCHECK2(ISSUE_EXP); \
|
||||
ARBCHECK2(ISSUE_MISC);
|
||||
#define GENERATE_RECORDS_NOT_ISSUED_REASON(x) \
|
||||
RECORD_NOT_ISSUED_REASON(REASON_NO_INSTRUCTION_AVAILABLE); \
|
||||
RECORD_NOT_ISSUED_REASON(REASON_ALU_DEPENDENCY); \
|
||||
RECORD_NOT_ISSUED_REASON(REASON_WAITCNT); \
|
||||
RECORD_NOT_ISSUED_REASON(REASON_INTERNAL_INSTRUCTION); \
|
||||
RECORD_NOT_ISSUED_REASON(REASON_BARRIER_WAIT); \
|
||||
RECORD_NOT_ISSUED_REASON(REASON_ARBITER_NOT_WIN); \
|
||||
RECORD_NOT_ISSUED_REASON(REASON_ARBITER_WIN_EX_STALL); \
|
||||
RECORD_NOT_ISSUED_REASON(REASON_OTHER_WAIT);
|
||||
|
||||
#define RECORD_ARBSTATE_ISSUE_STALL(x, y) \
|
||||
{ \
|
||||
PcSamplingRecordT sample{}; \
|
||||
sample.snapshot.arb_state##_##x = 1; \
|
||||
sample.snapshot.arb_state##_##y = 1; \
|
||||
snapshots.push_back(sample); \
|
||||
}
|
||||
|
||||
// Respecting the order of elements in GFX9:arb_state that match the order of arb_state bits
|
||||
// in perf_snapshot_data register
|
||||
#define RECORD_ARBSTATE_ISSUE(x) \
|
||||
RECORD_ARBSTATE_ISSUE_STALL(x, stall_misc); \
|
||||
RECORD_ARBSTATE_ISSUE_STALL(x, stall_exp); \
|
||||
RECORD_ARBSTATE_ISSUE_STALL(x, stall_flat); \
|
||||
RECORD_ARBSTATE_ISSUE_STALL(x, stall_lds); \
|
||||
RECORD_ARBSTATE_ISSUE_STALL(x, stall_vmem_tex); \
|
||||
RECORD_ARBSTATE_ISSUE_STALL(x, stall_scalar); \
|
||||
RECORD_ARBSTATE_ISSUE_STALL(x, stall_matrix); \
|
||||
RECORD_ARBSTATE_ISSUE_STALL(x, stall_valu);
|
||||
|
||||
// Respecting the order of elements in GFX9:arb_state that match the order of arb_state bits
|
||||
// in perf_snapshot_data register
|
||||
#define GENERATE_RECORDS_ARBSTATE_ISSUE() \
|
||||
RECORD_ARBSTATE_ISSUE(issue_misc); \
|
||||
RECORD_ARBSTATE_ISSUE(issue_exp); \
|
||||
RECORD_ARBSTATE_ISSUE(issue_flat); \
|
||||
RECORD_ARBSTATE_ISSUE(issue_lds); \
|
||||
RECORD_ARBSTATE_ISSUE(issue_vmem_tex); \
|
||||
RECORD_ARBSTATE_ISSUE(issue_scalar); \
|
||||
RECORD_ARBSTATE_ISSUE(issue_matrix); \
|
||||
RECORD_ARBSTATE_ISSUE(issue_valu);
|
||||
|
||||
#define NON_GFX9_ARBSTATE_IS_ZERO(x, y) \
|
||||
EXPECT_EQ(x.snapshot.arb_state_issue_lds_direct, 0); \
|
||||
EXPECT_EQ(y.snapshot.arb_state_issue_lds_direct, 0); \
|
||||
EXPECT_EQ(x.snapshot.arb_state_issue_brmsg, 0); \
|
||||
EXPECT_EQ(y.snapshot.arb_state_issue_brmsg, 0); \
|
||||
\
|
||||
EXPECT_EQ(x.snapshot.arb_state_stall_lds_direct, 0); \
|
||||
EXPECT_EQ(y.snapshot.arb_state_stall_lds_direct, 0); \
|
||||
EXPECT_EQ(x.snapshot.arb_state_stall_brmsg, 0); \
|
||||
EXPECT_EQ(y.snapshot.arb_state_stall_brmsg, 0);
|
||||
|
||||
// Checks (non-fatally) that records `x` and `y` agree on every GFX9 arbiter-state bit: the eight
// issue bits, then the eight stall bits. Finally delegates to NON_GFX9_ARBSTATE_IS_ZERO for the
// lds_direct/brmsg bits.
// The parameters are parenthesized so that expression arguments such as `*ptr` expand correctly.
// The expansion supplies its own statement terminators, so existing call sites may invoke it
// without a trailing semicolon.
#define MATCH_ARBSTATE(x, y)                                                                       \
    EXPECT_EQ((x).snapshot.arb_state_issue_valu, (y).snapshot.arb_state_issue_valu);               \
    EXPECT_EQ((x).snapshot.arb_state_issue_matrix, (y).snapshot.arb_state_issue_matrix);           \
    EXPECT_EQ((x).snapshot.arb_state_issue_lds, (y).snapshot.arb_state_issue_lds);                 \
    EXPECT_EQ((x).snapshot.arb_state_issue_scalar, (y).snapshot.arb_state_issue_scalar);           \
    EXPECT_EQ((x).snapshot.arb_state_issue_vmem_tex, (y).snapshot.arb_state_issue_vmem_tex);       \
    EXPECT_EQ((x).snapshot.arb_state_issue_flat, (y).snapshot.arb_state_issue_flat);               \
    EXPECT_EQ((x).snapshot.arb_state_issue_exp, (y).snapshot.arb_state_issue_exp);                 \
    EXPECT_EQ((x).snapshot.arb_state_issue_misc, (y).snapshot.arb_state_issue_misc);               \
                                                                                                   \
    EXPECT_EQ((x).snapshot.arb_state_stall_valu, (y).snapshot.arb_state_stall_valu);               \
    EXPECT_EQ((x).snapshot.arb_state_stall_matrix, (y).snapshot.arb_state_stall_matrix);           \
    EXPECT_EQ((x).snapshot.arb_state_stall_lds, (y).snapshot.arb_state_stall_lds);                 \
    EXPECT_EQ((x).snapshot.arb_state_stall_scalar, (y).snapshot.arb_state_stall_scalar);           \
    EXPECT_EQ((x).snapshot.arb_state_stall_vmem_tex, (y).snapshot.arb_state_stall_vmem_tex);       \
    EXPECT_EQ((x).snapshot.arb_state_stall_flat, (y).snapshot.arb_state_stall_flat);               \
    EXPECT_EQ((x).snapshot.arb_state_stall_exp, (y).snapshot.arb_state_stall_exp);                 \
    EXPECT_EQ((x).snapshot.arb_state_stall_misc, (y).snapshot.arb_state_stall_misc);               \
                                                                                                   \
    NON_GFX9_ARBSTATE_IS_ZERO((x), (y))
|
||||
|
||||
template <typename PcSamplingRecordT>
|
||||
class WaveSnapTest
|
||||
@@ -134,10 +172,11 @@ public:
|
||||
snap.correlation_id = dispatch->getMockId().raw;
|
||||
|
||||
snap.perf_snapshot_data = (inst_type << 3) | (reason << 7);
|
||||
snap.perf_snapshot_data |= 0x1; // sample is valid
|
||||
snap.perf_snapshot_data |= (arb_issue << 10) | (arb_stall << 18);
|
||||
snap.perf_snapshot_data1 = wave_cnt;
|
||||
|
||||
assert(dispatch.get());
|
||||
EXPECT_NE(dispatch.get(), nullptr);
|
||||
dispatch->submit(packet_union_t{.snap = snap});
|
||||
};
|
||||
|
||||
@@ -156,180 +195,213 @@ public:
|
||||
this->buffer->genUpcomingSamples(max_wave_number);
|
||||
for(size_t i = 0; i < max_wave_number; i++)
|
||||
this->genPCSample(
|
||||
i, GFX9::TYPE_LDS, GFX9::REASON_ALU, GFX9::ISSUE_VALU, GFX9::ISSUE_VALU);
|
||||
i, GFX9::TYPE_LDS, GFX9::REASON_ALU_DEPENDENCY, GFX9::ISSUE_VALU, GFX9::ISSUE_VALU);
|
||||
}
|
||||
|
||||
void CheckBuffers() override
|
||||
{
|
||||
auto parsed = this->buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
assert(parsed.size() == 1);
|
||||
assert(parsed[0].size() == max_wave_number);
|
||||
EXPECT_EQ(parsed.size(), 1);
|
||||
EXPECT_EQ(parsed[0].size(), max_wave_number);
|
||||
|
||||
for(size_t i = 0; i < max_wave_number; i++)
|
||||
assert(parsed[0][i].wave_count == i);
|
||||
EXPECT_EQ(parsed[0][i].wave_count, i);
|
||||
}
|
||||
|
||||
const size_t max_wave_number = 64;
|
||||
std::vector<PcSamplingRecordT> snapshots;
|
||||
};
|
||||
|
||||
// class InstTypeTest : public WaveSnapTest
|
||||
// {
|
||||
// public:
|
||||
// void FillBuffers() override
|
||||
// {
|
||||
// // Loop over inst_type_issued
|
||||
// UNROLL_TYPECHECK();
|
||||
// buffer->genUpcomingSamples(GFX9::TYPE_LAST);
|
||||
// for(int i = 0; i < GFX9::TYPE_LAST; i++)
|
||||
// genPCSample(i, i, GFX9::REASON_ALU, GFX9::ISSUE_MATRIX, GFX9::ISSUE_MATRIX);
|
||||
// }
|
||||
template <typename PcSamplingRecordT>
|
||||
class InstTypeTest : public WaveSnapTest<PcSamplingRecordT>
|
||||
{
|
||||
public:
|
||||
void FillBuffers() override
|
||||
{
|
||||
// Loop over inst_type_issued
|
||||
GENERATE_RECORDS_INST_TYPE();
|
||||
this->buffer->genUpcomingSamples(GFX9::TYPE_LAST);
|
||||
for(int i = 0; i < GFX9::TYPE_LAST; i++)
|
||||
this->genPCSample(
|
||||
i, i, GFX9::REASON_ALU_DEPENDENCY, GFX9::ISSUE_MATRIX, GFX9::ISSUE_MATRIX);
|
||||
}
|
||||
|
||||
// void CheckBuffers() override
|
||||
// {
|
||||
// auto parsed = buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
// assert(parsed.size() == 1);
|
||||
// assert(parsed[0].size() == GFX9::TYPE_LAST);
|
||||
// assert(snapshots.size() == GFX9::TYPE_LAST);
|
||||
void CheckBuffers() override
|
||||
{
|
||||
auto parsed = this->buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
EXPECT_EQ(parsed.size(), 1);
|
||||
EXPECT_EQ(parsed[0].size(), GFX9::TYPE_LAST);
|
||||
EXPECT_EQ(snapshots.size(), GFX9::TYPE_LAST);
|
||||
|
||||
// for(size_t i = 0; i < GFX9::TYPE_LAST; i++)
|
||||
// assert(snapshots[i].inst_type == parsed[0][i].snapshot.inst_type);
|
||||
// }
|
||||
for(size_t i = 0; i < GFX9::TYPE_LAST; i++)
|
||||
EXPECT_EQ(snapshots[i].inst_type, parsed[0][i].inst_type);
|
||||
}
|
||||
|
||||
// std::vector<rocprofiler_pc_sampling_snapshot_v1_t> snapshots;
|
||||
// };
|
||||
std::vector<PcSamplingRecordT> snapshots;
|
||||
};
|
||||
|
||||
// class StallReasonTest : public WaveSnapTest
|
||||
// {
|
||||
// public:
|
||||
// void FillBuffers() override
|
||||
// {
|
||||
// // Loop over reason_not_issued
|
||||
// UNROLL_REASONCHECK();
|
||||
// buffer->genUpcomingSamples(GFX9::REASON_LAST);
|
||||
// for(int i = 0; i < GFX9::REASON_LAST; i++)
|
||||
// genPCSample(i, GFX9::TYPE_MATRIX, i, GFX9::ISSUE_MATRIX, GFX9::ISSUE_MATRIX);
|
||||
// }
|
||||
template <typename PcSamplingRecordT>
|
||||
class StallReasonTest : public WaveSnapTest<PcSamplingRecordT>
|
||||
{
|
||||
public:
|
||||
void FillBuffers() override
|
||||
{
|
||||
// Loop over reason_not_issued
|
||||
GENERATE_RECORDS_NOT_ISSUED_REASON();
|
||||
this->buffer->genUpcomingSamples(GFX9::REASON_LAST);
|
||||
for(int i = 0; i < GFX9::REASON_LAST; i++)
|
||||
this->genPCSample(i, GFX9::TYPE_MATRIX, i, GFX9::ISSUE_MATRIX, GFX9::ISSUE_MATRIX);
|
||||
}
|
||||
|
||||
// void CheckBuffers() override
|
||||
// {
|
||||
// auto parsed = buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
// assert(parsed.size() == 1);
|
||||
// assert(parsed[0].size() == GFX9::REASON_LAST);
|
||||
// assert(snapshots.size() == GFX9::REASON_LAST);
|
||||
void CheckBuffers() override
|
||||
{
|
||||
auto parsed = this->buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
EXPECT_EQ(parsed.size(), 1);
|
||||
EXPECT_EQ(parsed[0].size(), GFX9::REASON_LAST);
|
||||
EXPECT_EQ(snapshots.size(), GFX9::REASON_LAST);
|
||||
|
||||
// for(size_t i = 0; i < GFX9::REASON_LAST; i++)
|
||||
// assert(snapshots[i].reason_not_issued == parsed[0][i].snapshot.reason_not_issued);
|
||||
// }
|
||||
for(size_t i = 0; i < GFX9::REASON_LAST; i++)
|
||||
EXPECT_EQ(snapshots[i].snapshot.reason_not_issued,
|
||||
parsed[0][i].snapshot.reason_not_issued);
|
||||
}
|
||||
|
||||
// std::vector<rocprofiler_pc_sampling_snapshot_v1_t> snapshots;
|
||||
// };
|
||||
std::vector<PcSamplingRecordT> snapshots;
|
||||
};
|
||||
|
||||
// class ArbStateTest : public WaveSnapTest
|
||||
// {
|
||||
// public:
|
||||
// void FillBuffers() override
|
||||
// {
|
||||
// // Loop over arb_state_issue
|
||||
// UNROLL_ARBCHECK();
|
||||
// buffer->genUpcomingSamples(GFX9::ISSUE_LAST * GFX9::ISSUE_LAST);
|
||||
// for(int i = 0; i < GFX9::ISSUE_LAST; i++)
|
||||
// for(int j = 0; j < GFX9::ISSUE_LAST; j++)
|
||||
// genPCSample(i, GFX9::TYPE_MATRIX, GFX9::REASON_ALU, 1 << i, 1 << j);
|
||||
// }
|
||||
template <typename PcSamplingRecordT>
|
||||
class ArbStateTest : public WaveSnapTest<PcSamplingRecordT>
|
||||
{
|
||||
public:
|
||||
void FillBuffers() override
|
||||
{
|
||||
// Loop over arb_state_issue
|
||||
GENERATE_RECORDS_ARBSTATE_ISSUE();
|
||||
this->buffer->genUpcomingSamples(GFX9::ISSUE_LAST * GFX9::ISSUE_LAST);
|
||||
// To match the order of instantiating snapshots inside `GENERATE_RECORDS_ARBSTATE_ISSUE`
|
||||
// we loop over GFX9::
|
||||
for(int i = 0; i < GFX9::ISSUE_LAST; i++)
|
||||
for(int j = 0; j < GFX9::ISSUE_LAST; j++)
|
||||
this->genPCSample(
|
||||
i, GFX9::TYPE_MATRIX, GFX9::REASON_ALU_DEPENDENCY, 1 << i, 1 << j);
|
||||
}
|
||||
|
||||
// void CheckBuffers() override
|
||||
// {
|
||||
// auto parsed = buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
// assert(parsed.size() == 1);
|
||||
// assert(parsed[0].size() == GFX9::ISSUE_LAST * GFX9::ISSUE_LAST);
|
||||
// assert(snapshots.size() == GFX9::ISSUE_LAST * GFX9::ISSUE_LAST);
|
||||
void CheckBuffers() override
|
||||
{
|
||||
auto parsed = this->buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
EXPECT_EQ(parsed.size(), 1);
|
||||
EXPECT_EQ(parsed[0].size(), GFX9::ISSUE_LAST * GFX9::ISSUE_LAST);
|
||||
EXPECT_EQ(snapshots.size(), GFX9::ISSUE_LAST * GFX9::ISSUE_LAST);
|
||||
|
||||
// for(size_t i = 0; i < GFX9::ISSUE_LAST * GFX9::ISSUE_LAST; i++)
|
||||
// {
|
||||
// auto& snap = snapshots[i];
|
||||
// assert(snap.arb_state_issue == parsed[0][i].snapshot.arb_state_issue);
|
||||
// assert(snap.arb_state_stall == parsed[0][i].snapshot.arb_state_stall);
|
||||
// }
|
||||
// }
|
||||
for(size_t i = 0; i < GFX9::ISSUE_LAST * GFX9::ISSUE_LAST; i++)
|
||||
{
|
||||
auto& snap = snapshots[i];
|
||||
MATCH_ARBSTATE(snap, parsed[0][i])
|
||||
}
|
||||
}
|
||||
|
||||
// std::vector<rocprofiler_pc_sampling_snapshot_v1_t> snapshots;
|
||||
// };
|
||||
std::vector<PcSamplingRecordT> snapshots;
|
||||
};
|
||||
|
||||
// class WaveIssueAndErrorTest : public WaveSnapTest
|
||||
// {
|
||||
// void FillBuffers() override
|
||||
// {
|
||||
// buffer->genUpcomingSamples(16);
|
||||
// for(int valid = 0; valid <= 1; valid++)
|
||||
// for(int issued = 0; issued <= 1; issued++)
|
||||
// for(int dual = 0; dual <= 1; dual++)
|
||||
// for(int error = 0; error <= 1; error++)
|
||||
// genPCSample(valid, issued, dual, error);
|
||||
// }
|
||||
template <typename PcSamplingRecordT, typename PcSamplingRecordInvalidT>
|
||||
class WaveIssueAndErrorTest : public WaveSnapTest<PcSamplingRecordT>
|
||||
{
|
||||
struct pc_sampling_test_record_t
|
||||
{
|
||||
bool valid;
|
||||
union
|
||||
{
|
||||
PcSamplingRecordT valid_record;
|
||||
PcSamplingRecordInvalidT invalid_record;
|
||||
};
|
||||
};
|
||||
|
||||
// void CheckBuffers() override
|
||||
// {
|
||||
// const int num_combinations = 16;
|
||||
// auto parsed = buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
// assert(parsed.size() == 1);
|
||||
// assert(parsed[0].size() == num_combinations);
|
||||
// assert(compare.size() == num_combinations);
|
||||
void FillBuffers() override
|
||||
{
|
||||
this->buffer->genUpcomingSamples(16);
|
||||
for(int valid = 0; valid <= 1; valid++)
|
||||
for(int issued = 0; issued <= 1; issued++)
|
||||
for(int dual = 0; dual <= 1; dual++)
|
||||
for(int error = 0; error <= 1; error++)
|
||||
genPCSample(valid, issued, dual, error);
|
||||
}
|
||||
|
||||
// for(size_t i = 0; i < num_combinations; i++)
|
||||
// {
|
||||
// assert(compare[i].flags.valid == parsed[0][i].flags.valid);
|
||||
// assert(compare[i].wave_issued == parsed[0][i].wave_issued);
|
||||
// assert(compare[i].snapshot.dual_issue_valu == parsed[0][i].snapshot.dual_issue_valu);
|
||||
// }
|
||||
// }
|
||||
void CheckBuffers() override
|
||||
{
|
||||
const int num_combinations = 16;
|
||||
auto parsed = this->buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
EXPECT_EQ(parsed.size(), 1);
|
||||
EXPECT_EQ(parsed[0].size(), num_combinations);
|
||||
EXPECT_EQ(compare.size(), num_combinations);
|
||||
|
||||
// union trap_snapshot_v1
|
||||
// {
|
||||
// struct
|
||||
// {
|
||||
// uint32_t valid : 1;
|
||||
// uint32_t issued : 1;
|
||||
// uint32_t dual : 1;
|
||||
// uint32_t reserved : 23;
|
||||
// uint32_t error : 1;
|
||||
// uint32_t reserved2 : 5;
|
||||
// };
|
||||
// uint32_t raw;
|
||||
// };
|
||||
for(size_t i = 0; i < num_combinations; i++)
|
||||
{
|
||||
if(compare[i].valid)
|
||||
{
|
||||
EXPECT_EQ(compare[i].valid_record.wave_issued, parsed[0][i].wave_issued);
|
||||
EXPECT_EQ(compare[i].valid_record.snapshot.dual_issue_valu,
|
||||
parsed[0][i].snapshot.dual_issue_valu);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Internally (inside the parser) invalid samples are represented with
|
||||
// PcSamplingRecordT of size 0. Eventually, those records are replaced with the
|
||||
// PcSamplingRecordInvalidT prior to putting inside the SDK buffer.
|
||||
EXPECT_EQ(parsed[0][i].size, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// void genPCSample(bool valid, bool issued, bool dual, bool error)
|
||||
// {
|
||||
// rocprofiler_pc_sampling_record_t sample;
|
||||
// ::memset(&sample, 0, sizeof(sample));
|
||||
// // TODO: Since code objects are not mocked, use pc.code_object_offset
|
||||
// // as the absolute physical address of the mocked PC.
|
||||
// sample.pc.code_object_offset = dispatch->unique_id;
|
||||
union trap_snapshot_v1
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32_t valid : 1;
|
||||
uint32_t issued : 1;
|
||||
uint32_t dual : 1;
|
||||
uint32_t reserved : 23;
|
||||
uint32_t error : 1;
|
||||
uint32_t reserved2 : 5;
|
||||
};
|
||||
uint32_t raw;
|
||||
};
|
||||
|
||||
// sample.correlation_id.internal = dispatch->getMockId().raw;
|
||||
void genPCSample(bool valid, bool issued, bool dual, bool error)
|
||||
{
|
||||
pc_sampling_test_record_t record{};
|
||||
record.valid = valid && !error;
|
||||
if(record.valid)
|
||||
{
|
||||
// Fill in the data for the valid record.
|
||||
auto& sample = record.valid_record;
|
||||
|
||||
// sample.flags.valid = valid && !error;
|
||||
// sample.wave_issued = issued;
|
||||
// sample.snapshot.dual_issue_valu = dual;
|
||||
// TODO: Since code objects are not mocked, use pc.code_object_offset
|
||||
// as the absolute physical address of the mocked PC.
|
||||
sample.pc.code_object_offset = this->dispatch->unique_id;
|
||||
|
||||
// assert(dispatch.get());
|
||||
sample.correlation_id.internal = this->dispatch->getMockId().raw;
|
||||
|
||||
// compare.push_back(sample);
|
||||
sample.wave_issued = issued;
|
||||
sample.snapshot.dual_issue_valu = dual;
|
||||
|
||||
// trap_snapshot_v1 snap;
|
||||
// snap.valid = valid;
|
||||
// snap.issued = issued;
|
||||
// snap.dual = dual;
|
||||
// snap.error = error;
|
||||
EXPECT_NE(this->dispatch.get(), nullptr);
|
||||
}
|
||||
|
||||
// perf_sample_snapshot_v1 pss;
|
||||
// pss.perf_snapshot_data = snap.raw;
|
||||
// pss.correlation_id = dispatch->getMockId().raw;
|
||||
// dispatch->submit(std::move(pss));
|
||||
// };
|
||||
compare.push_back(record);
|
||||
|
||||
// std::vector<rocprofiler_pc_sampling_record_t> compare;
|
||||
// };
|
||||
trap_snapshot_v1 snap;
|
||||
snap.valid = valid;
|
||||
snap.issued = issued;
|
||||
snap.dual = dual;
|
||||
snap.error = error;
|
||||
|
||||
perf_sample_snapshot_v1 pss;
|
||||
pss.perf_snapshot_data = snap.raw;
|
||||
pss.correlation_id = this->dispatch->getMockId().raw;
|
||||
this->dispatch->submit(std::move(pss));
|
||||
};
|
||||
|
||||
std::vector<pc_sampling_test_record_t> compare;
|
||||
};
|
||||
|
||||
template <typename PcSamplingRecordT>
|
||||
class HwIdTest : public WaveSnapTest<PcSamplingRecordT>
|
||||
@@ -405,23 +477,23 @@ class HwIdTest : public WaveSnapTest<PcSamplingRecordT>
|
||||
void CheckBuffers() override
|
||||
{
|
||||
auto parsed = this->buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
assert(parsed.size() == 1);
|
||||
assert(parsed[0].size() == 3);
|
||||
assert(compare.size() == 3);
|
||||
EXPECT_EQ(parsed.size(), 1);
|
||||
EXPECT_EQ(parsed[0].size(), 3);
|
||||
EXPECT_EQ(compare.size(), 3);
|
||||
|
||||
for(size_t i = 0; i < 3; i++)
|
||||
{
|
||||
// Comparing individual fields
|
||||
assert(compare[i].hw_id.wave_id == parsed[0][i].hw_id.wave_id);
|
||||
assert(compare[i].hw_id.simd_id == parsed[0][i].hw_id.simd_id);
|
||||
assert(compare[i].hw_id.pipe_id == parsed[0][i].hw_id.pipe_id);
|
||||
assert(compare[i].hw_id.cu_or_wgp_id == parsed[0][i].hw_id.cu_or_wgp_id);
|
||||
assert(compare[i].hw_id.shader_array_id == parsed[0][i].hw_id.shader_array_id);
|
||||
assert(compare[i].hw_id.shader_engine_id == parsed[0][i].hw_id.shader_engine_id);
|
||||
assert(compare[i].hw_id.workgroup_id == parsed[0][i].hw_id.workgroup_id);
|
||||
assert(compare[i].hw_id.vm_id == parsed[0][i].hw_id.vm_id);
|
||||
assert(compare[i].hw_id.queue_id == parsed[0][i].hw_id.queue_id);
|
||||
assert(compare[i].hw_id.microengine_id == parsed[0][i].hw_id.microengine_id);
|
||||
EXPECT_EQ(compare[i].hw_id.wave_id, parsed[0][i].hw_id.wave_id);
|
||||
EXPECT_EQ(compare[i].hw_id.simd_id, parsed[0][i].hw_id.simd_id);
|
||||
EXPECT_EQ(compare[i].hw_id.pipe_id, parsed[0][i].hw_id.pipe_id);
|
||||
EXPECT_EQ(compare[i].hw_id.cu_or_wgp_id, parsed[0][i].hw_id.cu_or_wgp_id);
|
||||
EXPECT_EQ(compare[i].hw_id.shader_array_id, parsed[0][i].hw_id.shader_array_id);
|
||||
EXPECT_EQ(compare[i].hw_id.shader_engine_id, parsed[0][i].hw_id.shader_engine_id);
|
||||
EXPECT_EQ(compare[i].hw_id.workgroup_id, parsed[0][i].hw_id.workgroup_id);
|
||||
EXPECT_EQ(compare[i].hw_id.vm_id, parsed[0][i].hw_id.vm_id);
|
||||
EXPECT_EQ(compare[i].hw_id.queue_id, parsed[0][i].hw_id.queue_id);
|
||||
EXPECT_EQ(compare[i].hw_id.microengine_id, parsed[0][i].hw_id.microengine_id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -451,8 +523,9 @@ class HwIdTest : public WaveSnapTest<PcSamplingRecordT>
|
||||
// raw register value
|
||||
snap.hw_id = hw_id.raw;
|
||||
snap.correlation_id = this->dispatch->getMockId().raw;
|
||||
snap.perf_snapshot_data |= 0x1; // sample is valid
|
||||
|
||||
assert(this->dispatch.get());
|
||||
EXPECT_NE(this->dispatch.get(), nullptr);
|
||||
this->dispatch->submit(snap);
|
||||
};
|
||||
|
||||
@@ -473,26 +546,26 @@ class WaveOtherFieldsTest : public WaveSnapTest<PcSamplingRecordT>
|
||||
void CheckBuffers() override
|
||||
{
|
||||
auto parsed = this->buffer->get_parsed_buffer(9); // GFXIP==9
|
||||
assert(parsed.size() == 1);
|
||||
assert(parsed[0].size() == 3);
|
||||
assert(compare.size() == 3);
|
||||
EXPECT_EQ(parsed.size(), 1);
|
||||
EXPECT_EQ(parsed[0].size(), 3);
|
||||
EXPECT_EQ(compare.size(), 3);
|
||||
|
||||
for(size_t i = 0; i < 3; i++)
|
||||
{
|
||||
// TODO: if we decide to test flags, make specialization for
|
||||
// rocprofiler_pc_sampling_record_stochastic_v0_t
|
||||
// assert(parsed[0][i].flags.has_stall_reason == true);
|
||||
// assert(parsed[0][i].flags.has_wave_cnt == true);
|
||||
// assert(parsed[0][i].flags.reserved == false);
|
||||
// EXPECT_EQ(parsed[0][i].flags.has_stall_reason, true);
|
||||
// EXPECT_EQ(parsed[0][i].flags.has_wave_cnt, true);
|
||||
// EXPECT_EQ(parsed[0][i].flags.reserved, false);
|
||||
|
||||
assert(compare[i].exec_mask == parsed[0][i].exec_mask);
|
||||
assert(compare[i].workgroup_id == parsed[0][i].workgroup_id);
|
||||
EXPECT_EQ(compare[i].exec_mask, parsed[0][i].exec_mask);
|
||||
EXPECT_EQ(compare[i].workgroup_id, parsed[0][i].workgroup_id);
|
||||
|
||||
assert(compare[i].hw_id.chiplet == parsed[0][i].hw_id.chiplet);
|
||||
assert(compare[i].wave_in_group == parsed[0][i].wave_in_group);
|
||||
EXPECT_EQ(compare[i].hw_id.chiplet, parsed[0][i].hw_id.chiplet);
|
||||
EXPECT_EQ(compare[i].wave_in_group, parsed[0][i].wave_in_group);
|
||||
// TODO: handle HW_ID as well.
|
||||
// assert(compare[i].hw_id == parsed[0][i].hw_id);
|
||||
assert(compare[i].correlation_id.internal == parsed[0][i].correlation_id.internal);
|
||||
// EXPECT_EQ(compare[i].hw_id, parsed[0][i].hw_id);
|
||||
EXPECT_EQ(compare[i].correlation_id.internal, parsed[0][i].correlation_id.internal);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -525,7 +598,11 @@ class WaveOtherFieldsTest : public WaveSnapTest<PcSamplingRecordT>
|
||||
snap.chiplet_and_wave_id = (chip << 8) | (wave & 0x3F);
|
||||
snap.correlation_id = this->dispatch->getMockId().raw;
|
||||
|
||||
assert(this->dispatch.get());
|
||||
// to ensure all stochastic samples are generated properly,
|
||||
// mark them as valid
|
||||
snap.perf_snapshot_data |= 0x1; // set the bit indicating the sample is valid
|
||||
|
||||
EXPECT_NE(this->dispatch.get(), nullptr);
|
||||
this->dispatch->submit(snap);
|
||||
|
||||
(void) pc;
|
||||
@@ -538,10 +615,12 @@ TEST(pcs_parser, gfx9_test)
|
||||
{
|
||||
// Tests specific to stochastic sampling only
|
||||
WaveCntTest<rocprofiler_pc_sampling_record_stochastic_v0_t>{}.Test();
|
||||
// InstTypeTest{}.Test();
|
||||
// StallReasonTest{}.Test();
|
||||
// ArbStateTest{}.Test();
|
||||
// WaveIssueAndErrorTest{}.Test();
|
||||
InstTypeTest<rocprofiler_pc_sampling_record_stochastic_v0_t>{}.Test();
|
||||
StallReasonTest<rocprofiler_pc_sampling_record_stochastic_v0_t>{}.Test();
|
||||
ArbStateTest<rocprofiler_pc_sampling_record_stochastic_v0_t>{}.Test();
|
||||
WaveIssueAndErrorTest<rocprofiler_pc_sampling_record_stochastic_v0_t,
|
||||
rocprofiler_pc_sampling_record_invalid_t>{}
|
||||
.Test();
|
||||
|
||||
// Tests common to both host trap and stochastic sampling.
|
||||
HwIdTest<rocprofiler_pc_sampling_record_host_trap_v0_t>{}.Test();
|
||||
|
||||
+2
@@ -309,6 +309,8 @@ public:
|
||||
::memset(&uni, 0, sizeof(uni));
|
||||
uni.snap.pc = dispatch->unique_id;
|
||||
uni.snap.correlation_id = dispatch->getMockId().raw;
|
||||
// mark sample valid in case of stochastic sampling tests
|
||||
uni.snap.perf_snapshot_data |= 0x1; // stochastic sample is valid
|
||||
dispatch->submit(uni);
|
||||
};
|
||||
void print()
|
||||
|
||||
+1
@@ -250,6 +250,7 @@ multithread_codeobj(size_t tid, Latch* latch)
|
||||
for(int s = 0; s < NUM_SAMPLES; s++)
|
||||
{
|
||||
uni.snap.pc = pc_base_addr + s;
|
||||
uni.snap.perf_snapshot_data |= 0x1; // sample is valid
|
||||
dispatch->submit(uni);
|
||||
}
|
||||
|
||||
|
||||
+124
-84
@@ -30,7 +30,8 @@
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/gfx9.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/parser_types.hpp"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/rocr.h"
|
||||
#include "lib/rocprofiler-sdk/pc_sampling/parser/stochastic_records.h"
|
||||
|
||||
#include <rocprofiler-sdk/pc_sampling.h>
|
||||
|
||||
// TODO: refactor the commented code for stochastic sampling
|
||||
|
||||
@@ -51,7 +52,6 @@
|
||||
|
||||
// ret.wave_count = sample.perf_snapshot_data1 & 0x3F;
|
||||
|
||||
// ret.wave_issued = sample.perf_snapshot_data >> 1;
|
||||
// ret.snapshot.dual_issue_valu = sample.perf_snapshot_data >> 2;
|
||||
// ret.snapshot.inst_type = sample.perf_snapshot_data >> 3;
|
||||
// ret.snapshot.reason_not_issued = (sample.perf_snapshot_data >> 7) & 0x7;
|
||||
@@ -103,90 +103,76 @@
|
||||
|
||||
// #undef BITSHIFT
|
||||
|
||||
// #define LUTOVERLOAD(sname) this->operator[](GFX::sname) = PCSAMPLE::sname
|
||||
#define LUTOVERLOAD(sname, rocp_prefix) this->operator[](GFX::sname) = rocp_prefix##_##sname
|
||||
#define LUTOVERLOAD_INST(sname) LUTOVERLOAD(sname, ROCPROFILER_PC_SAMPLING_INSTRUCTION)
|
||||
#define LUTOVERLOAD_INST_NOT_ISSUED(sname) \
|
||||
LUTOVERLOAD(sname, ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED)
|
||||
|
||||
// template <typename GFX>
|
||||
// class GFX_REASON_LUT : public std::array<int, 32>
|
||||
// {
|
||||
// public:
|
||||
// GFX_REASON_LUT()
|
||||
// {
|
||||
// std::memset(data(), 0, size() * sizeof(int));
|
||||
// LUTOVERLOAD(REASON_NOT_AVAILABLE);
|
||||
// LUTOVERLOAD(REASON_ALU);
|
||||
// LUTOVERLOAD(REASON_WAITCNT);
|
||||
// LUTOVERLOAD(REASON_INTERNAL);
|
||||
// LUTOVERLOAD(REASON_BARRIER);
|
||||
// LUTOVERLOAD(REASON_ARBITER);
|
||||
// LUTOVERLOAD(REASON_EX_STALL);
|
||||
// LUTOVERLOAD(REASON_OTHER_WAIT);
|
||||
// LUTOVERLOAD(REASON_SLEEP);
|
||||
// }
|
||||
// };
|
||||
template <typename GFX>
|
||||
struct gfx_inst_lut : public std::array<int, 32>
|
||||
{
|
||||
gfx_inst_lut()
|
||||
{
|
||||
std::memset(data(), 0, size() * sizeof(int));
|
||||
LUTOVERLOAD_INST(TYPE_VALU);
|
||||
LUTOVERLOAD_INST(TYPE_MATRIX);
|
||||
LUTOVERLOAD_INST(TYPE_SCALAR);
|
||||
LUTOVERLOAD_INST(TYPE_TEX);
|
||||
LUTOVERLOAD_INST(TYPE_LDS);
|
||||
LUTOVERLOAD_INST(TYPE_LDS_DIRECT);
|
||||
LUTOVERLOAD_INST(TYPE_FLAT);
|
||||
LUTOVERLOAD_INST(TYPE_EXPORT);
|
||||
LUTOVERLOAD_INST(TYPE_MESSAGE);
|
||||
LUTOVERLOAD_INST(TYPE_BARRIER);
|
||||
LUTOVERLOAD_INST(TYPE_BRANCH_NOT_TAKEN);
|
||||
LUTOVERLOAD_INST(TYPE_BRANCH_TAKEN);
|
||||
LUTOVERLOAD_INST(TYPE_JUMP);
|
||||
LUTOVERLOAD_INST(TYPE_OTHER);
|
||||
LUTOVERLOAD_INST(TYPE_NO_INST);
|
||||
LUTOVERLOAD_INST(TYPE_DUAL_VALU);
|
||||
}
|
||||
};
|
||||
|
||||
// template <typename GFX>
|
||||
// class GFX_INST_LUT : public std::array<int, 32>
|
||||
// {
|
||||
// public:
|
||||
// GFX_INST_LUT()
|
||||
// {
|
||||
// std::memset(data(), 0, size() * sizeof(int));
|
||||
// LUTOVERLOAD(TYPE_VALU);
|
||||
// LUTOVERLOAD(TYPE_MATRIX);
|
||||
// LUTOVERLOAD(TYPE_SCALAR);
|
||||
// LUTOVERLOAD(TYPE_TEX);
|
||||
// LUTOVERLOAD(TYPE_LDS);
|
||||
// LUTOVERLOAD(TYPE_LDS_DIRECT);
|
||||
// LUTOVERLOAD(TYPE_FLAT);
|
||||
// LUTOVERLOAD(TYPE_EXP);
|
||||
// LUTOVERLOAD(TYPE_MESSAGE);
|
||||
// LUTOVERLOAD(TYPE_BARRIER);
|
||||
// LUTOVERLOAD(TYPE_BRANCH_NOT_TAKEN);
|
||||
// LUTOVERLOAD(TYPE_BRANCH_TAKEN);
|
||||
// LUTOVERLOAD(TYPE_JUMP);
|
||||
// LUTOVERLOAD(TYPE_OTHER);
|
||||
// LUTOVERLOAD(TYPE_NO_INST);
|
||||
// LUTOVERLOAD(TYPE_DUAL_VALU);
|
||||
// }
|
||||
// };
|
||||
template <typename GFX>
|
||||
struct gfx_reason_lut : public std::array<int, 32>
|
||||
{
|
||||
gfx_reason_lut()
|
||||
{
|
||||
std::memset(data(), 0, size() * sizeof(int));
|
||||
LUTOVERLOAD_INST_NOT_ISSUED(REASON_NO_INSTRUCTION_AVAILABLE);
|
||||
LUTOVERLOAD_INST_NOT_ISSUED(REASON_ALU_DEPENDENCY);
|
||||
LUTOVERLOAD_INST_NOT_ISSUED(REASON_WAITCNT);
|
||||
LUTOVERLOAD_INST_NOT_ISSUED(REASON_INTERNAL_INSTRUCTION);
|
||||
LUTOVERLOAD_INST_NOT_ISSUED(REASON_BARRIER_WAIT);
|
||||
LUTOVERLOAD_INST_NOT_ISSUED(REASON_ARBITER_NOT_WIN);
|
||||
LUTOVERLOAD_INST_NOT_ISSUED(REASON_ARBITER_WIN_EX_STALL);
|
||||
LUTOVERLOAD_INST_NOT_ISSUED(REASON_OTHER_WAIT);
|
||||
LUTOVERLOAD_INST_NOT_ISSUED(REASON_SLEEP_WAIT);
|
||||
}
|
||||
};
|
||||
|
||||
// template <typename GFX>
|
||||
// inline int
|
||||
// translate_reason(int in)
|
||||
// {
|
||||
// static GFX_REASON_LUT<GFX> lut;
|
||||
// return lut[in & 0x1F];
|
||||
// }
|
||||
template <typename GFX>
|
||||
inline int
|
||||
translate_inst(int in)
|
||||
{
|
||||
static gfx_inst_lut<GFX> lut;
|
||||
return lut[in & 0x1F];
|
||||
}
|
||||
|
||||
// template <typename GFX>
|
||||
// inline int
|
||||
// translate_inst(int in)
|
||||
// {
|
||||
// static GFX_INST_LUT<GFX> lut;
|
||||
// return lut[in & 0x1F];
|
||||
// }
|
||||
template <typename GFX>
|
||||
inline int
|
||||
translate_reason(int in)
|
||||
{
|
||||
static gfx_reason_lut<GFX> lut;
|
||||
return lut[in & 0x1F];
|
||||
}
|
||||
|
||||
// #undef LUTOVERLOAD
|
||||
|
||||
// template <bool HostTrap, typename GFX>
|
||||
// inline rocprofiler_pc_sampling_record_t
|
||||
// copySample(const void* sample)
|
||||
// {
|
||||
// if(HostTrap) return copyHostTrapSample(*(const perf_sample_host_trap_v1*) sample);
|
||||
|
||||
// rocprofiler_pc_sampling_record_t ret =
|
||||
// copyStochasticSample<GFX>(*(const perf_sample_snapshot_v1*) sample);
|
||||
|
||||
// ret.snapshot.inst_type = translate_inst<GFX>(ret.snapshot.inst_type);
|
||||
// ret.snapshot.arb_state_issue = translate_arb<GFX>(ret.snapshot.arb_state_issue);
|
||||
// ret.snapshot.arb_state_stall = translate_arb<GFX>(ret.snapshot.arb_state_stall);
|
||||
// ret.snapshot.reason_not_issued = translate_reason<GFX>(ret.snapshot.reason_not_issued);
|
||||
|
||||
// return ret;
|
||||
// }
|
||||
#undef LUTOVERLOAD_INST_NOT_ISSUED
|
||||
#undef LUTOVERLOAD_INST
|
||||
#undef LUTOVERLOAD
|
||||
|
||||
// Extracts the inclusive bit range [bit_start, bit_end] of `val`, shifted down so that
// `bit_start` becomes bit 0.
// The whole expansion and every parameter are parenthesized so the macro composes correctly
// with surrounding operators (e.g. `a & ~EXTRACT_BITS(...)`) and with expression arguments.
// The field width (bit_end - bit_start + 1) must be smaller than 32, since the mask is built
// with `1U << width`.
#define EXTRACT_BITS(val, bit_end, bit_start)                                                      \
    (((val) >> (bit_start)) & ((1U << ((bit_end) - (bit_start) + 1)) - 1))
|
||||
|
||||
template <typename GFX, typename PcSamplingRecordT, typename SType>
|
||||
inline void
|
||||
@@ -228,8 +214,6 @@ copyHwId<GFX9, rocprofiler_pc_sampling_hw_id_v0_t>(rocprofiler_pc_sampling_hw_id
|
||||
hw_id.microengine_id = EXTRACT_BITS(hw_id_reg, 31, 30);
|
||||
}
|
||||
|
||||
#undef EXTRACT_BITS
|
||||
|
||||
template <typename PcSamplingRecordT, typename SType>
|
||||
inline PcSamplingRecordT
|
||||
copySampleHeader(const SType& sample)
|
||||
@@ -276,11 +260,65 @@ inline rocprofiler_pc_sampling_record_stochastic_v0_t
|
||||
copySample<GFX9, rocprofiler_pc_sampling_record_stochastic_v0_t>(const void* sample)
|
||||
{
|
||||
const auto& sample_ = *static_cast<const perf_sample_snapshot_v1*>(sample);
|
||||
auto ret = copySampleHeader<rocprofiler_pc_sampling_record_stochastic_v0_t>(sample_);
|
||||
|
||||
// Extracting data from the perf_snapshot_data register
|
||||
auto perf_snapshot_data = sample_.perf_snapshot_data;
|
||||
// The sample is valid iff perf_snapshot_data.valid == 1 and perf_snapshot_data.error == 0,
|
||||
// i.e. the valid bit (bit 0) is set and the error bit (bit 26) is clear.
|
||||
auto valid = static_cast<bool>(EXTRACT_BITS(perf_snapshot_data, 0, 0) &
|
||||
~EXTRACT_BITS(perf_snapshot_data, 26, 26));
|
||||
if(!valid)
|
||||
{
|
||||
// To reduce refactoring of the PC sampling parser, we agreed to internally represent
|
||||
// invalid samples with `rocprofiler_pc_sampling_record_stochastic_v0_t` with size 0.
|
||||
// Eventually, those records are replaced with rocprofiler_pc_sampling_record_invalid_t
|
||||
// and placed into the SDK buffer consumed by the end tool.
|
||||
rocprofiler_pc_sampling_record_stochastic_v0_t invalid{};
|
||||
invalid.size = 0;
|
||||
// No need to further process invalid samples
|
||||
return invalid;
|
||||
}
|
||||
|
||||
auto ret = copySampleHeader<rocprofiler_pc_sampling_record_stochastic_v0_t>(sample_);
|
||||
copyChipletId<GFX9>(ret, sample_);
|
||||
copyHwId<GFX9>(ret.hw_id, sample_.hw_id);
|
||||
ret.wave_count = sample_.perf_snapshot_data1 & 0x3F;
|
||||
// TODO: implement logic for manipulating stochastic related fields
|
||||
|
||||
// no memory counters on GFX9
|
||||
ret.flags.has_memory_counter = false;
|
||||
|
||||
// wave issued an instruction
|
||||
ret.wave_issued = EXTRACT_BITS(perf_snapshot_data, 1, 1);
|
||||
// type of issued instruction, valid only if `ret.wave_issued` is true.
|
||||
ret.inst_type = translate_inst<GFX9>(EXTRACT_BITS(perf_snapshot_data, 6, 3));
|
||||
// two VALU instructions issued in this cycle
|
||||
ret.snapshot.dual_issue_valu = EXTRACT_BITS(perf_snapshot_data, 2, 2);
|
||||
// reason for not issuing an instruction, valid only if `ret.wave_issued` is false
|
||||
ret.snapshot.reason_not_issued = translate_reason<GFX9>(EXTRACT_BITS(perf_snapshot_data, 9, 7));
|
||||
|
||||
// arbiter state information
|
||||
uint16_t arb_state = EXTRACT_BITS(perf_snapshot_data, 25, 10);
|
||||
ret.snapshot.arb_state_issue_valu = EXTRACT_BITS(arb_state, 7, 7);
|
||||
ret.snapshot.arb_state_issue_matrix = EXTRACT_BITS(arb_state, 6, 6);
|
||||
ret.snapshot.arb_state_issue_lds = EXTRACT_BITS(arb_state, 3, 3);
|
||||
ret.snapshot.arb_state_issue_scalar = EXTRACT_BITS(arb_state, 5, 5);
|
||||
ret.snapshot.arb_state_issue_vmem_tex = EXTRACT_BITS(arb_state, 4, 4);
|
||||
ret.snapshot.arb_state_issue_flat = EXTRACT_BITS(arb_state, 2, 2);
|
||||
ret.snapshot.arb_state_issue_exp = EXTRACT_BITS(arb_state, 1, 1);
|
||||
ret.snapshot.arb_state_issue_misc = EXTRACT_BITS(arb_state, 0, 0);
|
||||
|
||||
ret.snapshot.arb_state_stall_valu = EXTRACT_BITS(arb_state, 15, 15);
|
||||
ret.snapshot.arb_state_stall_matrix = EXTRACT_BITS(arb_state, 14, 14);
|
||||
ret.snapshot.arb_state_stall_lds = EXTRACT_BITS(arb_state, 11, 11);
|
||||
ret.snapshot.arb_state_stall_scalar = EXTRACT_BITS(arb_state, 13, 13);
|
||||
ret.snapshot.arb_state_stall_vmem_tex = EXTRACT_BITS(arb_state, 12, 12);
|
||||
ret.snapshot.arb_state_stall_flat = EXTRACT_BITS(arb_state, 10, 10);
|
||||
ret.snapshot.arb_state_stall_exp = EXTRACT_BITS(arb_state, 9, 9);
|
||||
ret.snapshot.arb_state_stall_misc = EXTRACT_BITS(arb_state, 8, 8);
|
||||
|
||||
// Extracting data from the perf_snapshot_data1 register
|
||||
// Active waves on CU at the moment of sampling
|
||||
ret.wave_count = EXTRACT_BITS(sample_.perf_snapshot_data1, 5, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -309,3 +347,5 @@ copySample<GFX11, rocprofiler_pc_sampling_record_stochastic_v0_t>(const void* sa
|
||||
// ret.wave_count = sample_.perf_snapshot_data1 & 0x3F;
|
||||
return ret;
|
||||
}
|
||||
|
||||
#undef EXTRACT_BITS
|
||||
|
||||
@@ -263,6 +263,7 @@ flush_internal_agent_buffers(rocprofiler_buffer_id_t buffer_id)
|
||||
auto* service = get_configured_pc_sampling_service().load();
|
||||
if(service && ctx->pc_sampler.get() == service)
|
||||
{
|
||||
rocprofiler_status_t status = ROCPROFILER_STATUS_SUCCESS;
|
||||
// The context `ctx` (that holds the buffer with `buffer_id`)
|
||||
// is the one containing PC sampling service.
|
||||
// The HSA interception table is registered.
|
||||
@@ -272,7 +273,10 @@ flush_internal_agent_buffers(rocprofiler_buffer_id_t buffer_id)
|
||||
if(agent_session->buffer_id.handle == buffer_id.handle)
|
||||
{
|
||||
// Flush internal PC sampling buffers filled by the agent
|
||||
return hsa::flush_internal_agent_buffers(agent_session.get());
|
||||
// NOTE: one rocprofiler-SDK PC sampling buffer can be tied
|
||||
// to multiple agent (agent sessions).
|
||||
status = hsa::flush_internal_agent_buffers(agent_session.get());
|
||||
if(status != ROCPROFILER_STATUS_SUCCESS) return status;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -281,6 +285,41 @@ flush_internal_agent_buffers(rocprofiler_buffer_id_t buffer_id)
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * @brief Drain the internal buffers of every agent session registered with the
 * configured PC sampling service.
 *
 * Every agent session is attempted even when an earlier one fails; each failure
 * is logged.
 *
 * @return ROCPROFILER_STATUS_ERROR when no PC sampling service is configured.
 * Otherwise the status of the first flush that failed, or
 * ROCPROFILER_STATUS_SUCCESS when all flushes succeeded.
 */
rocprofiler_status_t
flush_all_agent_buffers()
{
    auto* service = get_configured_pc_sampling_service().load();
    if(!service) return ROCPROFILER_STATUS_ERROR;

    // Keep the first failure so that a later successful flush cannot mask it.
    rocprofiler_status_t first_failure = ROCPROFILER_STATUS_SUCCESS;

    // Loop over all agents that have PC sampling service configured
    // and drain their internal buffers.
    // NOTE: one SDK buffer can consume data from multiple agents
    // (multiple HSA runtime buffers)
    for(const auto& [_, agent_session] : service->agent_sessions)
    {
        const rocprofiler_status_t status =
            flush_internal_agent_buffers(agent_session->buffer_id);
        if(status == ROCPROFILER_STATUS_SUCCESS) continue;

        ROCP_ERROR << "Failed to flush internal HSA buffers tied to rocp buffer "
                   << agent_session->buffer_id.handle;
        if(first_failure == ROCPROFILER_STATUS_SUCCESS) first_failure = status;
    }

    return first_failure;
}
|
||||
|
||||
void
|
||||
service_sync()
|
||||
{
|
||||
flush_all_agent_buffers();
|
||||
}
|
||||
|
||||
void
|
||||
service_fini()
|
||||
{
|
||||
flush_all_agent_buffers();
|
||||
}
|
||||
|
||||
} // namespace pc_sampling
|
||||
} // namespace rocprofiler
|
||||
|
||||
|
||||
@@ -67,6 +67,12 @@ is_pc_sample_service_configured(rocprofiler_agent_id_t agent_id);
|
||||
|
||||
rocprofiler_status_t
|
||||
flush_internal_agent_buffers(rocprofiler_buffer_id_t buffer_id);
|
||||
|
||||
void
|
||||
service_sync();
|
||||
|
||||
void
|
||||
service_fini();
|
||||
} // namespace pc_sampling
|
||||
} // namespace rocprofiler
|
||||
|
||||
|
||||
+66
-14
@@ -138,8 +138,9 @@ find_all_gpu_agents_supporting_pc_sampling_impl(rocprofiler_agent_version_t vers
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
const rocprofiler_pc_sampling_configuration_t
|
||||
extract_pc_sampling_config_prefer_stochastic(rocprofiler_agent_id_t agent_id)
|
||||
rocprofiler_pc_sampling_configuration_t
|
||||
extract_pc_sampling_config_prefer(rocprofiler_pc_sampling_method_t method,
|
||||
rocprofiler_agent_id_t agent_id)
|
||||
{
|
||||
auto cb = [](const rocprofiler_pc_sampling_configuration_t* configs,
|
||||
size_t num_config,
|
||||
@@ -158,31 +159,46 @@ extract_pc_sampling_config_prefer_stochastic(rocprofiler_agent_id_t agent_id)
|
||||
ROCPROFILER_CALL(rocprofiler_query_pc_sampling_agent_configurations(agent_id, cb, &configs),
|
||||
"Failed to query available configurations");
|
||||
|
||||
const rocprofiler_pc_sampling_configuration_t* first_host_trap_config = nullptr;
|
||||
const rocprofiler_pc_sampling_configuration_t* first_stochastic_config = nullptr;
|
||||
// Search until encountering on the stochastic configuration, if any.
|
||||
// Otherwise, use the host trap config
|
||||
const rocprofiler_pc_sampling_configuration_t* first_preferred_method_config = nullptr;
|
||||
const rocprofiler_pc_sampling_configuration_t* first_remained_method_config = nullptr;
|
||||
// Search until encountering the preferred method configuration, if any.
|
||||
// Otherwise, use what remained.
|
||||
for(auto const& cfg : configs)
|
||||
{
|
||||
if(cfg.method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
|
||||
if(cfg.method == method)
|
||||
{
|
||||
// Temporarily disable stochastic sampling as it's not fully supported.
|
||||
// first_stochastic_config = &cfg;
|
||||
// break;
|
||||
first_preferred_method_config = &cfg;
|
||||
break;
|
||||
}
|
||||
else if(!first_host_trap_config && cfg.method == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP)
|
||||
else if(!first_remained_method_config &&
|
||||
cfg.method != ROCPROFILER_PC_SAMPLING_METHOD_NONE &&
|
||||
cfg.method != ROCPROFILER_PC_SAMPLING_METHOD_LAST)
|
||||
{
|
||||
first_host_trap_config = &cfg;
|
||||
first_remained_method_config = &cfg;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the stochastic config is found. Use host trap config otherwise.
|
||||
// Check if the config with the preferred method is found. Use config with other method
|
||||
// otherwise.
|
||||
const rocprofiler_pc_sampling_configuration_t* picked_cfg =
|
||||
(first_stochastic_config != nullptr) ? first_stochastic_config : first_host_trap_config;
|
||||
(first_preferred_method_config != nullptr) ? first_preferred_method_config
|
||||
: first_remained_method_config;
|
||||
|
||||
return *picked_cfg;
|
||||
}
|
||||
|
||||
// Picks a PC sampling configuration for `agent_id`, asking the generic helper
// to prefer the stochastic method.
rocprofiler_pc_sampling_configuration_t
extract_pc_sampling_config_prefer_stochastic(rocprofiler_agent_id_t agent_id)
{
    constexpr auto preferred_method = ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC;
    return extract_pc_sampling_config_prefer(preferred_method, agent_id);
}
|
||||
|
||||
// Picks a PC sampling configuration for `agent_id`, asking the generic helper
// to prefer the host-trap method.
rocprofiler_pc_sampling_configuration_t
extract_pc_sampling_config_prefer_host_trap(rocprofiler_agent_id_t agent_id)
{
    constexpr auto preferred_method = ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP;
    return extract_pc_sampling_config_prefer(preferred_method, agent_id);
}
|
||||
|
||||
void
|
||||
rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
||||
rocprofiler_buffer_id_t /*buffer_id*/,
|
||||
@@ -306,6 +322,41 @@ test_fail_because_service_is_already_configured(
|
||||
ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED);
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Current limitation - Stochastic and Host-Trap PC sampling cannot coexist
|
||||
* on the same device simultaneously.
|
||||
*/
|
||||
void
|
||||
test_fail_stochastic_vs_host_trap(const callback_data* cb_data,
|
||||
rocprofiler_agent_id_t agent_id,
|
||||
const rocprofiler_pc_sampling_configuration_t* picked_pcs_config)
|
||||
{
|
||||
// Ensure that stochastic sampling has been configured on the device.
|
||||
if(picked_pcs_config->method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
|
||||
{
|
||||
// KFD is implemented in the way that if stochastic is configured,
|
||||
// no host-trap configuration will be returned (and vice-versa).
|
||||
// Thus, ensure that the following function, although prefers host-trap,
|
||||
// returns stochastic.
|
||||
auto still_stochastic_config = extract_pc_sampling_config_prefer_host_trap(agent_id);
|
||||
EXPECT_EQ(still_stochastic_config.method, ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC);
|
||||
|
||||
constexpr uint64_t host_trap_interva_us = 1;
|
||||
// Now, ensure that a user cannot still force rocprofiler-sdk and configure host-trap
|
||||
// sampling on the device with configured stochastic sampling.
|
||||
// ensure that stochastic and host trap sampling cannot coexist on the same device.
|
||||
EXPECT_EQ(
|
||||
rocprofiler_configure_pc_sampling_service(cb_data->client_ctx,
|
||||
agent_id,
|
||||
ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP,
|
||||
ROCPROFILER_PC_SAMPLING_UNIT_TIME,
|
||||
host_trap_interva_us,
|
||||
cb_data->client_buffer,
|
||||
0),
|
||||
ROCPROFILER_STATUS_ERROR_SERVICE_ALREADY_CONFIGURED);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST(pc_sampling, rocprofiler_configure_pc_sampling_service)
|
||||
@@ -388,6 +439,7 @@ TEST(pc_sampling, rocprofiler_configure_pc_sampling_service)
|
||||
"Failed to configure PC sampling service");
|
||||
|
||||
test_fail_because_service_is_already_configured(cb_data, agent_id, &pcs_config);
|
||||
test_fail_stochastic_vs_host_trap(cb_data, agent_id, &pcs_config);
|
||||
|
||||
// Cannot create PC sampling service in context different than the `cb_data->client_ctx`
|
||||
EXPECT_EQ(rocprofiler_configure_pc_sampling_service(another_ctx,
|
||||
|
||||
@@ -601,6 +601,7 @@ invoke_client_finalizer(rocprofiler_client_id_t client_id)
|
||||
|
||||
hsa::async_copy_sync();
|
||||
hsa::queue_controller_sync();
|
||||
pc_sampling::service_sync();
|
||||
|
||||
auto _fini_status = get_fini_status();
|
||||
if(_fini_status == 0) set_fini_status(-1);
|
||||
@@ -726,6 +727,8 @@ finalize()
|
||||
#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0
|
||||
// WARNING: this must precede `code_object::finalize()`
|
||||
pc_sampling::code_object::finalize();
|
||||
// WARNING: this must follow queue_controller_fini.
|
||||
pc_sampling::service_fini();
|
||||
#endif
|
||||
code_object::finalize();
|
||||
context::correlation_id_finalize();
|
||||
|
||||
@@ -44,14 +44,13 @@ namespace
|
||||
{
|
||||
struct FlatProfiler
|
||||
{
|
||||
public:
|
||||
FlatProfiler() = default;
|
||||
~FlatProfiler() = default;
|
||||
|
||||
CodeobjAddressTranslate translator;
|
||||
KernelObjectMap kernel_object_map;
|
||||
FlatProfile flat_profile;
|
||||
std::mutex global_mut;
|
||||
CodeobjAddressTranslate translator = {};
|
||||
KernelObjectMap kernel_object_map = {};
|
||||
FlatProfile flat_profile = {};
|
||||
std::mutex global_mut = {};
|
||||
};
|
||||
} // namespace
|
||||
|
||||
@@ -68,6 +67,7 @@ void
|
||||
fini()
|
||||
{
|
||||
delete flat_profiler;
|
||||
flat_profiler = nullptr;
|
||||
}
|
||||
|
||||
CodeobjAddressTranslate&
|
||||
@@ -186,15 +186,19 @@ dump_flat_profile()
|
||||
ss << "====================================\n" << std::endl;
|
||||
});
|
||||
|
||||
ss << "The total number of decoded samples: " << samples_num << std::endl;
|
||||
ss << "The total number of collected samples: " << client::pcs::total_samples_num()
|
||||
ss << "The total number of valid decoded samples: "
|
||||
<< flat_profile.get_valid_decoded_samples_num() << std::endl;
|
||||
ss << "The total number of invalid samples : " << flat_profile.get_invalid_samples_num()
|
||||
<< std::endl;
|
||||
|
||||
*utils::get_output_stream() << ss.str() << std::endl;
|
||||
|
||||
assert(samples_num == client::pcs::total_samples_num());
|
||||
// We expect at least one PC sample to be decoded/delivered;
|
||||
assert(samples_num > 0);
|
||||
utils::pcs_assert(
|
||||
samples_num == flat_profile.get_valid_decoded_samples_num(),
|
||||
"Number of collected valid samples different than the number of decoded samples.");
|
||||
utils::pcs_assert(samples_num > 0, "No valid samples collected/decoded.");
|
||||
utils::pcs_assert(flat_profile.more_valid_decoded_samples_expected(),
|
||||
"More invalid samples observed.");
|
||||
}
|
||||
|
||||
} // namespace address_translation
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include <rocprofiler-sdk/cxx/codeobj/code_printing.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <functional>
|
||||
#include <map>
|
||||
@@ -47,8 +48,8 @@ using marker_id_t = rocprofiler::sdk::codeobj::disassembly::marker_i
|
||||
*/
|
||||
struct inst_id_t
|
||||
{
|
||||
marker_id_t code_object_id;
|
||||
uint64_t pc_addr;
|
||||
marker_id_t code_object_id = 0;
|
||||
uint64_t pc_addr = 0;
|
||||
|
||||
bool operator==(const inst_id_t& b) const
|
||||
{
|
||||
@@ -97,12 +98,12 @@ public:
|
||||
uint64_t end_address() const { return end_address_; };
|
||||
|
||||
private:
|
||||
mutable std::shared_mutex mut;
|
||||
uint64_t code_object_id_;
|
||||
std::string kernel_name_;
|
||||
uint64_t begin_address_;
|
||||
uint64_t end_address_;
|
||||
std::vector<std::unique_ptr<Instruction>> instructions_;
|
||||
mutable std::shared_mutex mut = {};
|
||||
uint64_t code_object_id_ = 0;
|
||||
std::string kernel_name_ = {};
|
||||
uint64_t begin_address_ = 0;
|
||||
uint64_t end_address_ = 0;
|
||||
std::vector<std::unique_ptr<Instruction>> instructions_ = {};
|
||||
};
|
||||
|
||||
class KernelObjectMap
|
||||
@@ -156,8 +157,8 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string, std::unique_ptr<KernelObject>> kernel_object_map;
|
||||
mutable std::shared_mutex mut;
|
||||
std::unordered_map<std::string, std::unique_ptr<KernelObject>> kernel_object_map = {};
|
||||
mutable std::shared_mutex mut = {};
|
||||
|
||||
std::string form_key(uint64_t code_object_id, std::string kernel_name, uint64_t begin_address)
|
||||
{
|
||||
@@ -206,14 +207,14 @@ public:
|
||||
uint64_t sample_count() const { return sample_count_; };
|
||||
|
||||
private:
|
||||
mutable std::shared_mutex mut;
|
||||
mutable std::shared_mutex mut = {};
|
||||
|
||||
// FIXME: prevent direct access of the following fields.
|
||||
// The following fields should be accessible only from within `process` function.
|
||||
std::unique_ptr<Instruction> inst_;
|
||||
std::unique_ptr<Instruction> inst_ = {};
|
||||
// In case an instruction is sampled with different exec masks,
|
||||
// keep track of how many time each exec_mask was observed.
|
||||
std::map<uint64_t, uint64_t> exec_mask_counts_;
|
||||
std::map<uint64_t, uint64_t> exec_mask_counts_ = {};
|
||||
// How many times this instruction is sampled
|
||||
uint64_t sample_count_ = 0;
|
||||
};
|
||||
@@ -226,6 +227,8 @@ public:
|
||||
// write lock required
|
||||
void add_sample(std::unique_ptr<Instruction> instruction, uint64_t exec_mask)
|
||||
{
|
||||
// counting valid decoded samples
|
||||
valid_decoded_samples_num++;
|
||||
auto lock = std::unique_lock{mut};
|
||||
|
||||
inst_id_t inst_id = {.code_object_id = instruction->codeobj_id,
|
||||
@@ -256,10 +259,30 @@ public:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void add_invalid_sample()
|
||||
{
|
||||
// counting invalid samples
|
||||
invalid_decoded_samples_num++;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Verify that more valid decoded samples is generated.
|
||||
*/
|
||||
bool more_valid_decoded_samples_expected() const
|
||||
{
|
||||
return valid_decoded_samples_num > invalid_decoded_samples_num;
|
||||
}
|
||||
|
||||
// Current value of the valid decoded samples counter.
uint64_t get_valid_decoded_samples_num() const
{
    return valid_decoded_samples_num.load();
}
|
||||
|
||||
// Current value of the invalid samples counter.
uint64_t get_invalid_samples_num() const
{
    return invalid_decoded_samples_num.load();
}
|
||||
|
||||
private:
|
||||
// TODO: optimize to use unordered_map
|
||||
std::map<inst_id_t, std::unique_ptr<SampleInstruction>> samples;
|
||||
mutable std::shared_mutex mut;
|
||||
std::map<inst_id_t, std::unique_ptr<SampleInstruction>> samples = {};
|
||||
std::atomic<uint64_t> valid_decoded_samples_num = {};
|
||||
std::atomic<uint64_t> invalid_decoded_samples_num = {};
|
||||
mutable std::shared_mutex mut = {};
|
||||
};
|
||||
|
||||
std::mutex&
|
||||
|
||||
@@ -53,6 +53,11 @@ using avail_configs_vec_t = std::vector<rocprofiler_pc_sampling_configur
|
||||
using tool_agent_info_vec_t = std::vector<std::unique_ptr<tool_agent_info>>;
|
||||
using pc_sampling_buffer_id_vec_t = std::vector<rocprofiler_buffer_id_t>;
|
||||
|
||||
namespace
{
// Interval used when stochastic PC sampling is picked: 2^20 cycles.
constexpr uint64_t stochastic_interval = uint64_t{1} << 20;
}  // namespace
|
||||
|
||||
struct tool_agent_info
|
||||
{
|
||||
rocprofiler_agent_id_t agent_id;
|
||||
@@ -79,16 +84,14 @@ public:
|
||||
}
|
||||
|
||||
// GPU agents supporting PC sampling
|
||||
tool_agent_info_vec_t gpu_agents;
|
||||
// The total number of collected samples
|
||||
std::atomic<uint64_t> total_samples_num{0};
|
||||
tool_agent_info_vec_t gpu_agents = {};
|
||||
// ROCProfiler-SDK PC sampling buffers
|
||||
pc_sampling_buffer_id_vec_t buffer_ids;
|
||||
pc_sampling_buffer_id_vec_t buffer_ids = {};
|
||||
// The set that keeps track of reported code object loading/unloading events.
|
||||
// At the end of the test, the sets needs to be empty.
|
||||
// Namely, each loading event will insert a code object id into the set,
|
||||
// while each unloading event will delete a code object id from the set.
|
||||
code_object_id_set_t active_code_objects;
|
||||
code_object_id_set_t active_code_objects = {};
|
||||
};
|
||||
|
||||
// The reason for using raw pointers is the following.
|
||||
@@ -139,7 +142,7 @@ find_all_gpu_agents_supporting_pc_sampling_impl(rocprofiler_agent_version_t vers
|
||||
<< "type=" << _agents[i]->type << "\n";
|
||||
}
|
||||
|
||||
*utils::get_output_stream() << ss.str() << std::endl;
|
||||
*utils::get_output_stream() << ss.str() << "\n";
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -188,8 +191,8 @@ query_avail_configs_for_agent(tool_agent_info* agent_info)
|
||||
{
|
||||
// The query operation failed, so consider the PC sampling is unsupported at the agent.
|
||||
// This can happen if the PC sampling service is invoked within the ROCgdb.
|
||||
ss << "Querying PC sampling capabilities failed with status: " << status << std::endl;
|
||||
*utils::get_output_stream() << ss.str() << std::endl;
|
||||
ss << "Querying PC sampling capabilities failed with status: " << status << "\n";
|
||||
*utils::get_output_stream() << ss.str() << "\n";
|
||||
return false;
|
||||
}
|
||||
else if(agent_info->avail_configs->size() == 0)
|
||||
@@ -199,7 +202,8 @@ query_avail_configs_for_agent(tool_agent_info* agent_info)
|
||||
}
|
||||
|
||||
ss << "The agent with the id: " << agent_info->agent_id.handle << " supports the "
|
||||
<< agent_info->avail_configs->size() << " configurations: " << std::endl;
|
||||
<< agent_info->avail_configs->size() << " configurations: "
|
||||
<< "\n";
|
||||
size_t ind = 0;
|
||||
for(auto& cfg : *agent_info->avail_configs)
|
||||
{
|
||||
@@ -208,7 +212,11 @@ query_avail_configs_for_agent(tool_agent_info* agent_info)
|
||||
<< "unit: " << cfg.unit << ", "
|
||||
<< "min_interval: " << cfg.min_interval << ", "
|
||||
<< "max_interval: " << cfg.max_interval << ", "
|
||||
<< "flags: " << std::hex << cfg.flags << std::dec << std::endl;
|
||||
<< "flags: " << std::hex << cfg.flags << std::dec
|
||||
<< ((cfg.flags == ROCPROFILER_PC_SAMPLING_CONFIGURATION_FLAGS_INTERVAL_POW2)
|
||||
? " (an interval value must be power of 2)"
|
||||
: "")
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
*utils::get_output_stream() << ss.str() << std::flush;
|
||||
@@ -221,8 +229,9 @@ configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info,
|
||||
rocprofiler_context_id_t context_id,
|
||||
rocprofiler_buffer_id_t buffer_id)
|
||||
{
|
||||
int failures = MAX_FAILURES;
|
||||
size_t interval = 0;
|
||||
auto stochastic_picked = false;
|
||||
int failures = MAX_FAILURES;
|
||||
size_t interval = 0;
|
||||
do
|
||||
{
|
||||
// Update the list of available configurations
|
||||
@@ -245,9 +254,9 @@ configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info,
|
||||
{
|
||||
if(cfg.method == ROCPROFILER_PC_SAMPLING_METHOD_STOCHASTIC)
|
||||
{
|
||||
// Temporarily disable stochastic sampling as it's not fully supported.
|
||||
// first_stochastic_config = &cfg;
|
||||
// break;
|
||||
first_stochastic_config = &cfg;
|
||||
stochastic_picked = true;
|
||||
break;
|
||||
}
|
||||
else if(!first_host_trap_config &&
|
||||
cfg.method == ROCPROFILER_PC_SAMPLING_METHOD_HOST_TRAP)
|
||||
@@ -260,7 +269,7 @@ configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info,
|
||||
const rocprofiler_pc_sampling_configuration_t* picked_cfg =
|
||||
(first_stochastic_config != nullptr) ? first_stochastic_config : first_host_trap_config;
|
||||
|
||||
interval = picked_cfg->min_interval;
|
||||
interval = (stochastic_picked) ? stochastic_interval : picked_cfg->min_interval;
|
||||
|
||||
auto status = rocprofiler_configure_pc_sampling_service(context_id,
|
||||
agent_info->agent_id,
|
||||
@@ -272,8 +281,10 @@ configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info,
|
||||
if(status == ROCPROFILER_STATUS_SUCCESS)
|
||||
{
|
||||
*utils::get_output_stream()
|
||||
<< ">>> We chose PC sampling interval: " << interval
|
||||
<< " on the agent: " << agent_info->agent->id.handle << std::endl;
|
||||
<< ">>> We chose " << (stochastic_picked ? "stochastic" : "Host-Trap")
|
||||
<< " PC sampling with the interval: " << interval << " "
|
||||
<< (stochastic_picked ? "clock-cycles" : "micro seconds")
|
||||
<< " on the agent: " << agent_info->agent->id.handle << "\n";
|
||||
return;
|
||||
}
|
||||
else if(status != ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE)
|
||||
@@ -301,6 +312,106 @@ configure_pc_sampling_prefer_stochastic(tool_agent_info* agent_info,
|
||||
"Failed too many times configuring PC sampling service");
|
||||
}
|
||||
|
||||
/**
 * @brief Write the fields shared by all PC sampling record kinds to @p os.
 *
 * Emits, on a single line and without a trailing newline: code object id and
 * offset, timestamp, exec mask, workgroup id, wave in group, chiplet,
 * dispatch id and the internal/external correlation ids.
 *
 * @param os        Destination stream. Its base is switched to decimal by the
 *                  time the function returns.
 * @param pc_sample Record to print; it is dereferenced unconditionally.
 */
template <typename PcSamplingRecordT>
void
print_sample_common_fields(std::ostream& os, const PcSamplingRecordT* pc_sample)
{
    const auto& rec = *pc_sample;
    // Narrow fields are widened so that they print as numbers, not characters.
    const auto as_uint = [](auto value) { return static_cast<unsigned int>(value); };

    os << "(code_obj_id, offset): (" << rec.pc.code_object_id << ", 0x" << std::hex
       << rec.pc.code_object_offset << "), ";
    os << "timestamp: " << std::dec << rec.timestamp << ", ";
    os << "exec: " << std::hex << std::setw(16) << rec.exec_mask << ", ";

    os << "workgroup_id_(x=" << std::dec << std::setw(5) << rec.workgroup_id.x << ", ";
    os << "y=" << std::setw(5) << rec.workgroup_id.y << ", ";
    os << "z=" << std::setw(5) << rec.workgroup_id.z << "), ";

    os << "wave_in_group: " << std::setw(2) << as_uint(rec.wave_in_group) << ", ";
    os << "chiplet: " << std::setw(2) << as_uint(rec.hw_id.chiplet) << ", ";
    os << "dispatch_id: " << std::setw(7) << rec.dispatch_id << ",";

    os << "correlation: {internal=" << std::setw(7) << rec.correlation_id.internal << ", ";
    os << "external=" << std::setw(5) << rec.correlation_id.external.value << "}, ";
}
|
||||
|
||||
void
|
||||
print_sample(std::ostream& os, const rocprofiler_pc_sampling_record_host_trap_v0_t* sample)
|
||||
{
|
||||
print_sample_common_fields(os, sample);
|
||||
os << "\n";
|
||||
}
|
||||
|
||||
void
|
||||
print_sample(std::ostream& os, const rocprofiler_pc_sampling_record_stochastic_v0_t* sample)
|
||||
{
|
||||
print_sample_common_fields(os, sample);
|
||||
|
||||
if(sample->wave_issued)
|
||||
{
|
||||
auto* inst_c_str = rocprofiler_get_pc_sampling_instruction_type_name(
|
||||
static_cast<rocprofiler_pc_sampling_instruction_type_t>(sample->inst_type));
|
||||
utils::pcs_assert(inst_c_str != nullptr, "Invalid instruction type");
|
||||
os << "wave issued " << std::string(inst_c_str) << " instruction, ";
|
||||
}
|
||||
else
|
||||
{
|
||||
auto* reason_c_str = rocprofiler_get_pc_sampling_instruction_not_issued_reason_name(
|
||||
static_cast<rocprofiler_pc_sampling_instruction_not_issued_reason_t>(
|
||||
sample->snapshot.reason_not_issued));
|
||||
utils::pcs_assert(reason_c_str != nullptr, "Invalid not issued reason");
|
||||
os << "wave is stalled due to: " << std::string(reason_c_str) << " reason, ";
|
||||
}
|
||||
|
||||
auto snapshot = sample->snapshot;
|
||||
os << "two VALU instructions issued: " << static_cast<unsigned int>(snapshot.dual_issue_valu)
|
||||
<< ", ";
|
||||
|
||||
os << "arbiter state: {pipe issued: ("
|
||||
<< "VALU: " << static_cast<unsigned int>(snapshot.arb_state_issue_valu) << ", "
|
||||
<< "MATRIX: " << static_cast<unsigned int>(snapshot.arb_state_issue_matrix) << ", "
|
||||
<< "LDS: " << static_cast<unsigned int>(snapshot.arb_state_issue_lds) << ", "
|
||||
<< "LDS_DIRECT: " << static_cast<unsigned int>(snapshot.arb_state_issue_lds_direct) << ", "
|
||||
<< "SCALAR: " << static_cast<unsigned int>(snapshot.arb_state_issue_scalar) << ", "
|
||||
<< "TEX: " << static_cast<unsigned int>(snapshot.arb_state_issue_vmem_tex) << ", "
|
||||
<< "FLAT: " << static_cast<unsigned int>(snapshot.arb_state_issue_flat) << ", "
|
||||
<< "EXPORT: " << static_cast<unsigned int>(snapshot.arb_state_issue_exp) << ", "
|
||||
<< "MISC: " << static_cast<unsigned int>(snapshot.arb_state_issue_misc) << "), "
|
||||
<< "pipe stalled: ("
|
||||
<< "VALU: " << static_cast<unsigned int>(snapshot.arb_state_stall_valu) << ", "
|
||||
<< "MATRIX: " << static_cast<unsigned int>(snapshot.arb_state_stall_matrix) << ", "
|
||||
<< "LDS: " << static_cast<unsigned int>(snapshot.arb_state_stall_lds) << ", "
|
||||
<< "LDS_DIRECT: " << static_cast<unsigned int>(snapshot.arb_state_stall_lds_direct) << ", "
|
||||
<< "SCALAR: " << static_cast<unsigned int>(snapshot.arb_state_stall_scalar) << ", "
|
||||
<< "TEX: " << static_cast<unsigned int>(snapshot.arb_state_stall_vmem_tex) << ", "
|
||||
<< "FLAT: " << static_cast<unsigned int>(snapshot.arb_state_stall_flat) << ", "
|
||||
<< "EXPORT: " << static_cast<unsigned int>(snapshot.arb_state_stall_exp) << ", "
|
||||
<< "MISC: " << static_cast<unsigned int>(snapshot.arb_state_stall_misc) << ")}";
|
||||
|
||||
os << "\n";
|
||||
}
|
||||
|
||||
template <typename PcSamplingRecordT>
|
||||
static inline void
|
||||
process_sample(const PcSamplingRecordT* pc_sample,
|
||||
address_translation::CodeobjAddressTranslate& translator,
|
||||
address_translation::FlatProfile& flat_profile)
|
||||
{
|
||||
// Ignore samples from blit kernels or self-modifying code.
|
||||
if(pc_sample->correlation_id.internal == ROCPROFILER_CORRELATION_ID_INTERNAL_NONE) return;
|
||||
|
||||
auto corr_id = pc_sample->correlation_id;
|
||||
// Internal correlation IDs are generated by the ROCProfiler-SDK for
|
||||
// kernel dispatches only. Similarly, the test tool generate external
|
||||
// correlation IDs for the kernel dispatches only.
|
||||
// Thus, we should expect them to be equal.
|
||||
assert(corr_id.internal == corr_id.external.value);
|
||||
assert(corr_id.external.value > 0);
|
||||
|
||||
// Decoding the PC
|
||||
auto inst = translator.get(pc_sample->pc.code_object_id, pc_sample->pc.code_object_offset);
|
||||
flat_profile.add_sample(std::move(inst), pc_sample->exec_mask);
|
||||
|
||||
// TODO: introduce checks specific to stochastic sampling
|
||||
// TODO: print an instruction inside print_sample
|
||||
}
|
||||
|
||||
void
|
||||
rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
||||
rocprofiler_buffer_id_t /*buffer_id*/,
|
||||
@@ -311,7 +422,7 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << "The number of delivered samples is: " << num_headers << ", "
|
||||
<< "while the number of dropped samples is: " << drop_count << std::endl;
|
||||
<< "while the number of dropped samples is: " << drop_count << "\n";
|
||||
|
||||
auto& flat_profile = client::address_translation::get_flat_profile();
|
||||
auto& translator = client::address_translation::get_address_translator();
|
||||
@@ -340,48 +451,26 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
||||
{
|
||||
auto* pc_sample = static_cast<rocprofiler_pc_sampling_record_host_trap_v0_t*>(
|
||||
cur_header->payload);
|
||||
|
||||
ss << "(code_obj_id, offset): (" << pc_sample->pc.code_object_id << ", 0x"
|
||||
<< std::hex << pc_sample->pc.code_object_offset << "), "
|
||||
<< "timestamp: " << std::dec << pc_sample->timestamp << ", "
|
||||
<< "exec: " << std::hex << std::setw(16) << pc_sample->exec_mask << ", "
|
||||
<< "workgroup_id_(x=" << std::dec << std::setw(5)
|
||||
<< pc_sample->workgroup_id.x << ", "
|
||||
<< "y=" << std::setw(5) << pc_sample->workgroup_id.y << ", "
|
||||
<< "z=" << std::setw(5) << pc_sample->workgroup_id.z << "), "
|
||||
<< "wave_in_group: " << std::setw(2)
|
||||
<< static_cast<unsigned int>(pc_sample->wave_in_group) << ", "
|
||||
<< "chiplet: " << std::setw(2)
|
||||
<< static_cast<unsigned int>(pc_sample->hw_id.chiplet) << ", "
|
||||
<< "dispatch_id: " << std::setw(7) << pc_sample->dispatch_id << ","
|
||||
<< "correlation: {internal=" << std::setw(7)
|
||||
<< pc_sample->correlation_id.internal << ", "
|
||||
<< "external=" << std::setw(5) << pc_sample->correlation_id.external.value
|
||||
<< "}" << std::endl;
|
||||
|
||||
// Ignore samples from blit kernels.
|
||||
if(pc_sample->correlation_id.internal ==
|
||||
ROCPROFILER_CORRELATION_ID_INTERNAL_NONE)
|
||||
continue;
|
||||
|
||||
total_samples_num() += 1;
|
||||
|
||||
auto corr_id = pc_sample->correlation_id;
|
||||
// Internal correlation IDs are generated by the ROCProfiler-SDK for
|
||||
// kernel dispatches only. Similarly, the test tool generate external
|
||||
// correlation IDs for the kernel dispatches only.
|
||||
// Thus, we should expect them to be equal.
|
||||
assert(corr_id.internal == corr_id.external.value);
|
||||
assert(corr_id.external.value > 0);
|
||||
|
||||
// Decoding the PC
|
||||
auto inst = translator.get(pc_sample->pc.code_object_id,
|
||||
pc_sample->pc.code_object_offset);
|
||||
flat_profile.add_sample(std::move(inst), pc_sample->exec_mask);
|
||||
print_sample(ss, pc_sample);
|
||||
process_sample(pc_sample, translator, flat_profile);
|
||||
}
|
||||
else if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_STOCHASTIC_V0_SAMPLE)
|
||||
{
|
||||
auto* pc_sample = static_cast<rocprofiler_pc_sampling_record_stochastic_v0_t*>(
|
||||
cur_header->payload);
|
||||
print_sample(ss, pc_sample);
|
||||
process_sample(pc_sample, translator, flat_profile);
|
||||
}
|
||||
else if(cur_header->kind == ROCPROFILER_PC_SAMPLING_RECORD_INVALID_SAMPLE)
|
||||
{
|
||||
// tracking number of invalid samples
|
||||
flat_profile.add_invalid_sample();
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(false);
|
||||
std::cerr << "Unexpected kind of PC sampling record: " << cur_header->kind
|
||||
<< "\n";
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -391,7 +480,7 @@ rocprofiler_pc_sampling_callback(rocprofiler_context_id_t /*context_id*/,
|
||||
}
|
||||
|
||||
// TODO: do we need some sync here?
|
||||
*utils::get_output_stream() << ss.str() << std::endl;
|
||||
*utils::get_output_stream() << ss.str() << "\n";
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
@@ -406,12 +495,7 @@ void
|
||||
fini()
|
||||
{
|
||||
delete pc_sampler;
|
||||
}
|
||||
|
||||
std::atomic<uint64_t>&
|
||||
total_samples_num()
|
||||
{
|
||||
return pc_sampler->total_samples_num;
|
||||
pc_sampler = nullptr;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -421,9 +505,11 @@ configure_pc_sampling_on_all_agents(rocprofiler_context_id_t context)
|
||||
|
||||
if(pc_sampler->gpu_agents.empty())
|
||||
{
|
||||
*utils::get_output_stream() << "No availabe gpu agents supporting PC sampling" << std::endl;
|
||||
*utils::get_output_stream() << "No availabe gpu agents supporting PC sampling"
|
||||
<< "\n";
|
||||
// Emit the message to skip the test.
|
||||
std::cerr << "PC sampling unavailable" << std::endl;
|
||||
std::cerr << "PC sampling unavailable"
|
||||
<< "\n";
|
||||
// Exit with no error if none of the GPUs support PC sampling.
|
||||
exit(0);
|
||||
}
|
||||
@@ -481,7 +567,8 @@ flush_and_destroy_buffers()
|
||||
if(status == ROCPROFILER_STATUS_ERROR_BUFFER_BUSY)
|
||||
{
|
||||
*utils::get_output_stream()
|
||||
<< "The buffer is busy, so we cannot destroy it at the moment." << std::endl;
|
||||
<< "The buffer is busy, so we cannot destroy it at the moment."
|
||||
<< "\n";
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -40,9 +40,6 @@ init();
|
||||
void
|
||||
fini();
|
||||
|
||||
std::atomic<uint64_t>&
|
||||
total_samples_num();
|
||||
|
||||
void
|
||||
configure_pc_sampling_on_all_agents(rocprofiler_context_id_t context);
|
||||
|
||||
|
||||
@@ -33,5 +33,19 @@ get_output_stream()
|
||||
static std::ostream* _v = nullptr;
|
||||
return _v;
|
||||
}
|
||||
|
||||
/**
 * @brief Reports @p error_msg on stderr and aborts the process when
 * @p condition does not hold.
 *
 * @param condition value that is expected to be true
 * @param error_msg text printed after the "PC Sampling Assertion Error: "
 * prefix when the expectation is violated
 */
void
pcs_assert(bool condition, std::string_view error_msg)
{
    // Nothing to report when the expectation holds.
    if(condition) return;

    std::cerr << "PC Sampling Assertion Error: " << error_msg << "\n";
    abort();
}
|
||||
} // namespace utils
|
||||
} // namespace client
|
||||
|
||||
@@ -61,5 +61,8 @@ namespace utils
|
||||
{
|
||||
std::ostream*&
|
||||
get_output_stream();
|
||||
}
|
||||
|
||||
void
|
||||
pcs_assert(bool condition, std::string_view error_msg);
|
||||
} // namespace utils
|
||||
} // namespace client
|
||||
|
||||
@@ -48,3 +48,5 @@ from __future__ import absolute_import
|
||||
|
||||
from . import pytest_utils
|
||||
")
|
||||
|
||||
add_subdirectory(pc_sampling)
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
set(PACKAGE_OUTPUT_DIR
|
||||
${ROCPROFILER_SDK_TESTS_BINARY_DIR}/pytest-packages/rocprofiler_sdk/pc_sampling)
|
||||
|
||||
file(
|
||||
WRITE "${PACKAGE_OUTPUT_DIR}/__init__.py"
|
||||
"#
|
||||
from __future__ import absolute_import
|
||||
|
||||
from . import exec_mask_manipulation
|
||||
")
|
||||
|
||||
add_subdirectory(exec_mask_manipulation)
|
||||
add_subdirectory(stochastic)
|
||||
add_subdirectory(transpose_multiple_agents)
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
set(PACKAGE_OUTPUT_DIR
|
||||
${ROCPROFILER_SDK_TESTS_BINARY_DIR}/pytest-packages/rocprofiler_sdk/pc_sampling/exec_mask_manipulation
|
||||
)
|
||||
|
||||
set(PC_SAMPLING_PYTHON_SOURCES __init__.py csv.py json.py)
|
||||
|
||||
foreach(_FILE ${PC_SAMPLING_PYTHON_SOURCES})
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/${_FILE} ${PACKAGE_OUTPUT_DIR}/${_FILE}
|
||||
COPYONLY)
|
||||
endforeach()
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
from __future__ import absolute_import
|
||||
+210
@@ -0,0 +1,210 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def stochastic_assert(df, df_condition_selection, max_failing_samples=10):
    """Assert that at most ``max_failing_samples`` rows of ``df`` violate a condition.

    Args:
        df: DataFrame holding the samples being checked.
        df_condition_selection: boolean selector over the rows of ``df``;
            True marks a row that satisfies the condition.
        max_failing_samples: number of violating rows that is still tolerated.

    Raises:
        AssertionError: if more than ``max_failing_samples`` rows violate the
            condition.
    """
    # TODO: When asserting certain conditions related to exec_masks for all samples,
    # we observe some failures.
    # This usually happens because a small number of samples (e.g., 1-10 out of 100k)
    # do not satisfy the condition. This is either a regression in the ROCr 2nd level
    # trap handler (as sometimes the execution mask or correlation ID mismatches), or
    # just the stochastic nature of the sampling (meaning our checks are too strict).
    # To relax the checks, this assertion allows a small number of samples to
    # disobey the condition.
    # This is a temporary solution until the root cause of the issue is found.

    # extract the failing samples
    failing_samples = df[~df_condition_selection]
    num_failing = len(failing_samples)
    # Report the actual numbers so that a failure can be triaged from the log alone.
    assert num_failing <= max_failing_samples, (
        f"Too many failing samples: {num_failing} of {len(df)} rows violate the "
        f"condition (at most {max_failing_samples} allowed)"
    )
|
||||
|
||||
|
||||
# Retained in case the workgroup_id information is brought back.
def validate_workgoup_id_x_y_z(df, max_x, max_y, max_z):
    """Check that every workgroup column of ``df`` lies within ``[0, max]``.

    The X, Y and Z columns are checked in that order, each against its own
    upper bound (``max_x``, ``max_y``, ``max_z``).
    """
    bounds_per_column = (
        ("Workgroup_Size_X", max_x),
        ("Workgroup_Size_Y", max_y),
        ("Workgroup_Size_Z", max_z),
    )
    for column, upper_bound in bounds_per_column:
        values = df[column].astype(int)
        assert (values >= 0).all()
        assert (values <= upper_bound).all()
|
||||
|
||||
|
||||
# Retained in case the wave_id information is brought back.
def validate_wave_id(df, max_wave_id):
    """Check that no value of the "Wave_Id" column exceeds ``max_wave_id``."""
    wave_ids = df["Wave_Id"].astype(int)
    within_limit = wave_ids <= max_wave_id
    assert within_limit.all()
|
||||
|
||||
|
||||
# Retained in case the chiplet information is brought back.
def validate_chiplet(df, max_chiplet):
    """Check that no value of the "Chiplet" column exceeds ``max_chiplet``."""
    chiplets = df["Chiplet"].astype(int)
    within_limit = chiplets <= max_chiplet
    assert within_limit.all()
|
||||
|
||||
|
||||
def validate_instruction_decoding(
    df,
    inst_str,
    exec_mask_uint64: np.uint64 = None,
    source_code_lines_range: tuple = None,
    all_source_lines_samples=False,
):
    """Validate the samples of ``df`` whose instruction starts with ``inst_str``.

    Args:
        df: DataFrame with the "Instruction" column, plus "Exec_Mask" and/or
            "Instruction_Comment" when the respective check is requested.
        inst_str: prefix that selects the instructions to validate.
        exec_mask_uint64: when given, the execution mask the selected samples
            are expected to carry (checked with ``stochastic_assert``, so a
            few mismatches are tolerated).
        source_code_lines_range: when given, an inclusive ``(start, end)`` pair
            of source line numbers the selected samples must map to.
        all_source_lines_samples: when True (and a range is given), every line
            of the range must have been sampled at least once.

    Raises:
        AssertionError: if no sample matches ``inst_str`` or a check fails.
    """
    # Rows whose instruction is not a string (e.g. a missing value) never match.
    # Make a copy, so that we don't work on (modify) a view of the caller's frame.
    df_inst = df[
        df["Instruction"].apply(
            lambda inst: isinstance(inst, str) and inst.startswith(inst_str)
        )
    ].copy()

    assert not df_inst.empty, f"No samples found for instruction '{inst_str}'"

    # assert the exec mask if requested
    if exec_mask_uint64 is not None:
        stochastic_assert(
            df_inst, df_inst["Exec_Mask"].astype(np.uint64) == exec_mask_uint64
        )

    # assert whether the samples' source code lines belong to the provided range
    if source_code_lines_range is not None:
        start_range, end_range = source_code_lines_range
        # The instruction comment is usually in the following format:
        # /path/to/source/file.cpp:line_num
        df_inst["source_line_num"] = df_inst["Instruction_Comment"].apply(
            lambda source_line: int(source_line.split(":")[-1])
        )
        assert (
            df_inst["source_line_num"] >= start_range
        ).all(), f"'{inst_str}' sampled before source line {start_range}"
        assert (
            df_inst["source_line_num"] <= end_range
        ).all(), f"'{inst_str}' sampled after source line {end_range}"

        # if requested, check if all lines from the range are sampled
        if all_source_lines_samples:
            assert df_inst["source_line_num"].nunique() == (
                end_range - start_range + 1
            ), f"Not all source lines of '{inst_str}' were sampled"
|
||||
|
||||
|
||||
def validate_instruction_comment(df):
    """Check that every sample carries a non-empty instruction comment.

    The testing application is built with debug symbols, so a comment is
    expected for each sample; both "" and "nullptr" count as missing.
    """
    comments = df["Instruction_Comment"]
    missing_comment = comments.isin(["", "nullptr"])
    assert not missing_comment.any()
|
||||
|
||||
|
||||
def validate_instruction_correlation_id_relation(df):
    """Check that Correlation_Id is 0 if and only if the instruction is not decoded.

    Samples without a decoded instruction originate from either blit kernels
    or self-modifying code, and their correlation id should always be zero.
    """
    not_decoded = df["Instruction"].isin(["", "nullptr"])
    cid_is_zero = df["Correlation_Id"] == 0

    # Direction 1: an undecoded instruction implies a zero correlation id.
    assert cid_is_zero[not_decoded].all()

    # Direction 2: a zero correlation id implies an undecoded instruction.
    assert not_decoded[cid_is_zero].all()

    # Both groups therefore contain the same number of samples.
    assert int(not_decoded.sum()) == int(cid_is_zero.sum())

    # No API tracing is enabled, so the internal correlation id
    # should match the dispatch id.
    assert (df["Correlation_Id"] == df["Dispatch_Id"]).all()
|
||||
|
||||
|
||||
def validate_exec_mask_based_on_correlation_id(df):
    """Check that the exec mask of each sample matches its kernel's correlation ID.

    The function assumes that each kernel launches 1024 blocks and that each
    block contains a number of threads equal to the correlation ID of the
    kernel. Both checks go through ``stochastic_assert``, so a small number of
    mismatching samples is tolerated. ``df`` is not modified.

    Args:
        df: DataFrame with the "Exec_Mask" and "Correlation_Id" columns.
    """
    # The exec mask of a sample should contain a number of ones equal to the
    # correlation ID of the kernel during whose execution the sample was generated.
    active_simd_threads = df["Exec_Mask"].apply(
        lambda exec_mask: bin(exec_mask).count("1")
    )
    stochastic_assert(df, active_simd_threads == df["Correlation_Id"])

    # TODO: Comment out the following code if it causes spurious fails.
    # The more conservative constraint, based on experience, follows.
    # The exec mask of sampled instructions of the kernels respects this pattern:
    # cid -> exec
    # 1 -> 0b1
    # 2 -> 0b11
    # 3 -> 0b111
    # ...
    # 64 -> 0xffffffffffffffff
    # (1 << cid) - 1 sets exactly the lowest `cid` bits.
    expected_exec_mask = (
        df["Correlation_Id"].astype(int).apply(lambda cid: (1 << cid) - 1)
    )

    # TODO: exec should be in hex and that will ease the comparison
    stochastic_assert(
        df, df["Exec_Mask"].astype(np.uint64) == expected_exec_mask.astype(np.uint64)
    )
|
||||
|
||||
|
||||
def exec_mask_manipulation_validate_csv(df, all_sampled=False):
    """Validate the CSV samples of the exec-mask-manipulation test.

    Args:
        df: DataFrame with the PC samples read from the CSV output.
        all_sampled: when True, additionally require that every correlation ID
            and every expected source line has been sampled.
    """
    assert not df.empty

    validate_instruction_comment(df)
    validate_instruction_correlation_id_relation(df)

    # Samples with non-zero correlation IDs (and thus decoded instructions).
    dispatched_df = df[df["Correlation_Id"] != 0]
    dispatched_cids = dispatched_df["Correlation_Id"].astype(int)

    # exactly 65 kernels and 65 correlation IDs
    assert (dispatched_cids >= 1).all()
    assert (dispatched_cids <= 65).all()
    if all_sampled:
        # all correlation IDs must be sampled
        assert len(dispatched_cids.unique()) == 65

    # The first 64 kernels are validated on a copy, so that the validator
    # never works on (modifies) a view.
    is_first_64 = dispatched_df["Correlation_Id"] <= 64
    validate_exec_mask_based_on_correlation_id(dispatched_df[is_first_64].copy())

    # The last kernel: both v_rcp flavours must be decoded properly.
    last_kernel_df = df[df["Correlation_Id"] == 65]
    expectations = (
        # v_rcp_f64 is executed by even SIMD threads
        ("v_rcp_f64", 0x5555555555555555, (288, 387)),
        # v_rcp_f32 is executed by odd SIMD threads
        ("v_rcp_f32", 0xAAAAAAAAAAAAAAAA, (391, 490)),
    )
    for inst_prefix, exec_mask, line_range in expectations:
        validate_instruction_decoding(
            last_kernel_df,
            inst_prefix,
            exec_mask_uint64=np.uint64(exec_mask),
            source_code_lines_range=line_range,
            all_source_lines_samples=all_sampled,
        )
|
||||
+244
@@ -0,0 +1,244 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def validate_json_exec_mask_manipulation(
    data_json,
    pc_sampling_method="host_trap",
    all_sampled=False,
    max_number_of_failing_records=30,
):
    """Validate the JSON PC samples of the exec-mask-manipulation test.

    Args:
        data_json: parsed JSON output; the "agents", "strings" and
            "buffer_records" entries are read.
        pc_sampling_method: suffix of the "pc_sample_<method>" buffer-record
            list that is validated.
        all_sampled: when True, additionally require that all correlation IDs,
            waves, source lines, chiplets and (SIMD, waveslot) pairs were sampled.
        max_number_of_failing_records: number of failed exec-mask checks that
            is still tolerated (see the TODO below).

    Raises:
        AssertionError: if any check fails.
    """
    # All checks are done procedurally in a single pass over the samples to
    # avoid iterating over the list multiple times. The checks are inlined on
    # purpose: wrapping them in dedicated functions was observed to introduce
    # significant overhead.

    # the function assumes a homogeneous system
    agents = data_json["agents"]
    gpu_agents = list(filter(lambda agent: agent["type"] == 2, agents))
    # There should be at least one GPU agent
    assert len(gpu_agents) > 0
    first_gpu_agent = gpu_agents[0]
    num_xcc = first_gpu_agent["num_xcc"]
    max_waves_per_simd = first_gpu_agent["max_waves_per_simd"]
    simd_per_cu = first_gpu_agent["simd_per_cu"]

    instructions = data_json["strings"]["pc_sample_instructions"]
    comments = data_json["strings"]["pc_sample_comments"]

    # execution mask where even SIMD lanes are active;
    # corresponds to the v_rcp_f64 instructions of the last kernel
    even_simds_active_exec_mask = np.uint64(int("5555555555555555", 16))
    # start and end source code lines of the v_rcp_f64 instructions of the last kernel
    v_rcp_f64_start_line_num, v_rcp_f64_end_line_num = 288, 387
    # execution mask where odd SIMD lanes are active;
    # corresponds to the v_rcp_f32 instructions of the last kernel
    odd_simds_active_exec_mask = np.uint64(int("AAAAAAAAAAAAAAAA", 16))
    # start and end source code lines of the v_rcp_f32 instructions of the last kernel
    v_rcp_f32_start_line_num, v_rcp_f32_end_line_num = 391, 490

    # sampled wave_in_grp values of the last kernel
    kernel65_sampled_wave_in_grp = set()
    # sampled source lines of the last kernel matching v_rcp_f64 instructions
    kernel65_v_rcp_64_sampled_source_line_set = set()
    # sampled source lines of the last kernel matching v_rcp_f32 instructions
    kernel65_v_rcp_f32_sampled_source_line_set = set()
    # sampled correlation IDs
    sampled_cids_set = set()
    # pairs of sampled SIMD IDs and waveslot IDs
    sampled_simd_waveslots_pairs = set()
    # sampled chiplets
    sampled_chiplets = set()
    # sampled VMIDs
    sampled_vmids = set()

    # TODO: Same reason as for introducing stochastic_assert inside csv.py.
    # When asserting certain conditions related to exec_masks for all samples,
    # we observe some failures.
    # This usually happens because a small number of samples (e.g., 1-10 out of 100k)
    # do not satisfy the condition. This is either a regression in the ROCr 2nd level
    # trap handler (as sometimes the execution mask or correlation ID mismatches), or
    # just the stochastic nature of the sampling (meaning our checks are too strict).
    # To relax the checks, a small number of samples is allowed to disobey
    # the condition.
    # This is a temporary solution until the root cause of the issue is found.
    failing_exec_mask_checks_samples_num = 0
    # Failing samples were noticed in:
    # 1. kernels 1-64
    # 2. kernel 65 even SIMD lanes
    # 3. kernel 65 odd SIMD lanes
    # The number of failing samples is less than 10 per category, which is
    # where the default budget of 30 comes from.

    for sample in data_json["buffer_records"][f"pc_sample_{pc_sampling_method}"]:
        record = sample["record"]
        cid = record["corr_id"]["internal"]

        # pull information from hw_id
        hw_id = record["hw_id"]
        sampled_chiplets.add(hw_id["chiplet"])
        sampled_simd_waveslots_pairs.add((hw_id["simd_id"], hw_id["wave_id"]))
        sampled_vmids.add(hw_id["vm_id"])

        # Checks specific for all samples

        # cids must be non-negative numbers
        assert cid >= 0

        inst_index = sample["inst_index"]

        # Since we're not enabling any kind of API tracing, the internal
        # correlation id should be equal to the dispatch_id
        assert cid == record["dispatch_id"]

        if cid == 0:
            # The sample originates either from a blit kernel or self-modifying code.
            # Thus, the code object is unknown, as well as the instruction.
            assert record["pc"]["code_object_id"] == 0
            assert inst_index == -1
        else:
            # Update the set of sampled cids
            sampled_cids_set.add(cid)

            # All samples with a non-zero correlation ID should pass the following
            # checks: the code object is known, and so is the instruction.
            assert record["pc"]["code_object_id"] != 0
            assert inst_index != -1

            wgid = record["wrkgrp_id"]
            # check the coordinates of the workgroup
            assert wgid["x"] >= 0 and wgid["x"] <= 1023
            assert wgid["y"] == 0
            assert wgid["z"] == 0

            wave_in_grp = record["wave_in_grp"]
            exec_mask = record["exec_mask"]

            if cid < 65:
                # checks specific for samples from the first 64 kernels
                assert wave_in_grp == 0

                # The check assumes that each kernel launches 1024 blocks.
                # Each block contains a number of threads that matches the
                # correlation ID of the kernel. The exec mask of a sample should
                # contain a number of ones equal to the correlation ID of the
                # kernel during whose execution the sample was generated.
                if bin(exec_mask).count("1") != cid:
                    failing_exec_mask_checks_samples_num += 1

                # TODO: Comment out the following code if it causes spurious fails.
                # The more conservative constraint, based on experience, follows.
                # The exec mask of sampled instructions respects this pattern:
                # cid -> exec
                # 1 -> 0b1
                # 2 -> 0b11
                # 3 -> 0b111
                # ...
                # 64 -> 0xffffffffffffffff
                exec_mask_str = "0b" + "1" * cid
                if np.uint64(exec_mask) != np.uint64(int(exec_mask_str, 2)):
                    failing_exec_mask_checks_samples_num += 1
            else:
                # No more than 65 cids
                assert cid == 65
                # Monitor wave_in_grp being sampled
                kernel65_sampled_wave_in_grp.add(wave_in_grp)
                # checks specific for samples from the last kernel
                assert wave_in_grp >= 0 and wave_in_grp <= 3

                # validate instruction decoding
                inst = instructions[inst_index]
                comm = comments[inst_index]
                # The instruction comment is usually in the following format:
                # /path/to/source/file.cpp:line_num
                line_num = int(comm.split(":")[-1])
                if inst.startswith("v_rcp_f64"):
                    # even SIMD lanes active
                    if np.uint64(exec_mask) != even_simds_active_exec_mask:
                        failing_exec_mask_checks_samples_num += 1

                    assert (
                        line_num >= v_rcp_f64_start_line_num
                        and line_num <= v_rcp_f64_end_line_num
                    )
                    kernel65_v_rcp_64_sampled_source_line_set.add(line_num)
                elif inst.startswith("v_rcp_f32"):
                    # odd SIMD lanes active
                    if np.uint64(exec_mask) != odd_simds_active_exec_mask:
                        failing_exec_mask_checks_samples_num += 1

                    assert (
                        line_num >= v_rcp_f32_start_line_num
                        and line_num <= v_rcp_f32_end_line_num
                    )
                    kernel65_v_rcp_f32_sampled_source_line_set.add(line_num)

    if all_sampled:
        # All cids that belong to the range [1, 65] should be sampled
        assert len(sampled_cids_set) == 65

        # all wave_in_grp values in the range [0, 3] should be sampled for the last kernel
        assert len(kernel65_sampled_wave_in_grp) == 4

        # all source lines matching v_rcp_f64 instructions of the last kernel
        # should be sampled
        assert len(kernel65_v_rcp_64_sampled_source_line_set) == (
            v_rcp_f64_end_line_num - v_rcp_f64_start_line_num + 1
        )
        # all source lines matching v_rcp_f32 instructions of the last kernel
        # should be sampled
        assert len(kernel65_v_rcp_f32_sampled_source_line_set) == (
            v_rcp_f32_end_line_num - v_rcp_f32_start_line_num + 1
        )

        # all chiplets must be sampled
        assert len(sampled_chiplets) == num_xcc
        # all (SIMD ID, waveslot ID) pairs must be sampled
        assert len(sampled_simd_waveslots_pairs) == simd_per_cu * max_waves_per_simd

    # assert chiplet index
    assert all(map(lambda chiplet: 0 <= chiplet < num_xcc, sampled_chiplets))
    # assert (SIMD ID, waveslot ID) combinations
    assert all(
        map(
            lambda simd_waveslot: (0 <= simd_waveslot[0] < simd_per_cu)
            and (0 <= simd_waveslot[1] < max_waves_per_simd),
            sampled_simd_waveslots_pairs,
        )
    )

    # Apparently, not all dispatches must belong to the same VMID,
    # so the following check is temporarily disabled.
    # # all samples should belong to the same VMID
    # assert len(sampled_vmids) == 1

    # assert that the number of failing samples is acceptable
    assert (
        failing_exec_mask_checks_samples_num <= max_number_of_failing_records
    ), "Number of failing samples failing exec_mask check is too high"
|
||||
+17
@@ -0,0 +1,17 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
set(PACKAGE_OUTPUT_DIR
|
||||
${ROCPROFILER_SDK_TESTS_BINARY_DIR}/pytest-packages/rocprofiler_sdk/pc_sampling/stochastic
|
||||
)
|
||||
|
||||
set(PC_SAMPLING_PYTHON_SOURCES __init__.py)
|
||||
|
||||
foreach(_FILE ${PC_SAMPLING_PYTHON_SOURCES})
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/${_FILE} ${PACKAGE_OUTPUT_DIR}/${_FILE}
|
||||
COPYONLY)
|
||||
endforeach()
|
||||
|
||||
add_subdirectory(csv)
|
||||
add_subdirectory(json)
|
||||
@@ -0,0 +1,24 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
+16
@@ -0,0 +1,16 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
set(PACKAGE_OUTPUT_DIR
|
||||
${ROCPROFILER_SDK_TESTS_BINARY_DIR}/pytest-packages/rocprofiler_sdk/pc_sampling/stochastic/csv
|
||||
)
|
||||
|
||||
set(PC_SAMPLING_PYTHON_SOURCES __init__.py)
|
||||
|
||||
foreach(_FILE ${PC_SAMPLING_PYTHON_SOURCES})
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/${_FILE} ${PACKAGE_OUTPUT_DIR}/${_FILE}
|
||||
COPYONLY)
|
||||
endforeach()
|
||||
|
||||
add_subdirectory(gfx9)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
set(PACKAGE_OUTPUT_DIR
|
||||
${ROCPROFILER_SDK_TESTS_BINARY_DIR}/pytest-packages/rocprofiler_sdk/pc_sampling/stochastic/csv/gfx9
|
||||
)
|
||||
|
||||
set(PC_SAMPLING_PYTHON_SOURCES
|
||||
__init__.py valu_instructions.py matrix_instructions.py texture_instructions.py
|
||||
flat_instructions.py lds_instructions.py)
|
||||
|
||||
foreach(_FILE ${PC_SAMPLING_PYTHON_SOURCES})
|
||||
configure_file(${CMAKE_CURRENT_LIST_DIR}/${_FILE} ${PACKAGE_OUTPUT_DIR}/${_FILE}
|
||||
COPYONLY)
|
||||
endforeach()
|
||||
|
||||
add_subdirectory(s_instructions)
|
||||
+110
@@ -0,0 +1,110 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from .s_instructions import validate_s_instructions
|
||||
from .valu_instructions import validate_valu_instructions
|
||||
from .texture_instructions import validate_texture_instructions
|
||||
from .matrix_instructions import validate_matrix_instructions
|
||||
from .lds_instructions import validate_lds_instructions
|
||||
from .flat_instructions import validate_flat_instructions
|
||||
|
||||
|
||||
def validate_wave_count(df):
    """Check that every "Wave_Count" value lies within the inclusive range [1, 32].

    "Wave_Count" is the number of active waves on a CU.
    """
    wave_count_in_range = df["Wave_Count"].between(1, 32)
    assert wave_count_in_range.all(), "Invalid Wave_Count"
|
||||
|
||||
|
||||
def validate_issued_instruction_type_no_inst(samples):
    """Check that no sample in ``samples`` has the NO_INST instruction type.

    NO_INST means that no instruction was issued, so it must not appear
    among samples of issued instructions.
    """
    is_no_inst = samples["Instruction_Type"] == "NO_INST"
    assert not is_no_inst.any(), "NO_INST implies no instruction is issued"
|
||||
|
||||
|
||||
def validate_issued_instruction_type_other(samples):
    """Check that no sample in ``samples`` has the OTHER instruction type.

    The meaning of the OTHER type is still to be determined, so any
    occurrence is reported.
    """
    is_other = samples["Instruction_Type"] == "OTHER"
    assert not is_other.any(), "OTHER type of instruction observed first time"
|
||||
|
||||
|
||||
def validate_issued_instruction_type_lds_direct(samples):
    """Check that no sample in ``samples`` has the LDS_DIRECT instruction type.

    LDS_DIRECT instructions do not exist on gfx9.
    """
    is_lds_direct = samples["Instruction_Type"] == "LDS_DIRECT"
    assert not is_lds_direct.any(), "LDS direct type of instruction observed on GFX9"
|
||||
|
||||
|
||||
def validate_issued_instruction_type_dual_valu(samples):
    """Check that no sample in ``samples`` has the DUAL_VALU instruction type.

    DUAL_VALU instructions should not be observed on gfx9.
    """
    is_dual_valu = samples["Instruction_Type"] == "DUAL_VALU"
    assert not is_dual_valu.any(), "DUAL_VALU type of instruction observed on GFX9"
|
||||
|
||||
|
||||
# TODO: add checks for missing instruction types
|
||||
# - export
|
||||
|
||||
|
||||
def validate_stochastic_samples_csv(df: pd.DataFrame):
    """Run every gfx9 stochastic PC-sampling check against the sample table.

    ``df`` is expected to contain only valid samples. The wave-count check is
    applied to all rows; the instruction checks only to rows with a positive
    ``Dispatch_Id``. Any failed check raises ``AssertionError``.
    """
    # We expect more valid than invalid samples
    # TODO: use stats for comparing valid vs invalid samples
    # invalid_samples = df[df["Valid"] == False]
    # valid_samples = df[df["Valid"]].copy()
    # assert len(valid_samples) > len(invalid_samples)

    # only valid samples reside in df
    all_valid = df.copy()
    validate_wave_count(all_valid)

    # The remaining checks assume the instruction could be decoded,
    # meaning a code object and dispatch must be known.
    decoded = all_valid[all_valid["Dispatch_Id"] > 0]

    # scalar, barrier, waitcnt, jump, message, branches (taken and not taken)
    # are handled inside `validate_s_instructions`
    per_category_checks = (
        validate_s_instructions,
        validate_valu_instructions,
        validate_texture_instructions,
        validate_matrix_instructions,
        validate_lds_instructions,
        validate_flat_instructions,
    )
    for run_check in per_category_checks:
        run_check(decoded)

    # validating issued instructions for uncovered types
    decoded_issued = decoded[decoded["Wave_Issued_Instruction"] == True].copy()
    issued_only_checks = (
        validate_issued_instruction_type_no_inst,
        validate_issued_instruction_type_other,
        # The following two types of instructions should not be observed on gfx9
        validate_issued_instruction_type_lds_direct,
        validate_issued_instruction_type_dual_valu,
    )
    for run_check in issued_only_checks:
        run_check(decoded_issued)
|
||||
+74
@@ -0,0 +1,74 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_flat_instructions_issued(samples_issued):
    """Check both directions of the FLAT type <-> ``flat_``/``global_`` mnemonic mapping.

    Raises ``AssertionError`` if a FLAT-typed sample has another mnemonic, or a
    ``flat_``/``global_`` mnemonic carries another type.
    """
    flat_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_FLAT"

    def _is_flat_mnemonic(text):
        return text.startswith(("flat_", "global_"))

    # type == FLAT -> instruction starts with either flat_ or global_
    typed_flat = samples_issued[samples_issued["Instruction_Type"] == flat_type]
    assert typed_flat["Instruction"].apply(_is_flat_mnemonic).all()

    # instruction starts with global_ or flat_ -> its type must be FLAT
    named_flat = samples_issued[samples_issued["Instruction"].apply(_is_flat_mnemonic)]
    assert (named_flat["Instruction_Type"] == flat_type).all()
|
||||
|
||||
|
||||
def validate_flat_instructions_stalled(samples):
    """Assert that non-issued ``global_``/``flat_`` samples only report the allowed stall reasons."""
    allowed_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",
    ]
    is_flat = samples["Instruction"].str.match(r"^(global|flat)_")
    flat_rows = samples[is_flat]
    not_issued = flat_rows[flat_rows["Wave_Issued_Instruction"].eq(False)]

    assert not_issued["Stall_Reason"].isin(allowed_reasons).all()
|
||||
|
||||
|
||||
def validate_flat_instructions(samples):
    """Entry point for FLAT checks: issued rows first, then the stalled-row check on all rows."""
    issued_mask = samples["Wave_Issued_Instruction"]
    validate_flat_instructions_issued(samples[issued_mask])
    # The stalled check selects the non-issued rows itself.
    validate_flat_instructions_stalled(samples)
|
||||
+67
@@ -0,0 +1,67 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_lds_instructions_issued(samples_issued):
    """Check both directions of the LDS type <-> ``ds_`` mnemonic mapping.

    Raises ``AssertionError`` if an LDS-typed sample does not start with
    ``ds_``, or a ``ds_`` mnemonic carries another type.
    """
    lds_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_LDS"

    def _is_ds_mnemonic(text):
        return text.startswith("ds_")

    # type == LDS -> instruction starts with ds_
    typed_lds = samples_issued[samples_issued["Instruction_Type"] == lds_type]
    assert typed_lds["Instruction"].apply(_is_ds_mnemonic).all()

    # instruction starts with ds_ -> it must be LDS
    named_ds = samples_issued[samples_issued["Instruction"].apply(_is_ds_mnemonic)]
    assert (named_ds["Instruction_Type"] == lds_type).all()
|
||||
|
||||
|
||||
def validate_lds_instructions_stalled(samples):
    """Assert that non-issued ``ds_`` samples only report the allowed stall reasons."""
    allowed_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
        # TODO: question - why we observed alu_dependency on matrix_multiply_tile kernel
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",
    ]
    is_ds = samples["Instruction"].apply(lambda text: text.startswith("ds_"))
    ds_rows = samples[is_ds]
    not_issued = ds_rows[ds_rows["Wave_Issued_Instruction"].eq(False)]

    assert not_issued["Stall_Reason"].isin(allowed_reasons).all()
|
||||
|
||||
|
||||
def validate_lds_instructions(samples):
    """Entry point for LDS checks: issued rows first, then the stalled-row check on all rows."""
    issued_mask = samples["Wave_Issued_Instruction"]
    validate_lds_instructions_issued(samples[issued_mask])
    # The stalled check selects the non-issued rows itself.
    validate_lds_instructions_stalled(samples)
|
||||
+107
@@ -0,0 +1,107 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_matrix_instructions_issued(samples_issued):
    """Validate the MATRIX instruction type against ``v_mfma*`` mnemonics.

    Checks, on samples whose wave issued an instruction:
      * every MATRIX-typed sample is a ``v_mfma*`` instruction;
      * every ``v_mfma_f32*`` and ``v_mfma_f64*`` sample is MATRIX-typed;
      * MATRIX-typed samples are exactly the f32 and f64 MFMA samples.

    Raises:
        AssertionError: if any of the checks fails.
    """
    matrix_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_MATRIX"

    # issued instruction with type == MATRIX -> instruction starts with v_mfma
    issued_type_matrix = samples_issued[samples_issued["Instruction_Type"] == matrix_type]
    assert issued_type_matrix["Instruction"].apply(lambda x: x.startswith("v_mfma")).all()

    # v_mfma_f32 goes through Matrix (MAI) arbiter, while v_mfma_f64 goes
    # through the VALU arbiter; both are still expected to be typed MATRIX.

    # SGEMM goes through Matrix (MAI arbiter)
    v_mfma_f32_issued = samples_issued[
        samples_issued["Instruction"].apply(lambda x: x.startswith("v_mfma_f32"))
    ]
    assert (v_mfma_f32_issued["Instruction_Type"] == matrix_type).all()

    # DGEMM goes through VALU arbiter.
    # Compare against the full enum name, as every other check here does:
    # the short literal "MATRIX" could never satisfy the count check below
    # when f64 samples are present.
    v_mfma_f64_issued = samples_issued[
        samples_issued["Instruction"].apply(lambda x: x.startswith("v_mfma_f64"))
    ]
    assert (v_mfma_f64_issued["Instruction_Type"] == matrix_type).all()

    assert len(issued_type_matrix) == len(v_mfma_f32_issued) + len(v_mfma_f64_issued)
|
||||
|
||||
# TODO: find an example with MAI instructions
|
||||
|
||||
|
||||
def validate_dgemm_matrix_instructions_stalled(samples):
    """Assert that non-issued ``v_mfma_f64*`` samples only report the allowed stall reasons."""
    allowed_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
    ]
    is_f64_mfma = samples["Instruction"].apply(lambda text: text.startswith("v_mfma_f64"))
    f64_rows = samples[is_f64_mfma]
    not_issued = f64_rows[f64_rows["Wave_Issued_Instruction"].eq(False)]

    assert not_issued["Stall_Reason"].isin(allowed_reasons).all()
|
||||
|
||||
|
||||
def validate_sgemm_matrix_instructions_stalled(samples):
    """Assert that non-issued ``v_mfma_f32*`` samples only report the allowed stall reasons."""
    allowed_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
    ]
    is_f32_mfma = samples["Instruction"].apply(lambda text: text.startswith("v_mfma_f32"))
    f32_rows = samples[is_f32_mfma]
    not_issued = f32_rows[f32_rows["Wave_Issued_Instruction"].eq(False)]

    assert not_issued["Stall_Reason"].isin(allowed_reasons).all()
|
||||
|
||||
|
||||
def validate_matrix_instructions_stalled(samples):
    """Run the stalled-sample checks for f64 (DGEMM) and then f32 (SGEMM) MFMA instructions."""
    for check_stalled in (
        validate_dgemm_matrix_instructions_stalled,
        validate_sgemm_matrix_instructions_stalled,
    ):
        check_stalled(samples)
    # TODO: find an example to test this
|
||||
|
||||
|
||||
def validate_matrix_instructions(samples):
    """Entry point for MATRIX checks: issued rows first, then the stalled-row checks on all rows."""
    issued_mask = samples["Wave_Issued_Instruction"]
    validate_matrix_instructions_issued(samples[issued_mask])
    # The stalled checks select the non-issued rows themselves.
    validate_matrix_instructions_stalled(samples)
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
# Destination directory, under the tests binary dir, for the s_instructions
# Python package sources.
set(PACKAGE_OUTPUT_DIR
    ${ROCPROFILER_SDK_TESTS_BINARY_DIR}/pytest-packages/rocprofiler_sdk/pc_sampling/stochastic/csv/gfx9/s_instructions
    )

# Python sources in this directory that make up the package.
set(PC_SAMPLING_PYTHON_SOURCES
    __init__.py
    branch_instructions.py
    waitcnt.py
    other_instructions.py
    scalar_instructions.py
    internal_instructions.py
    jump_instructions.py
    message_instructions.py
    barrier_instructions.py)

# Copy each source verbatim (COPYONLY: no variable substitution) at configure time.
foreach(_FILE ${PC_SAMPLING_PYTHON_SOURCES})
    configure_file(${CMAKE_CURRENT_LIST_DIR}/${_FILE} ${PACKAGE_OUTPUT_DIR}/${_FILE}
                   COPYONLY)
endforeach()
|
||||
+151
@@ -0,0 +1,151 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
from functools import partial
|
||||
|
||||
from .branch_instructions import validate_branch_instructions
|
||||
from .waitcnt import validate_waitcnt
|
||||
from .other_instructions import validate_other_instructions
|
||||
from .scalar_instructions import validate_scalar_instructions
|
||||
from .internal_instructions import validate_internal_instructions
|
||||
from .jump_instructions import validate_jump_instructions
|
||||
from .message_instructions import validate_message_instructions
|
||||
from .barrier_instructions import validate_barrier_instructions
|
||||
|
||||
|
||||
# Using Prefix Tree to classify the instruction type
|
||||
# I did this instead of the regex because I wanted to try if we could
|
||||
# generalize this approach for other types of instructions.
|
||||
# The dream scenario: We have a giant list of all instructions and their
|
||||
# types. Then we parse the list and dynamically determine the checks
|
||||
# based on the instruction types.
|
||||
|
||||
|
||||
# TODO: extract this outside of the file
|
||||
class TrieNode:
    """One node of the prefix tree used to classify instruction mnemonics."""

    def __init__(self):
        # Instruction type of the prefix ending at this node; None when no
        # inserted prefix ends here.
        self.instruction_type = None
        # Maps the next character to the child node.
        self.children = {}
|
||||
|
||||
|
||||
class PrefixTree:
    """Character-level trie mapping mnemonic prefixes to instruction types."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, full_prefix, instruction_type):
        """Register ``full_prefix`` with ``instruction_type``.

        Inserting the same prefix again overwrites the stored type.
        """
        current = self.root
        for symbol in full_prefix:
            child = current.children.get(symbol)
            if child is None:
                child = TrieNode()
                current.children[symbol] = child
            current = child
        # The node reached by the last character carries the type.
        current.instruction_type = instruction_type

    def get_instruction_type(self, instruction):
        """Return the types of all registered prefixes of ``instruction``.

        Types are ordered from the shortest matching prefix to the longest;
        the list is empty when no registered prefix matches.
        """
        found_types = []
        current = self.root

        for symbol in instruction:
            current = current.children.get(symbol)
            if current is None:
                # No registered prefix continues past this character.
                break
            if current.instruction_type:
                found_types.append(current.instruction_type)

        return found_types
|
||||
|
||||
|
||||
# (prefix, type) pairs used to fill the prefix tree. Each prefix appears once:
# inserting a prefix twice overwrites the earlier type, so the former
# duplicate ("s_setpc", "JUMP") and the shadowed ("s_sleep", "JUMP") entries
# were dead and have been removed. The effective classification is unchanged.
instructions_with_types = [
    ("s_", "SCALAR"),  # Scalar instructions (general category)
    ("s_waitcnt", "WAITCNT"),  # WAITCNT (specific)
    ("s_sendmsg", "MESSAGE"),  # MESSAGE (specific)
    ("s_barrier", "BARRIER"),  # BARRIER (specific)
    ("s_swappc", "JUMP"),  # JUMP (specific)
    ("s_setpc", "JUMP"),  # JUMP
    ("s_branch", "BRANCH"),  # BRANCH
    ("s_cbranch", "BRANCH"),  # BRANCH (conditional)
    ("s_wakeup", "OTHER"),  # OTHER
    ("s_nop", "INTERNAL"),  # INTERNAL
    ("s_sleep", "INTERNAL"),  # INTERNAL
]
|
||||
|
||||
|
||||
# Maps the instruction type derived from the mnemonic prefix to its validator.
# `validate_s_instructions` calls each validator as
# validator(all_samples, samples_of_that_type).
inst_type_verify_functions = {
    "BRANCH": validate_branch_instructions,
    "WAITCNT": validate_waitcnt,
    "OTHER": validate_other_instructions,
    "SCALAR": validate_scalar_instructions,
    "INTERNAL": validate_internal_instructions,
    "JUMP": validate_jump_instructions,
    "MESSAGE": validate_message_instructions,
    "BARRIER": validate_barrier_instructions,
}
|
||||
|
||||
|
||||
# Function to classify instructions based on the Trie
|
||||
def classify_instruction_by_prefix(prefix_tree, instruction):
    """Classify an instruction by the longest registered prefix of its mnemonic.

    Args:
        prefix_tree: object exposing ``get_instruction_type(mnemonic)``, which
            returns matching types ordered from the shortest prefix to the longest.
        instruction: full instruction text; only the first whitespace-separated
            token (e.g. ``s_mov_b32``) is used.

    Returns:
        The most specific matching type, or ``None`` when the text is blank or
        no registered prefix matches (previously an ``IndexError``).
    """
    # extracting the base of the instruction (e.g., s_mov_*, v_mov_*, s_setpc_*, ...)
    tokens = instruction.split()
    if not tokens:
        return None

    # Classify based on the Trie (general classification)
    instruction_types = prefix_tree.get_instruction_type(tokens[0])
    if not instruction_types:
        return None

    # always use the most specific type
    return instruction_types[-1]
|
||||
|
||||
|
||||
def enforce_type_inheritance(sub_df, parent_df):
    """Cast each column of ``sub_df`` to the dtype of the same column in ``parent_df``.

    ``sub_df`` is modified in place and also returned. Every column of
    ``parent_df`` must exist in ``sub_df``.
    """
    for column_name in list(parent_df.columns):
        wanted_dtype = parent_df[column_name].dtype
        sub_df[column_name] = sub_df[column_name].astype(wanted_dtype)
    return sub_df
|
||||
|
||||
|
||||
def validate_s_instructions(df):
    """Classify all ``s_*`` samples by mnemonic prefix and run the matching validator on each group.

    Each validator receives the full sample table ``df`` and the subset of
    ``s_*`` samples of its type. Types without a registered validator are skipped.
    """
    is_scalar_mnemonic = df["Instruction"].apply(lambda text: text.startswith("s_"))
    scalar_rows = df[is_scalar_mnemonic].copy()

    # fill in the Prefix Tree
    tree = PrefixTree()
    for registered_prefix, registered_type in instructions_with_types:
        tree.insert(registered_prefix, registered_type)

    def _type_from_name(instruction_text):
        return classify_instruction_by_prefix(tree, instruction_text)

    scalar_rows["Instruction_Type_From_Name"] = scalar_rows["Instruction"].apply(
        _type_from_name
    )

    for type_name, group in scalar_rows.groupby("Instruction_Type_From_Name"):
        # group = enforce_type_inheritance(group, scalar_rows)
        if type_name not in inst_type_verify_functions:
            continue
        # Pass all samples and filtered samples to the verification function.
        inst_type_verify_functions[type_name](df, group)
|
||||
+65
@@ -0,0 +1,65 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_barrier_instructions_issued(all_samples, barrier_samples):
    """Cross-check issued BARRIER-typed samples against issued barrier-mnemonic samples.

    Args:
        all_samples: every sample under validation.
        barrier_samples: samples classified as barrier by mnemonic.

    Raises:
        AssertionError: if the two issued sets differ in size, or an issued
            barrier-mnemonic sample is not typed BARRIER.
    """
    barrier_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BARRIER"

    issued_and_typed = all_samples["Wave_Issued_Instruction"] & (
        all_samples["Instruction_Type"] == barrier_type
    )
    typed_issued = all_samples[issued_and_typed]
    named_issued = barrier_samples[barrier_samples["Wave_Issued_Instruction"]]

    # sanity check
    assert len(typed_issued) == len(named_issued)
    # every issued barrier mnemonic must carry the BARRIER type
    assert (named_issued["Instruction_Type"] == barrier_type).all()
|
||||
|
||||
|
||||
def validate_barrier_instructions_stalled(barrier_samples):
    """Assert that non-issued barrier samples only report the allowed stall reasons."""
    allowed_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_BARRIER_WAIT",
    ]
    not_issued = barrier_samples[barrier_samples["Wave_Issued_Instruction"].eq(False)]
    assert not_issued["Stall_Reason"].isin(allowed_reasons).all()
|
||||
|
||||
|
||||
def validate_barrier_instructions(all_samples, barrier_samples):
    """Entry point for barrier checks: the issued check, then the stalled check."""
    # The issued check needs all samples to cross-check the reported type.
    validate_barrier_instructions_issued(all_samples, barrier_samples)
    # The stalled check only looks at the barrier-mnemonic samples.
    validate_barrier_instructions_stalled(barrier_samples)
|
||||
+142
@@ -0,0 +1,142 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_issued_instruction_type_branch_taken(samples):
    """Validate the BRANCH_TAKEN type on issued samples.

    * BRANCH_TAKEN-typed issued samples must be ``s_branch*`` or ``s_cbranch*``.
    * Issued unconditional branches (``s_branch*``) must be typed BRANCH_TAKEN.
    """
    taken_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BRANCH_TAKEN"

    # type BRANCH_TAKEN -> instruction starts with either s_cbranch or s_branch
    typed_taken = samples[
        (samples["Instruction_Type"] == taken_type) & samples["Wave_Issued_Instruction"]
    ]
    is_any_branch = typed_taken["Instruction"].apply(
        lambda text: text.startswith(("s_branch", "s_cbranch"))
    )
    assert is_any_branch.all()
    assert typed_taken["Wave_Issued_Instruction"].all()

    # instruction starts with s_branch (unconditional) -> type must be BRANCH_TAKEN
    unconditional_issued = samples[
        samples["Instruction"].apply(lambda text: text.startswith("s_branch"))
        & samples["Wave_Issued_Instruction"]
    ]
    assert (unconditional_issued["Instruction_Type"] == taken_type).all()
|
||||
|
||||
# see `validate_issued_instruction_type_branch_not_taken` for more info about s_cbranch checks
|
||||
|
||||
|
||||
def validate_issued_instruction_type_branch_not_taken(samples):
    """Validate the BRANCH_NOT_TAKEN type on issued samples.

    * BRANCH_NOT_TAKEN-typed issued samples must be conditional branches (``s_cbranch*``).
    * Issued ``s_cbranch*`` samples must be typed BRANCH_TAKEN or BRANCH_NOT_TAKEN.
    """
    taken_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BRANCH_TAKEN"
    not_taken_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BRANCH_NOT_TAKEN"

    def _is_conditional_branch(text):
        return text.startswith("s_cbranch")

    # type BRANCH_NOT_TAKEN -> instruction is a conditional branch
    typed_not_taken = samples[
        (samples["Instruction_Type"] == not_taken_type)
        & samples["Wave_Issued_Instruction"]
    ]
    assert typed_not_taken["Instruction"].apply(_is_conditional_branch).all()
    assert typed_not_taken["Wave_Issued_Instruction"].all()

    # instruction starts with s_cbranch -> type is either BRANCH_TAKEN or BRANCH_NOT_TAKEN
    conditional_issued = samples[
        samples["Instruction"].apply(_is_conditional_branch)
        & samples["Wave_Issued_Instruction"]
    ]
    reported_type = conditional_issued["Instruction_Type"]
    assert ((reported_type == taken_type) | (reported_type == not_taken_type)).all()
|
||||
|
||||
|
||||
def s_branch_not_issued(stalled_samples):
    """Assert that stalled ``s_branch*`` samples never report ALU_DEPENDENCY or ARBITER_WIN_EX_STALL."""
    is_unconditional = stalled_samples["Instruction"].apply(
        lambda text: text.startswith("s_branch")
    )
    unconditional_stalled = stalled_samples[is_unconditional]
    if len(unconditional_stalled) == 0:
        return

    # No ALUDEP nor ARBWINEXSTALL observed so far for unconditional branches
    unexpected_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL",
    ]
    assert (~unconditional_stalled["Stall_Reason"].isin(unexpected_reasons)).all()
|
||||
|
||||
|
||||
def validate_stalled_branches(samples):
    """Assert that non-issued branch samples only report the allowed stall reasons.

    Unconditional branches are then checked against a narrower set by
    ``s_branch_not_issued``.
    """
    allowed_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL",
    ]
    not_issued = samples[samples["Wave_Issued_Instruction"].eq(False)]

    assert not_issued["Stall_Reason"].isin(allowed_reasons).all()

    # Further constraints for unconditional branches
    s_branch_not_issued(not_issued)
|
||||
|
||||
|
||||
def validate_branch_instructions(all_samples, branch_samples):
    """
    Validate branch samples.

    ``all_samples`` is used to verify that ROCProfV3 determines the
    `Instruction_Type` field properly for issued branches.

    ``branch_samples`` (samples classified as branches by mnemonic) is used to
    verify the stall reasons of branches that were not issued.
    """
    # Issued branches: pass all samples, since the checks below separate
    # conditional from unconditional branches themselves.
    for issued_check in (
        validate_issued_instruction_type_branch_taken,
        validate_issued_instruction_type_branch_not_taken,
    ):
        issued_check(all_samples)

    # stalled branches
    validate_stalled_branches(branch_samples)
|
||||
+38
@@ -0,0 +1,38 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_internal_instructions(all_samples, internal_samples):
    """Assert that internal-instruction samples are never issued and only report the allowed stall reasons.

    ``all_samples`` is accepted for signature parity with the other
    validators and is not used.
    """
    allowed_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_INTERNAL_INSTRUCTION",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
    ]
    never_issued = internal_samples["Wave_Issued_Instruction"].eq(False)
    assert never_issued.all()

    assert internal_samples["Stall_Reason"].isin(allowed_reasons).all()
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_jump_instructions_issued(all_samples, jump_samples):
    """Cross-check issued JUMP samples between the two sample sets.

    Both arguments are indexed DataFrame-style and need the
    ``Wave_Issued_Instruction`` (boolean) and ``Instruction_Type`` columns.

    Asserts that the number of issued rows typed JUMP in ``all_samples``
    equals the number of issued rows in ``jump_samples``, and that every
    issued row of ``jump_samples`` is typed JUMP.
    """
    jump_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_JUMP"

    # issued rows that the complete data set labels as JUMP
    was_issued = all_samples["Wave_Issued_Instruction"]
    typed_as_jump = all_samples["Instruction_Type"] == jump_type
    issued_by_type = all_samples[was_issued & typed_as_jump]

    # issued rows of the pre-selected jump samples
    issued_selected = jump_samples[jump_samples["Wave_Issued_Instruction"]]

    # sanity check: both selections must have the same size
    assert len(issued_by_type) == len(issued_selected)
    # and the pre-selected rows must carry the JUMP type themselves
    assert issued_selected["Instruction_Type"].eq(jump_type).all()
|
||||
|
||||
|
||||
def validate_jump_instructions_stalled(jump_samples):
    """Check the stall reason of jump samples that were not issued.

    The only accepted stall reason for a non-issued row of ``jump_samples``
    is NO_INSTRUCTION_AVAILABLE.
    """
    only_reason = (
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE"
    )
    not_issued = jump_samples["Wave_Issued_Instruction"].eq(False)
    stalled_rows = jump_samples[not_issued]
    assert stalled_rows["Stall_Reason"].eq(only_reason).all()
|
||||
|
||||
|
||||
def validate_jump_instructions(all_samples, jump_samples):
    """Run the issued and the stalled jump checks, in that order.

    ``all_samples`` is only needed by the issued check; the stalled check
    looks at ``jump_samples`` alone.
    """
    validate_jump_instructions_issued(all_samples, jump_samples)
    validate_jump_instructions_stalled(jump_samples)
|
||||
+64
@@ -0,0 +1,64 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_message_instructions_issued(all_samples, message_samples):
    """Cross-check issued MESSAGE samples between the two sample sets.

    Asserts that the number of issued rows typed MESSAGE in ``all_samples``
    equals the number of issued rows in ``message_samples``, and that every
    issued row of ``message_samples`` is typed MESSAGE.
    """
    message_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_MESSAGE"

    # issued rows that the complete data set labels as MESSAGE
    was_issued = all_samples["Wave_Issued_Instruction"]
    typed_as_message = all_samples["Instruction_Type"] == message_type
    issued_by_type = all_samples[was_issued & typed_as_message]

    # issued rows of the pre-selected message samples
    issued_selected = message_samples[message_samples["Wave_Issued_Instruction"]]

    # sanity check: both selections must have the same size
    assert len(issued_by_type) == len(issued_selected)
    # and the pre-selected rows must carry the MESSAGE type themselves
    assert issued_selected["Instruction_Type"].eq(message_type).all()
    # TODO: find an example with messages
|
||||
|
||||
|
||||
def validate_message_instructions_stalled(message_samples):
    """Check the stall reason of message samples that were not issued.

    Accepted reasons for a non-issued row are NO_INSTRUCTION_AVAILABLE and
    ALU_DEPENDENCY.
    """
    permitted_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",
    ]
    not_issued = message_samples["Wave_Issued_Instruction"].eq(False)
    stalled_rows = message_samples[not_issued]
    assert stalled_rows["Stall_Reason"].isin(permitted_reasons).all()
|
||||
|
||||
|
||||
def validate_message_instructions(all_samples, message_samples):
    """Run the issued and the stalled message checks, in that order.

    ``all_samples`` is only needed by the issued check; the stalled check
    looks at ``message_samples`` alone.
    """
    validate_message_instructions_issued(all_samples, message_samples)
    validate_message_instructions_stalled(message_samples)
|
||||
+64
@@ -0,0 +1,64 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_other_instructions_issued(all_samples, other_samples):
    """Cross-check issued OTHER samples between the two sample sets.

    Asserts first that every issued row of ``other_samples`` is typed OTHER,
    then that the number of issued rows typed OTHER in ``all_samples``
    equals the number of issued rows in ``other_samples``.
    """
    other_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_OTHER"

    # issued rows that the complete data set labels as OTHER
    was_issued = all_samples["Wave_Issued_Instruction"]
    typed_as_other = all_samples["Instruction_Type"] == other_type
    issued_by_type = all_samples[was_issued & typed_as_other]

    # issued rows of the pre-selected samples
    issued_selected = other_samples[other_samples["Wave_Issued_Instruction"]]

    assert issued_selected["Instruction_Type"].eq(other_type).all()
    assert len(issued_by_type) == len(issued_selected)
|
||||
|
||||
|
||||
def validate_other_instructions_stalled(other_samples):
    """Check the stall reason of OTHER samples that were not issued.

    Accepted reasons for a non-issued row are NO_INSTRUCTION_AVAILABLE and
    ARBITER_NOT_WIN.
    """
    permitted_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
    ]
    not_issued = other_samples["Wave_Issued_Instruction"].eq(False)
    stalled_rows = other_samples[not_issued]
    assert stalled_rows["Stall_Reason"].isin(permitted_reasons).all()
|
||||
|
||||
|
||||
def validate_other_instructions(all_samples, filtered_samples):
    """Run the issued and the stalled OTHER checks, in that order.

    ``all_samples`` is only needed by the issued check; the stalled check
    looks at ``filtered_samples`` alone.
    """
    validate_other_instructions_issued(all_samples, filtered_samples)
    validate_other_instructions_stalled(filtered_samples)
|
||||
+70
@@ -0,0 +1,70 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_scalar_instructions_issued(all_samples, scalar_samples):
    """Cross-check issued SCALAR samples between the two sample sets.

    Asserts that the number of issued rows typed SCALAR in ``all_samples``
    equals the number of issued rows in ``scalar_samples``, and that every
    issued row of ``scalar_samples`` is typed SCALAR.
    """
    scalar_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_SCALAR"

    # From all samples, extract issued samples with SCALAR type
    typed_as_scalar = all_samples["Instruction_Type"] == scalar_type
    was_issued = all_samples["Wave_Issued_Instruction"]
    issued_by_type = all_samples[typed_as_scalar & was_issued]

    # scalar_samples contains instructions starting with `s_`
    issued_selected = scalar_samples[scalar_samples["Wave_Issued_Instruction"]]

    # sanity check: both selections must have the same size
    assert len(issued_by_type) == len(issued_selected)
    # and the pre-selected rows must carry the SCALAR type themselves
    assert issued_selected["Instruction_Type"].eq(scalar_type).all()
|
||||
|
||||
|
||||
def validate_scalar_instructions_stalled(scalar_samples):
    """Check the stall reason of scalar samples that were not issued.

    Accepted reasons for a non-issued row are NO_INSTRUCTION_AVAILABLE,
    ARBITER_WIN_EX_STALL, ARBITER_NOT_WIN and ALU_DEPENDENCY.
    """
    permitted_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",
    ]
    not_issued = scalar_samples["Wave_Issued_Instruction"].eq(False)
    stalled_rows = scalar_samples[not_issued]
    assert stalled_rows["Stall_Reason"].isin(permitted_reasons).all()
|
||||
|
||||
|
||||
def validate_scalar_instructions(all_samples, scalar_samples):
    """Run the issued and the stalled scalar checks, in that order.

    ``all_samples`` is only needed by the issued check; the stalled check
    looks at ``scalar_samples`` alone.
    """
    validate_scalar_instructions_issued(all_samples, scalar_samples)
    validate_scalar_instructions_stalled(scalar_samples)
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_waitcnt(all_samples, waitcnt_samples):
    """Validate samples taken at ``s_waitcnt`` instructions.

    Asserts, in order, that:
      * ``waitcnt_samples`` has as many rows as ``all_samples`` has rows
        whose ``Instruction`` starts with ``s_waitcnt``;
      * no row of ``waitcnt_samples`` is marked as issued;
      * every stall reason is WAITCNT or NO_INSTRUCTION_AVAILABLE.
    """
    permitted_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
    ]

    def is_waitcnt(instruction):
        return instruction.startswith("s_waitcnt")

    # sanity check: re-derive the waitcnt rows from the complete data set
    rederived = all_samples[all_samples["Instruction"].apply(is_waitcnt)]
    assert len(rederived) == len(waitcnt_samples)

    # `s_waitcnt` instructions are never issued on GFX9
    assert waitcnt_samples["Wave_Issued_Instruction"].eq(False).all()

    # only the permitted stall reasons may appear
    assert waitcnt_samples["Stall_Reason"].isin(permitted_reasons).all()
|
||||
+74
@@ -0,0 +1,74 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_texture_instructions_issued(samples_issued):
    """Check that the TEX type and the ``buffer_`` prefix imply each other.

    ``samples_issued`` is expected to hold issued samples only (the caller
    filters them). Two assertions are made on its ``Instruction`` and
    ``Instruction_Type`` columns:
      * type TEX            -> instruction text starts with ``buffer_``
      * starts with buffer_ -> type is TEX
    """
    tex_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_TEX"

    def is_buffer_op(instruction):
        return instruction.startswith("buffer_")

    # issued instruction with type == TEX -> instruction starts with buffer_
    typed_as_tex = samples_issued[samples_issued["Instruction_Type"] == tex_type]
    assert typed_as_tex["Instruction"].apply(is_buffer_op).all()

    # issued instruction starts with buffer_ -> it must be TEX
    named_buffer = samples_issued[samples_issued["Instruction"].apply(is_buffer_op)]
    assert named_buffer["Instruction_Type"].eq(tex_type).all()
    # TODO: find an example with TEX instructions
|
||||
|
||||
|
||||
def validate_texture_instructions_stalled(samples):
    """Check the stall reason of texture samples that were not issued.

    Texture samples are the rows of ``samples`` whose ``Instruction`` starts
    with ``buffer_``. Among those, every row that was not issued must have
    one of these stall reasons: ARBITER_WIN_EX_STALL,
    NO_INSTRUCTION_AVAILABLE, ARBITER_NOT_WIN or ALU_DEPENDENCY.

    Raises AssertionError when a non-issued texture sample carries any
    other stall reason.
    """
    allowed_stall_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",
    ]

    # Select on the full "buffer_" prefix (underscore included), the same
    # prefix the issued-sample check of this module uses, so both checks
    # agree on what counts as a texture instruction.
    texture_samples = samples[
        samples["Instruction"].apply(lambda x: x.startswith("buffer_"))
    ]
    texture_stalled = texture_samples[texture_samples["Wave_Issued_Instruction"] == False]

    assert texture_stalled["Stall_Reason"].isin(allowed_stall_reasons).all()

    # TODO: find an example with texture instructions
|
||||
|
||||
|
||||
def validate_texture_instructions(samples):
    """Run the issued and the stalled texture checks on ``samples``.

    The issued check receives only the rows whose
    ``Wave_Issued_Instruction`` is set; the stalled check receives every
    row and does its own filtering.
    """
    issued_only = samples[samples["Wave_Issued_Instruction"]]
    validate_texture_instructions_issued(issued_only)
    validate_texture_instructions_stalled(samples)
|
||||
+69
@@ -0,0 +1,69 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_valu_instructions_issued(samples_issued):
    """Check the relation between the VALU type and the ``v_`` prefix.

    ``samples_issued`` is expected to hold issued samples only (the caller
    filters them). Two assertions are made on its ``Instruction`` and
    ``Instruction_Type`` columns:
      * type VALU -> instruction text starts with ``v_``
      * starts with ``v_`` and does not contain ``mfma`` -> type is VALU
    """
    valu_type = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU"

    def has_vector_prefix(instruction):
        return instruction.startswith("v_")

    def is_plain_vector_op(instruction):
        # matrix (mfma) instructions also start with v_ but are not VALU
        return instruction.startswith("v_") and ("mfma" not in instruction)

    # issued instruction with type == VALU -> instruction starts with v_
    typed_as_valu = samples_issued[samples_issued["Instruction_Type"] == valu_type]
    assert typed_as_valu["Instruction"].apply(has_vector_prefix).all()

    # issued instruction starts with v_ and is not matrix instruction -> it must be VALU
    named_vector = samples_issued[samples_issued["Instruction"].apply(is_plain_vector_op)]
    assert named_vector["Instruction_Type"].eq(valu_type).all()
|
||||
|
||||
|
||||
def validate_valu_instructions_stalled(samples):
    """Check the stall reason of VALU samples that were not issued.

    VALU samples are the rows of ``samples`` whose ``Instruction`` starts
    with ``v_`` and does not contain ``mfma``. Among those, every row that
    was not issued must have one of these stall reasons:
    ARBITER_WIN_EX_STALL, NO_INSTRUCTION_AVAILABLE or ARBITER_NOT_WIN.
    """
    permitted_reasons = [
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
    ]

    def is_plain_vector_op(instruction):
        return instruction.startswith("v_") and ("mfma" not in instruction)

    vector_rows = samples[samples["Instruction"].apply(is_plain_vector_op)]
    stalled_rows = vector_rows[vector_rows["Wave_Issued_Instruction"].eq(False)]

    assert stalled_rows["Stall_Reason"].isin(permitted_reasons).all()
|
||||
|
||||
|
||||
def validate_valu_instructions(samples):
    """Run the issued and the stalled VALU checks on ``samples``.

    The issued check receives only the rows whose
    ``Wave_Issued_Instruction`` is set; the stalled check receives every
    row and does its own filtering.
    """
    issued_only = samples[samples["Wave_Issued_Instruction"]]
    validate_valu_instructions_issued(issued_only)
    validate_valu_instructions_stalled(samples)
|
||||
+16
@@ -0,0 +1,16 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
# Directory in the tests build tree that receives this Python package.
set(PACKAGE_OUTPUT_DIR
    ${ROCPROFILER_SDK_TESTS_BINARY_DIR}/pytest-packages/rocprofiler_sdk/pc_sampling/stochastic/json
)

# Python sources that belong to this package level.
set(PC_SAMPLING_PYTHON_SOURCES __init__.py)

# Copy every source as-is (COPYONLY: no variable substitution in the file).
foreach(_FILE IN LISTS PC_SAMPLING_PYTHON_SOURCES)
    configure_file(${CMAKE_CURRENT_LIST_DIR}/${_FILE} ${PACKAGE_OUTPUT_DIR}/${_FILE}
                   COPYONLY)
endforeach()

# GFX9-specific validators live in their own sub-package.
add_subdirectory(gfx9)
|
||||
+24
@@ -0,0 +1,24 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
+17
@@ -0,0 +1,17 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
# Directory in the tests build tree that receives this Python package.
set(PACKAGE_OUTPUT_DIR
    ${ROCPROFILER_SDK_TESTS_BINARY_DIR}/pytest-packages/rocprofiler_sdk/pc_sampling/stochastic/json/gfx9
)

# Python sources that belong to this package level.
set(PC_SAMPLING_PYTHON_SOURCES
    __init__.py
    arbiter_state.py
    s_instructions.py
    other_instructions.py
)

# Copy every source as-is (COPYONLY: no variable substitution in the file).
foreach(_FILE IN LISTS PC_SAMPLING_PYTHON_SOURCES)
    configure_file(${CMAKE_CURRENT_LIST_DIR}/${_FILE} ${PACKAGE_OUTPUT_DIR}/${_FILE}
                   COPYONLY)
endforeach()

# add_subdirectory(s_instructions)
|
||||
+176
@@ -0,0 +1,176 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from collections import defaultdict
|
||||
from .arbiter_state import validate_arbiter_state
|
||||
from .other_instructions import (
|
||||
validate_valu_instructions,
|
||||
validate_flat_instructions,
|
||||
validate_lds_instructions,
|
||||
)
|
||||
from .s_instructions import (
|
||||
validate_internal_instructions,
|
||||
validate_barrier_instructions,
|
||||
validate_waitcnt,
|
||||
validate_branch_instructions,
|
||||
validate_scalar_instructions,
|
||||
)
|
||||
|
||||
# Using Prefix Tree to classify the instruction type
|
||||
# I did this instead of a regex because I wanted to see whether we could
|
||||
# generalize this approach for other types of instructions.
|
||||
# The dream scenario: We have a giant list of all instructions and their
|
||||
# types. Then we parse the list and dynamically determine the checks
|
||||
# based on the instruction types.
|
||||
|
||||
|
||||
# TODO: extract this outside of the file
|
||||
class TrieNode:
    """One node of the prefix tree.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # Instruction type registered for the prefix that ends at this node;
        # stays None when no prefix ends here.
        self.instruction_type = None
        # Maps the next character to the child node reached through it.
        self.children = {}
|
||||
|
||||
|
||||
class PrefixTree:
    """Character trie that maps instruction prefixes to instruction types."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, full_prefix, instruction_type):
        """Register ``instruction_type`` for ``full_prefix``.

        Missing nodes along the path are created. Inserting a prefix that is
        already present replaces the type stored for it.
        """
        cursor = self.root
        for symbol in full_prefix:
            child = cursor.children.get(symbol)
            if child is None:
                child = TrieNode()
                cursor.children[symbol] = child
            cursor = child
        # the node reached by the last character carries the type
        cursor.instruction_type = instruction_type

    def get_instruction_type(self, instruction):
        """Return the types of all registered prefixes of ``instruction``.

        The list is ordered from the shortest matching prefix to the longest
        one, so the last element belongs to the longest match. It is empty
        when no registered prefix matches.
        """
        found_types = []
        cursor = self.root

        # Walk the instruction text character by character.
        for symbol in instruction:
            cursor = cursor.children.get(symbol)
            if cursor is None:
                # no registered prefix continues with this character
                break
            # a registered prefix ends at this node: remember its type
            if cursor.instruction_type:
                found_types.append(cursor.instruction_type)

        return found_types
|
||||
|
||||
|
||||
# (prefix, instruction type) pairs that are loaded into the prefix tree.
# Each prefix must appear only once: PrefixTree.insert replaces the type of a
# prefix that is inserted again, so a repeated prefix would silently keep only
# its last entry.
instructions_with_types = [
    ("s_", "SCALAR"),  # Scalar instructions (general category)
    ("s_waitcnt", "WAITCNT"),  # WAITCNT (specific)
    ("s_sendmsg", "MESSAGE"),  # MESSAGE (specific)
    ("s_barrier", "BARRIER"),  # BARRIER (specific)
    ("s_swappc", "JUMP"),  # JUMP (specific)
    ("s_setpc", "JUMP"),  # JUMP
    ("s_branch", "BRANCH"),  # BRANCH
    ("s_cbranch", "BRANCH"),  # BRANCH (conditional)
    ("s_wakeup", "OTHER"),  # OTHER
    ("s_nop", "INTERNAL"),  # INTERNAL
    ("s_sleep", "INTERNAL"),  # INTERNAL
    ("v_", "VALU"),  # VALU
    ("v_mfma", "MATRIX"),  # MATRIX
    ("flat_", "FLAT"),  # FLAT
    ("global_", "FLAT"),  # FLAT
    ("ds_", "LDS"),  # LDS
    ("buffer_", "TEX"),  # TEX
]
|
||||
|
||||
|
||||
# Validator to run for each instruction type found by the prefix tree.
# Every validator is called with the list of sample records of that type.
inst_type_verify_functions = {
    # scalar-unit instruction families
    "SCALAR": validate_scalar_instructions,
    "BRANCH": validate_branch_instructions,
    "WAITCNT": validate_waitcnt,
    "BARRIER": validate_barrier_instructions,
    "INTERNAL": validate_internal_instructions,
    # vector ALU and memory instruction families
    "VALU": validate_valu_instructions,
    "FLAT": validate_flat_instructions,
    "LDS": validate_lds_instructions,
    # Not enabled yet:
    # "OTHER": validate_other_instructions,
    # "JUMP": validate_jump_instructions,
    # "MESSAGE": validate_message_instructions,
}
|
||||
|
||||
|
||||
def validate_stochastic_samples_json(data_json):
    """Validate all stochastic PC samples found in ``data_json``.

    Reads ``data_json["strings"]["pc_sample_instructions"]`` (instruction
    texts) and ``data_json["buffer_records"]["pc_sample_stochastic"]`` (the
    samples). Samples whose ``inst_index`` is -1 are skipped.

    For every remaining sample the function:
      * stores the instruction text in ``sample["record"]["inst"]``
        (note: this modifies ``data_json`` in place);
      * classifies the instruction with the prefix tree, taking the longest
        matching prefix;
      * checks that ``wave_cnt`` lies in [0, 32];
      * checks the arbiter state in ``record["snapshot"]``.

    Afterwards the records are validated per instruction type with the
    function registered in ``inst_type_verify_functions``.

    Raises AssertionError when any check fails, when an instruction matches
    no known prefix, or when no validator is registered for its type.
    """
    # fill in the Prefix Tree
    prefix_tree = PrefixTree()
    for prefix, instruction_type in instructions_with_types:
        prefix_tree.insert(prefix, instruction_type)

    instructions = data_json["strings"]["pc_sample_instructions"]

    insts_per_prefix_type = defaultdict(list)

    for sample in data_json["buffer_records"]["pc_sample_stochastic"]:
        inst_index = sample["inst_index"]
        if inst_index == -1:
            # Ignoring samples from blit kernels
            continue

        record = sample["record"]
        # extend the record with the instruction text; the per-type
        # validators read it from there
        record["inst"] = instructions[inst_index]

        # get the instruction type(s) from the prefix tree;
        # each instruction must match at least one known prefix
        inst_prefix_types = prefix_tree.get_instruction_type(record["inst"])
        assert (
            inst_prefix_types
        ), f"No known instruction prefix matches: {record['inst']}"
        # As more than one prefix can match, take the last one as the most specific.
        insts_per_prefix_type[inst_prefix_types[-1]].append(record)

        # For each sample, we need to validate wave_cnt and arbiter state
        wave_cnt = record["wave_cnt"]
        assert 0 <= wave_cnt <= 32, "Invalid wave count"

        # arbiter state check
        validate_arbiter_state(record["snapshot"])

    # Check now the instruction type and arbiter state correlation.
    # All samples of one instruction type are validated with a single call
    # to keep the number of function calls low, so each sample is iterated
    # at most twice: once above for grouping and once inside its validator.
    for inst_prefix_type, sample_records in insts_per_prefix_type.items():
        validator = inst_type_verify_functions.get(inst_prefix_type)
        assert (
            validator is not None
        ), f"Unhandled instruction type: {inst_prefix_type}"
        validator(sample_records)
|
||||
+104
@@ -0,0 +1,104 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_arbiter_state(snapshot):
    """Check the arbiter issue/stall flags of one sample snapshot.

    ``snapshot`` is indexed by string keys: ``dual_issue_valu`` plus
    ``arb_state_issue_<pipe>`` / ``arb_state_stall_<pipe>`` for each pipe
    examined below.

    Raises AssertionError, with a message naming the pipe, on the first
    combination that is not allowed.
    """
    # VALU pipe checks
    if not snapshot["dual_issue_valu"]:
        # (valu_issue = 0 & valu_stall = 1) is not allowed
        assert not (
            snapshot["arb_state_issue_valu"] == 0
            and snapshot["arb_state_stall_valu"] == 1
        ), "VALU arbiter state check failed"
    else:
        # with dual issue, (valu_issue = 1 & valu_stall = 0) is the only
        # allowed combination
        assert (
            snapshot["arb_state_issue_valu"] == 1
            and snapshot["arb_state_stall_valu"] == 0
        ), "Dual issue VALU arbiter state check failed"

    # Pipes for which (issue = 0 & stall = 1) is not allowed.
    #
    # Note on the misc pipe (TODO: verify this): according to Joe's slides
    # the misc stall flag should be constrained more strictly, but that
    # stricter check
    #     assert((samples['Arbiter_State_Stall_Misc'] == 0).all())
    # fails for the `transpose` application, so misc gets the same relaxed
    # rule as the other pipes.
    stall_requires_issue = (
        ("arb_state_issue_matrix", "arb_state_stall_matrix", "Matrix arbiter state check failed"),
        ("arb_state_issue_scalar", "arb_state_stall_scalar", "Scalar arbiter state check failed"),
        ("arb_state_issue_vmem_tex", "arb_state_stall_vmem_tex", "Texture arbiter state check failed"),
        ("arb_state_issue_lds", "arb_state_stall_lds", "LDS arbiter state check failed"),
        ("arb_state_issue_flat", "arb_state_stall_flat", "Flat arbiter state check failed"),
        ("arb_state_issue_misc", "arb_state_stall_misc", "Misc arbiter state check failed"),
    )
    for issue_key, stall_key, failure in stall_requires_issue:
        assert not (snapshot[issue_key] == 0 and snapshot[stall_key] == 1), failure

    # Flags that must be 0.
    #  - export pipe: exp_issue can take both 1 and 0, so only exp_stall is
    #    checked (rule taken over from Joe's original, TODO: verify);
    #  - the lds_direct and brmsg pipes don't exist on GFX9, so both their
    #    issue and stall flags must be 0.
    must_be_zero = (
        ("arb_state_stall_exp", "Export arbiter state check failed"),
        ("arb_state_issue_lds_direct", "LDS Direct arbiter state check failed"),
        ("arb_state_stall_lds_direct", "LDS Direct arbiter state check failed"),
        ("arb_state_issue_brmsg", "BRMSG arbiter state check failed"),
        ("arb_state_stall_brmsg", "BRMSG arbiter state check failed"),
    )
    for flag_key, failure in must_be_zero:
        assert snapshot[flag_key] == 0, failure
|
||||
+160
@@ -0,0 +1,160 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_valu_instructions(sample_records):
    """Check stochastic PC samples attributed to VALU instructions.

    Every record must carry an instruction text starting with ``v_``.
    Issued samples must be typed as VALU with the VALU pipe issued and not
    stalled. Non-issued samples must report one of the permitted stall
    reasons, and for some reasons a matching arbiter state.

    :param sample_records: iterable of mappings providing ``inst``,
        ``wave_issued``, ``inst_type`` and a nested ``snapshot`` mapping.
    :raises AssertionError: when a record violates one of the checks.
    """
    ex_stall = (
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL"
    )
    no_instruction = (
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE"
    )
    not_win = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN"
    permitted_reasons = {ex_stall, no_instruction, not_win}

    for sample in sample_records:
        assert sample["inst"].startswith("v_"), "VALU instruction must start with 'v_'"

        arbiter = sample["snapshot"]
        if sample["wave_issued"] == 1:
            # The wave issued a VALU instruction.
            assert sample["inst_type"] == "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_VALU"
            assert arbiter["arb_state_issue_valu"] == 1
            assert arbiter["arb_state_stall_valu"] == 0
            continue

        # The wave did not issue; inst_type carries no information here.
        reason = arbiter["stall_reason"]
        assert reason in permitted_reasons, "Invalid stall reason for VALU instruction"

        if reason == ex_stall:
            # One would expect `arb_state_stall_valu` to be 1 as well, but
            # other values have been observed, so only the issue bit is checked.
            assert arbiter["arb_state_issue_valu"] == 1
        elif reason == not_win:
            # NOTE(review): the message talks about a Matrix instruction being
            # issued while the code inspects `arb_state_stall_matrix` -- confirm
            # which field is intended.
            assert (
                arbiter["arb_state_issue_valu"] == 1
                or arbiter["arb_state_stall_matrix"] == 1
            ), "VALU or Matrix instruction should be issued"
|
||||
|
||||
|
||||
def validate_flat_instructions(sample_records):
    """Check stochastic PC samples attributed to FLAT/global memory instructions.

    The instruction text must start with ``flat_`` or ``global_``. Issued
    samples must be typed as FLAT with the flat pipe issued and not stalled.
    Non-issued samples must report a permitted stall reason.

    :param sample_records: iterable of mappings providing ``inst``,
        ``wave_issued``, ``inst_type`` and a nested ``snapshot`` mapping.
    :raises AssertionError: when a record violates one of the checks.
    """
    ex_stall = (
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL"
    )
    permitted_reasons = frozenset(
        (
            ex_stall,
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",
        )
    )

    for sample in sample_records:
        has_flat_prefix = sample["inst"].startswith(("flat_", "global_"))
        assert has_flat_prefix, "Invalid name of FLAT instruction"

        arbiter = sample["snapshot"]
        if sample["wave_issued"] != 1:
            # The wave did not issue; inst_type carries no information here.
            reason = arbiter["stall_reason"]
            assert reason in permitted_reasons, "Invalid stall reason for flat instruction"

            if reason == ex_stall:
                assert arbiter["arb_state_issue_flat"] == 1, "Arbiter issued flat"
                assert arbiter["arb_state_stall_flat"] == 1, "EX stalled flat"

            # ARBITER_NOT_WIN is deliberately not checked for flat instructions.
            # It might mean that the FLAT/VMEM pipe was idle: the instruction is
            # issued to the arbiter to wake up the FLAT/VMEM clock but cannot be
            # issued to the execution pipeline, and is later reissued to the
            # arbiter, which then sends it to the execution pipeline. That is
            # why `Arbiter_State_Issue_Flat` is not always true here.
            continue

        # The wave issued a flat memory instruction.
        assert (
            sample["inst_type"] == "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_FLAT"
        ), "Invalid instruction type for FLAT instruction"
        assert arbiter["arb_state_issue_flat"] == 1, "Arbiter issued flat"
        assert arbiter["arb_state_stall_flat"] == 0, "Arbiter should not stalled flat"

        # TODO: add checks when flat stalls LDS, and vice versa
        # If global_ inst, check ISSUE_FLAT=1, STALL_FLAT=0, ISSUE_LDS=1 -> STALL_LDS = 1
|
||||
|
||||
|
||||
def validate_lds_instructions(sample_records):
    """Check stochastic PC samples attributed to LDS (``ds_``) instructions.

    Issued samples must be typed as LDS with the LDS pipe issued and not
    stalled. Non-issued samples must report a permitted stall reason and,
    for the arbiter-related reasons, a matching LDS arbiter state.

    :param sample_records: iterable of mappings providing ``inst``,
        ``wave_issued``, ``inst_type`` and a nested ``snapshot`` mapping.
    :raises AssertionError: when a record violates one of the checks; the
        message names the LDS pipe so failures are attributed correctly.
    """
    allowed_stall_reasons = set(
        [
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",
        ]
    )
    for record in sample_records:
        assert record["inst"].startswith("ds_"), "Invalid name of LDS instruction"

        snapshot = record["snapshot"]
        if record["wave_issued"] == 1:
            # wave issued an LDS memory instruction
            assert (
                record["inst_type"] == "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_LDS"
            ), "Invalid instruction type for LDS instruction"
            assert snapshot["arb_state_issue_lds"] == 1, "Arbiter issued lds"
            assert snapshot["arb_state_stall_lds"] == 0, "EX should not stalled lds"

            # TODO: add checks when LDS stalls flat, and vice versa
            # ISSUE_LDS=1, STALL_LDS=0, ISSUE_FLAT=1 -> STALL_FLAT = 1
        else:
            # wave did not issue an LDS instruction
            # inst_type is not relevant
            stall_reason = snapshot["stall_reason"]
            assert (
                stall_reason in allowed_stall_reasons
            ), "Invalid stall reason for LDS instruction"

            if (
                stall_reason
                == "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL"
            ):
                # Messages previously named the flat pipe (copy-paste slip).
                assert snapshot["arb_state_issue_lds"] == 1, "Arbiter issued lds"
                assert snapshot["arb_state_stall_lds"] == 1, "EX stalled lds"
            elif (
                stall_reason
                == "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN"
            ):
                assert snapshot["arb_state_issue_lds"] == 1, "Arbiter issued lds"
|
||||
+222
@@ -0,0 +1,222 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
|
||||
def validate_internal_instructions(sample_records):
    """Check stochastic PC samples attributed to internal instructions.

    Only ``s_nop`` is accepted as an internal instruction. Such samples must
    never be marked as issued, and their stall reason must be one of the two
    permitted values.

    :param sample_records: iterable of mappings providing ``inst``,
        ``wave_issued`` and a nested ``snapshot`` mapping.
    :raises AssertionError: when a record violates one of the checks.
    """
    permitted_reasons = frozenset(
        (
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_INTERNAL_INSTRUCTION",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        )
    )
    for sample in sample_records:
        is_known_internal = sample["inst"].startswith("s_nop")
        assert is_known_internal, "New internal instruction observed"

        not_issued = sample["wave_issued"] == 0
        assert not_issued, "Internal instruction should not be issued to EX"

        reason = sample["snapshot"]["stall_reason"]
        assert reason in permitted_reasons, "Invalid stall reason for internal instruction"
|
||||
|
||||
|
||||
def validate_waitcnt(sample_records):
    """Check stochastic PC samples attributed to ``s_waitcnt`` instructions.

    A waitcnt sample must never be marked as issued, and its stall reason
    must be one of the two permitted values.

    :param sample_records: iterable of mappings providing ``inst``,
        ``wave_issued`` and a nested ``snapshot`` mapping.
    :raises AssertionError: when a record violates one of the checks.
    """
    permitted_reasons = frozenset(
        (
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_WAITCNT",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
        )
    )
    for sample in sample_records:
        is_waitcnt = sample["inst"].startswith("s_waitcnt")
        assert is_waitcnt, "Waitcnt must start with s_waitcn"

        not_issued = sample["wave_issued"] == 0
        assert not_issued, "Waitcnt should not be issued to EX"

        reason = sample["snapshot"]["stall_reason"]
        assert reason in permitted_reasons, "Invalid stall reason for waitcnt"
|
||||
|
||||
|
||||
def validate_branch_instructions(sample_records):
    """Check stochastic PC samples attributed to scalar branch instructions.

    Accepted instruction texts start with ``s_cbranch`` (conditional) or
    ``s_branch`` (unconditional). Issued branches must have a branch
    instruction type and a MISC pipe that issued without stalling.
    Non-issued branches must report a permitted stall reason; unconditional
    branches are limited to a smaller set of reasons.

    :param sample_records: iterable of mappings providing ``inst``,
        ``inst_type``, ``wave_issued`` and a nested ``snapshot`` mapping that
        always contains ``stall_reason``.
    :raises AssertionError: when a record violates one of the checks.
    """
    no_instruction = (
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE"
    )
    alu_dependency = (
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY"
    )
    not_win = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN"
    ex_stall = (
        "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL"
    )
    branch_taken = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BRANCH_TAKEN"
    branch_not_taken = "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BRANCH_NOT_TAKEN"

    reasons_any_branch = frozenset((no_instruction, alu_dependency, not_win, ex_stall))
    reasons_unconditional = frozenset((no_instruction, not_win))

    for sample in sample_records:
        text = sample["inst"]
        kind = sample["inst_type"]
        arbiter = sample["snapshot"]
        reason = arbiter["stall_reason"]

        assert text.startswith(
            ("s_cbranch", "s_branch")
        ), "Branch must start with s_cbranch or s_branch"
        unconditional = text.startswith("s_branch")

        if sample["wave_issued"] == 1:
            if unconditional:
                # An issued unconditional branch can only be a taken branch.
                assert kind == branch_taken, "Unconditional branch must be taken"
            else:
                assert kind in (
                    branch_taken,
                    branch_not_taken,
                ), "Invalid branch type for conditional branch instruction"

            misc_pipe_ok = (
                arbiter["arb_state_issue_misc"] == 1
                and arbiter["arb_state_stall_misc"] == 0
            )
            assert misc_pipe_ok, "Invalid arb state for issued branch instruction"
            continue

        # Branch was not issued.
        assert reason in reasons_any_branch, "Invalid stall reason for branch instruction"

        if reason in (not_win, ex_stall):
            assert (
                arbiter["arb_state_issue_misc"] == 1
            ), "Arbiter must have issued MISC instruction"
        if reason == ex_stall:
            assert (
                arbiter["arb_state_stall_misc"] == 1
            ), "Arbiter must have stalled MISC instruction"

        # Unconditional branches accept fewer stall reasons.
        if unconditional:
            assert (
                reason in reasons_unconditional
            ), "Invalid stall reason for unconditional branch instruction"
|
||||
|
||||
|
||||
def validate_scalar_instructions(sample_records):
    """Check stochastic PC samples attributed to scalar instructions.

    Issued samples must be typed as SCALAR with the scalar pipe issued and
    not stalled. Non-issued samples must report a permitted stall reason
    and, for the arbiter-related reasons, a matching scalar arbiter state.

    :param sample_records: iterable of mappings providing ``wave_issued``,
        ``inst_type`` and a nested ``snapshot`` mapping.
    :raises AssertionError: when a record violates one of the checks.
    """
    allowed_stall_reasons = set(
        [
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ALU_DEPENDENCY",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL",
        ]
    )

    for record in sample_records:
        snapshot = record["snapshot"]
        if record["wave_issued"] == 1:
            assert (
                record["inst_type"] == "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_SCALAR"
            ), "Invalid scalar instruction type"
            assert (
                snapshot["arb_state_issue_scalar"] == 1
            ), "Arbiter must have issued scalar instruction"
            # The check requires stall == 0, so the message states the
            # negative expectation (it previously claimed the opposite).
            assert (
                snapshot["arb_state_stall_scalar"] == 0
            ), "Arbiter must not have stalled scalar instruction"
        else:
            stall_reason = snapshot["stall_reason"]
            assert (
                stall_reason in allowed_stall_reasons
            ), "Invalid stall reason for scalar instruction"

            if (
                stall_reason
                == "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN"
            ):
                assert (
                    snapshot["arb_state_issue_scalar"] == 1
                ), "Arbiter must have issued scalar instruction"

            elif (
                stall_reason
                == "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_WIN_EX_STALL"
            ):
                assert (
                    snapshot["arb_state_issue_scalar"] == 1
                ), "Arbiter must have issued scalar instruction"

                assert (
                    snapshot["arb_state_stall_scalar"] == 1
                ), "Arbiter must have stalled scalar instruction"
|
||||
|
||||
|
||||
def validate_barrier_instructions(sample_records):
    """Check stochastic PC samples attributed to ``s_barrier`` instructions.

    Issued samples must be typed as BARRIER with the MISC pipe issued and
    not stalled. Non-issued samples must report a permitted stall reason;
    for ARBITER_NOT_WIN the MISC pipe must have issued.

    :param sample_records: iterable of mappings providing ``inst``,
        ``wave_issued``, ``inst_type`` and a nested ``snapshot`` mapping.
    :raises AssertionError: when a record violates one of the checks.
    """
    allowed_stall_reasons = set(
        [
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_NO_INSTRUCTION_AVAILABLE",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN",
            "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_BARRIER_WAIT",
        ]
    )
    for record in sample_records:
        assert record["inst"].startswith(
            "s_barrier"
        ), "Barrier instruction must start with s_barrier"
        snapshot = record["snapshot"]
        if record["wave_issued"] == 1:
            assert (
                record["inst_type"] == "ROCPROFILER_PC_SAMPLING_INSTRUCTION_TYPE_BARRIER"
            ), "Invalid barrier instruction type"
            assert (
                snapshot["arb_state_issue_misc"] == 1
            ), "Arbiter must have issued barrier instruction"
            # The check requires stall == 0, so the message states the
            # negative expectation (it previously claimed the opposite).
            assert (
                snapshot["arb_state_stall_misc"] == 0
            ), "Arbiter must not have stalled barrier instruction"
        else:
            stall_reason = snapshot["stall_reason"]
            assert (
                stall_reason in allowed_stall_reasons
            ), "Invalid stall reason for barrier instruction"

            if (
                stall_reason
                == "ROCPROFILER_PC_SAMPLING_INSTRUCTION_NOT_ISSUED_REASON_ARBITER_NOT_WIN"
            ):
                assert (
                    snapshot["arb_state_issue_misc"] == 1
                ), "Arbiter must have issued misc instruction"
|
||||
|
||||
|
||||
# TODO: cover other types of instructions
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
# Directory in the tests build tree that receives the
# transpose_multiple_agents pytest package.
set(PACKAGE_OUTPUT_DIR
    ${ROCPROFILER_SDK_TESTS_BINARY_DIR}/pytest-packages/rocprofiler_sdk/pc_sampling/transpose_multiple_agents
)

# Python modules that make up the package.
set(PC_SAMPLING_PYTHON_SOURCES __init__.py csv.py)

# Copy each module unchanged (COPYONLY: no variable substitution) next to the
# other pytest packages.
foreach(_FILE IN LISTS PC_SAMPLING_PYTHON_SOURCES)
    configure_file(
        ${CMAKE_CURRENT_LIST_DIR}/${_FILE}
        ${PACKAGE_OUTPUT_DIR}/${_FILE}
        COPYONLY)
endforeach()
|
||||
+23
@@ -0,0 +1,23 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
from __future__ import absolute_import
|
||||
+93
@@ -0,0 +1,93 @@
|
||||
# MIT License
|
||||
#
|
||||
# Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import itertools
|
||||
import sys
|
||||
import pytest
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def validate_all_agents_are_sampled(
    input_samples_csv: pd.DataFrame,
    input_kernel_trace_csv: pd.DataFrame,
    input_agent_info_csv: pd.DataFrame,
):
    """Verify that PC samples were collected on every eligible GPU agent.

    Eligible agents are those named ``gfx90a`` or starting with ``gfx94`` /
    ``gfx95``. Samples with ``Dispatch_Id`` 0 are ignored. The number of
    distinct agents seen in the remaining samples must equal the number of
    eligible agents, and every sample whose comment mentions
    ``transpose-all-agents.cpp`` must map to a source line in [137, 145].

    :param input_samples_csv: samples table with ``Dispatch_Id`` and
        ``Instruction_Comment`` columns.
    :param input_kernel_trace_csv: kernel trace table with ``Dispatch_Id`` and
        ``Agent_Id`` columns; ``Agent_Id`` is text such as ``"Agent 3"``.
    :param input_agent_info_csv: agent table with a ``Name`` column.
    :raises AssertionError: when one of the expectations does not hold.
    """
    # Inclusive source-line range accepted for transpose kernel samples.
    first_kernel_line, last_kernel_line = 137, 145

    def _is_mi2xx_or_mi3xx(name):
        return (
            name == "gfx90a" or name.startswith("gfx94") or name.startswith("gfx95")
        )

    eligible_agents = input_agent_info_csv[
        input_agent_info_csv["Name"].apply(_is_mi2xx_or_mi3xx)
    ]

    # Keep only samples attributed to a dispatch; copy so that adding a column
    # below does not write into a view of the caller's frame.
    has_dispatch = input_samples_csv["Dispatch_Id"] != 0
    samples = input_samples_csv[has_dispatch].copy()

    # Kernel trace agent ids look like "Agent 3"; take the token after the
    # space and use it as the numeric id of the agent that ran the dispatch.
    dispatch_ids = samples["Dispatch_Id"]
    agent_token_by_dispatch = (
        input_kernel_trace_csv.set_index("Dispatch_Id")["Agent_Id"]
        .str.split(" ")
        .str[1]
    )
    samples["Agent_Id"] = dispatch_ids.map(agent_token_by_dispatch).astype(np.uint64)

    # Every eligible agent must have produced samples.
    distinct_agents = samples["Agent_Id"].unique()
    assert len(distinct_agents) == len(eligible_agents)

    # Each sampled agent needs at least one sampled dispatch.
    for _agent, per_agent_samples in samples.groupby("Agent_Id"):
        dispatches_on_agent = per_agent_samples["Dispatch_Id"].unique()
        assert len(dispatches_on_agent) >= 1

    # Restrict to decoded samples that point into the transpose source file.
    mentions_transpose = samples["Instruction_Comment"].apply(
        lambda text: "transpose-all-agents.cpp" in text
    )
    transpose_samples = samples[mentions_transpose].copy()

    # The comment ends with ":<line>"; extract the line number.
    transpose_samples["Source_Line_Num"] = transpose_samples[
        "Instruction_Comment"
    ].apply(lambda text: int(text.split(":")[-1]))

    line_numbers = transpose_samples["Source_Line_Num"]
    inside_kernel = (line_numbers >= first_kernel_line) & (
        line_numbers <= last_kernel_line
    )
    assert inside_kernel.all()
|
||||
@@ -3,3 +3,4 @@
|
||||
#
|
||||
|
||||
add_subdirectory(host-trap)
|
||||
add_subdirectory(stochastic)
|
||||
|
||||
+11
-346
@@ -32,360 +32,19 @@ import pandas as pd
|
||||
# =========================== Validating CSV output
|
||||
|
||||
|
||||
# Keep this in case we decide to revert workgroup_id information
|
||||
def validate_workgoup_id_x_y_z(df, max_x, max_y, max_z):
    """Assert that the workgroup size columns lie within [0, max] per axis.

    :param df: table with ``Workgroup_Size_X``, ``Workgroup_Size_Y`` and
        ``Workgroup_Size_Z`` columns convertible to int.
    :param max_x: inclusive upper bound for the X column.
    :param max_y: inclusive upper bound for the Y column.
    :param max_z: inclusive upper bound for the Z column.
    :raises AssertionError: when a value is negative or above its bound.
    """
    bounds = (
        ("Workgroup_Size_X", max_x),
        ("Workgroup_Size_Y", max_y),
        ("Workgroup_Size_Z", max_z),
    )
    for column, upper in bounds:
        values = df[column].astype(int)
        assert (values >= 0).all()
        assert (values <= upper).all()
|
||||
|
||||
|
||||
# Keep this in case we decide to revert wave_id information
|
||||
def validate_wave_id(df, max_wave_id):
    """Assert that no ``Wave_Id`` value (converted to int) exceeds ``max_wave_id``.

    :param df: table with a ``Wave_Id`` column convertible to int.
    :param max_wave_id: inclusive upper bound.
    :raises AssertionError: when a wave id is above the bound.
    """
    wave_ids = df["Wave_Id"].astype(int)
    within_bound = wave_ids <= max_wave_id
    assert within_bound.all()
|
||||
|
||||
|
||||
# Keep this in case we decide to revert wave_id information
|
||||
def validate_chiplet(df, max_chiplet):
    """Assert that no ``Chiplet`` value (converted to int) exceeds ``max_chiplet``.

    :param df: table with a ``Chiplet`` column convertible to int.
    :param max_chiplet: inclusive upper bound.
    :raises AssertionError: when a chiplet index is above the bound.
    """
    chiplets = df["Chiplet"].astype(int)
    within_bound = chiplets <= max_chiplet
    assert within_bound.all()
|
||||
|
||||
|
||||
def validate_instruction_decoding(
    df,
    inst_str,
    exec_mask_uint64: np.uint64 = None,
    source_code_lines_range: (int, int) = None,
    all_source_lines_samples=False,
):
    """Validate samples whose decoded instruction starts with ``inst_str``.

    At least one such sample must exist. Optionally checks the execution mask
    and the source line the sample is mapped to.

    :param df: samples table with ``Instruction`` and, depending on the
        options used, ``Exec_Mask`` and ``Instruction_Comment`` columns.
    :param inst_str: prefix the instruction text has to start with.
    :param exec_mask_uint64: when given, every selected sample must have
        exactly this execution mask.
    :param source_code_lines_range: when given, an inclusive ``(start, end)``
        pair; every selected sample must map to a line in that range.
    :param all_source_lines_samples: when true (and a range is given), the
        number of distinct sampled lines must equal the size of the range.
    :raises AssertionError: when one of the expectations does not hold.
    """
    # Work on a copy so that the extra column never touches a view of `df`.
    has_prefix = df["Instruction"].apply(lambda text: text.startswith(inst_str))
    selected = df[has_prefix].copy()
    assert not selected.empty

    if exec_mask_uint64 is not None:
        masks = selected["Exec_Mask"].astype(np.uint64)
        assert (masks == exec_mask_uint64).all()

    if source_code_lines_range is None:
        return

    first_line, last_line = source_code_lines_range
    # The comment usually has the form /path/to/source/file.cpp:line_num.
    selected["source_line_num"] = selected["Instruction_Comment"].apply(
        lambda text: int(text.split(":")[-1])
    )
    line_numbers = selected["source_line_num"]
    assert (line_numbers >= first_line).all()
    assert (line_numbers <= last_line).all()

    if all_source_lines_samples:
        # Every line of the range must have been hit by at least one sample.
        assert len(line_numbers.unique()) == (last_line - first_line + 1)
|
||||
|
||||
|
||||
def validate_instruction_comment(df):
    """Assert that every sample carries a non-empty instruction comment.

    A comment counts as missing when it is the empty string or the text
    ``nullptr``. According to the original note, the tested application is
    built with debug symbols, so a comment is always expected.

    :param df: samples table with an ``Instruction_Comment`` column.
    :raises AssertionError: when a comment is missing.
    """
    comments = df["Instruction_Comment"]
    missing = (comments == "") | (comments == "nullptr")
    assert (~missing).all()
|
||||
|
||||
|
||||
def validate_instruction_correlation_id_relation(df):
    """Assert that ``Correlation_Id`` is 0 exactly for undecoded samples.

    A sample is undecoded when its ``Instruction`` is the empty string or
    ``nullptr``. Per the original notes, such samples stem from blit kernels
    or self-modifying code. Additionally, ``Correlation_Id`` must equal
    ``Dispatch_Id`` on every row, which the original note attributes to no
    API tracing being enabled.

    :param df: samples table with ``Instruction``, ``Correlation_Id`` and
        ``Dispatch_Id`` columns.
    :raises AssertionError: when one of the relations does not hold.
    """

    def _undecoded(frame):
        text = frame["Instruction"]
        return (text == "") | (text == "nullptr")

    # Direction 1: undecoded instruction => correlation id is 0.
    undecoded_rows = df[_undecoded(df)]
    assert (undecoded_rows["Correlation_Id"] == 0).all()

    # Direction 2: correlation id is 0 => instruction is undecoded.
    zero_cid_rows = df[df["Correlation_Id"] == 0]
    assert _undecoded(zero_cid_rows).all()

    assert len(undecoded_rows) == len(zero_cid_rows)

    # Internal correlation id is expected to match the dispatch id.
    ids_match = df["Correlation_Id"] == df["Dispatch_Id"]
    assert all(ids_match)
|
||||
|
||||
|
||||
def validate_exec_mask_based_on_correlation_id(df):
    """Assert that each sample's exec mask is determined by its correlation id.

    Per the original notes, the checked kernels launch blocks whose thread
    count equals the kernel's correlation id, so a sample with correlation id
    ``n`` is expected to have exactly the ``n`` lowest mask bits set
    (1 -> 0b1, 2 -> 0b11, ..., 64 -> 0xffffffffffffffff).

    The function adds the helper columns ``active_SIMD_threads`` and
    ``Exec_Mask2`` to ``df``; pass a copy if the caller's frame must stay
    unchanged.

    :param df: samples table with ``Exec_Mask`` and ``Correlation_Id`` columns.
    :raises AssertionError: when a mask does not match its correlation id.
    """

    def _popcount(mask):
        return bin(mask).count("1")

    def _lowest_bits_set(count):
        return int("0b" + (count * "1"), 2)

    # Number of active lanes must match the correlation id.
    df["active_SIMD_threads"] = df["Exec_Mask"].apply(_popcount)
    lanes_match = df["active_SIMD_threads"] == df["Correlation_Id"]
    assert lanes_match.all()

    # TODO: Comment out the following code if it causes spurious fails.
    # Stricter check based on experience: the active lanes are the lowest ones.
    df["Exec_Mask2"] = df["Correlation_Id"].astype(int).apply(_lowest_bits_set)

    # TODO: exec should be in hex and that will ease the comparison
    observed = df["Exec_Mask"].astype(np.uint64)
    expected = df["Exec_Mask2"].astype(np.uint64)
    assert (observed == expected).all()
|
||||
|
||||
|
||||
def exec_mask_manipulation_validate_csv(df, all_sampled=False):
    """Run the CSV checks for the exec-mask-manipulation test.

    The frame must be non-empty, every sample must carry an instruction
    comment, and correlation ids must be consistent with instruction decoding.
    Non-zero correlation ids have to lie in [1, 65]. Ids up to 64 are checked
    with ``validate_exec_mask_based_on_correlation_id``; id 65 is checked for
    ``v_rcp_f64`` and ``v_rcp_f32`` samples with their expected masks and
    source-line ranges.

    :param df: samples table read from the CSV output.
    :param all_sampled: when true, additionally require that all 65
        correlation ids and every source line of both ranges were sampled.
    :raises AssertionError: when one of the expectations does not hold.
    """
    assert not df.empty

    validate_instruction_comment(df)
    validate_instruction_correlation_id_relation(df)

    # Samples with a non-zero correlation id (decoded instructions).
    decoded = df[df["Correlation_Id"] != 0]
    decoded_ids = decoded["Correlation_Id"].astype(int)

    # Exactly 65 kernels, hence correlation ids 1..65.
    assert (decoded_ids >= 1).all()
    assert (decoded_ids <= 65).all()
    if all_sampled:
        # all correlation IDs must be sampled
        assert len(decoded_ids.unique()) == 65

    # Hand over a copy: the callee adds helper columns.
    first_64_kernels = decoded[decoded["Correlation_Id"] <= 64]
    validate_exec_mask_based_on_correlation_id(first_64_kernels.copy())

    # The last kernel: (instruction prefix, exec mask in hex, source lines).
    # 0x5555... selects even lanes (v_rcp_f64), 0xAAAA... odd lanes (v_rcp_f32).
    last_kernel = df[df["Correlation_Id"] == 65]
    expectations = (
        ("v_rcp_f64", "5555555555555555", (288, 387)),
        ("v_rcp_f32", "AAAAAAAAAAAAAAAA", (391, 490)),
    )
    for prefix, mask_hex, line_range in expectations:
        validate_instruction_decoding(
            last_kernel,
            prefix,
            exec_mask_uint64=np.uint64(int(mask_hex, 16)),
            source_code_lines_range=line_range,
            all_source_lines_samples=all_sampled,
        )
|
||||
|
||||
|
||||
def test_validate_pc_sampling_exec_mask_manipulation_csv(
    input_csv: pd.DataFrame, all_sampled: bool
):
    """Validate the PC sampling CSV output via the shared pytest package.

    Delegates to ``exec_mask_manipulation_validate_csv`` from
    ``rocprofiler_sdk.pc_sampling.exec_mask_manipulation.csv``.

    :param input_csv: samples table (presumably supplied by a pytest
        fixture -- confirm in the surrounding conftest).
    :param all_sampled: forwarded to the validator unchanged.
    """
    # Imported lazily, inside the test, as in the original.
    import rocprofiler_sdk.pc_sampling.exec_mask_manipulation.csv as exec_mask_csv

    exec_mask_csv.exec_mask_manipulation_validate_csv(
        input_csv, all_sampled=all_sampled
    )
|
||||
|
||||
|
||||
# ========================= Validating JSON output
|
||||
|
||||
|
||||
def validate_json_exec_mask_manipulation(data_json, all_sampled=False):
    """Validate host-trap PC samples of the exec-mask-manipulation app (JSON output).

    The function walks ``data_json["buffer_records"]["pc_sample_host_trap"]``
    exactly once and asserts, per sample:

    * the internal correlation id is non-negative and equals ``dispatch_id``;
    * samples with correlation id 0 have no code object and no decoded
      instruction (``inst_index == -1``);
    * samples of kernels 1..64 come from wave 0 and their exec mask has
      exactly ``cid`` low bits set;
    * samples of kernel 65 come from waves 0..3, and ``v_rcp_f64`` /
      ``v_rcp_f32`` instructions carry the even-/odd-lane exec mask and map
      to the expected source-line range.

    Args:
        data_json: plain ``dict`` holding the ``"agents"``, ``"strings"`` and
            ``"buffer_records"`` sections of the tool output.
        all_sampled: when true, additionally require that every correlation
            id, wave, source line, chiplet and (SIMD, waveslot) pair was
            sampled at least once.

    Raises:
        AssertionError: if any of the checks above fails.
    """
    # Although functional programming might look more elegant,
    # the checks are written procedurally to avoid iterating over the list
    # of samples more than once. Wrapping individual checks in dedicated
    # functions was observed to add significant overhead, so they are inlined.

    # The function assumes a homogeneous system: the properties of the first
    # GPU agent are used for all samples.
    agents = data_json["agents"]
    gpu_agents = list(filter(lambda agent: agent["type"] == 2, agents))
    # There should be at least one GPU agent
    assert len(gpu_agents) > 0
    first_gpu_agent = gpu_agents[0]
    num_xcc = first_gpu_agent["num_xcc"]
    max_waves_per_simd = first_gpu_agent["max_waves_per_simd"]
    simd_per_cu = first_gpu_agent["simd_per_cu"]

    instructions = data_json["strings"]["pc_sample_instructions"]
    comments = data_json["strings"]["pc_sample_comments"]

    # execution mask where even SIMD lanes are active;
    # corresponds to the v_rcp_f64 instructions of the last kernel
    even_simds_active_exec_mask = np.uint64(int("5555555555555555", 16))
    # start and end source code lines of the v_rcp_f64 instructions of the last kernel
    v_rcp_f64_start_line_num, v_rcp_f64_end_line_num = 288, 387
    # execution mask where odd SIMD lanes are active;
    # corresponds to the v_rcp_f32 instructions of the last kernel
    odd_simds_active_exec_mask = np.uint64(int("AAAAAAAAAAAAAAAA", 16))
    # start and end source code lines of the v_rcp_f32 instructions of the last kernel
    v_rcp_f32_start_line_num, v_rcp_f32_end_line_num = 391, 490

    # sampled wave_in_grp values of the last kernel
    kernel65_sampled_wave_in_grp = set()
    # sampled source lines of the last kernel matching v_rcp_f64 instructions
    kernel65_v_rcp_64_sampled_source_line_set = set()
    # sampled source lines of the last kernel matching v_rcp_f32 instructions
    kernel65_v_rcp_f32_sampled_source_line_set = set()
    # sampled correlation IDs
    sampled_cids_set = set()
    # pairs of sampled SIMD ids and waveslot IDs
    sampled_simd_waveslots_pairs = set()
    # sampled chiplets
    sampled_chiplets = set()
    # sampled VMIDs (collected only; see the disabled check at the end)
    sampled_vmids = set()

    for sample in data_json["buffer_records"]["pc_sample_host_trap"]:
        record = sample["record"]
        cid = record["corr_id"]["internal"]

        # pull information from hw_id
        hw_id = record["hw_id"]
        sampled_chiplets.add(hw_id["chiplet"])
        sampled_simd_waveslots_pairs.add((hw_id["simd_id"], hw_id["wave_id"]))
        sampled_vmids.add(hw_id["vm_id"])

        # Checks common to all samples

        # cids must be non-negative numbers
        assert cid >= 0

        inst_index = sample["inst_index"]

        # Since we're not enabling any kind of API tracing, the internal
        # correlation id should be equal to the dispatch_id
        assert cid == record["dispatch_id"]

        if cid == 0:
            # The sample originates either from a blit kernel or self-modifying code.
            # Thus, the code object is unknown, as well as the instruction.
            assert record["pc"]["code_object_id"] == 0
            assert inst_index == -1
        else:
            # Update set of sampled cids
            sampled_cids_set.add(cid)

            # All samples with non-zero correlation ID should pass the following checks:
            # the code object is known, and so is the instruction
            assert record["pc"]["code_object_id"] != 0
            assert inst_index != -1

            wgid = record["wrkgrp_id"]
            # check coordinates of the workgroup
            assert wgid["x"] >= 0 and wgid["x"] <= 1023
            assert wgid["y"] == 0
            assert wgid["z"] == 0

            wave_in_grp = record["wave_in_grp"]
            exec_mask = record["exec_mask"]

            if cid < 65:
                # checks specific for samples from the first 64 kernels
                assert wave_in_grp == 0

                # Each kernel launches 1024 blocks, and each block contains a
                # number of threads equal to the correlation ID of the kernel.
                # The exec mask of a sample should therefore contain a number
                # of ones equal to the correlation ID of the kernel during
                # whose execution the sample was generated.
                assert bin(exec_mask).count("1") == cid

                # TODO: Comment out the following code if it causes spurious fails.
                # A stricter constraint based on experience: the exec mask of
                # sampled instructions respects the following pattern:
                # cid -> exec
                # 1 -> 0b1
                # 2 -> 0b11
                # 3 -> 0b111
                # ...
                # 64 -> 0xffffffffffffffff
                exec_mask_str = "0b" + "1" * cid
                assert np.uint64(exec_mask) == np.uint64(int(exec_mask_str, 2))
            else:
                # No more than 65 cids
                assert cid == 65
                # Monitor wave_in_grp values being sampled
                kernel65_sampled_wave_in_grp.add(wave_in_grp)
                # checks specific for samples from the last kernel
                assert wave_in_grp >= 0 and wave_in_grp <= 3

                # validate instruction decoding
                inst = instructions[inst_index]
                comm = comments[inst_index]
                # The instruction comment is usually in the following format:
                # /path/to/source/file.cpp:line_num
                # The line number is parsed only for the instructions that are
                # actually validated, so other instructions whose comment is
                # empty or has a different shape do not abort the validation.
                if inst.startswith("v_rcp_f64"):
                    line_num = int(comm.split(":")[-1])
                    # even SIMD lanes active
                    assert np.uint64(exec_mask) == even_simds_active_exec_mask
                    assert (
                        line_num >= v_rcp_f64_start_line_num
                        and line_num <= v_rcp_f64_end_line_num
                    )
                    kernel65_v_rcp_64_sampled_source_line_set.add(line_num)
                elif inst.startswith("v_rcp_f32"):
                    line_num = int(comm.split(":")[-1])
                    # odd SIMD lanes active
                    assert np.uint64(exec_mask) == odd_simds_active_exec_mask
                    assert (
                        line_num >= v_rcp_f32_start_line_num
                        and line_num <= v_rcp_f32_end_line_num
                    )
                    kernel65_v_rcp_f32_sampled_source_line_set.add(line_num)

    if all_sampled:
        # All cids that belong to the range [1, 65] should be sampled
        assert len(sampled_cids_set) == 65

        # all wave_in_grp values in the range [0, 3] should be sampled for the last kernel
        assert len(kernel65_sampled_wave_in_grp) == 4

        # all source lines matching v_rcp_f64 instructions of the last kernel should be sampled
        assert len(kernel65_v_rcp_64_sampled_source_line_set) == (
            v_rcp_f64_end_line_num - v_rcp_f64_start_line_num + 1
        )
        # all source lines matching v_rcp_f32 instructions of the last kernel should be sampled
        assert len(kernel65_v_rcp_f32_sampled_source_line_set) == (
            v_rcp_f32_end_line_num - v_rcp_f32_start_line_num + 1
        )

        # all chiplets must be sampled
        assert len(sampled_chiplets) == num_xcc
        # all (SIMD ID, waveslot ID) pairs must be sampled
        assert len(sampled_simd_waveslots_pairs) == simd_per_cu * max_waves_per_simd

    # assert chiplet index
    assert all(map(lambda chiplet: 0 <= chiplet < num_xcc, sampled_chiplets))
    # assert (SIMD ID, waveslot ID) combinations
    assert all(
        map(
            lambda simd_waveslot: (0 <= simd_waveslot[0] < simd_per_cu)
            and (0 <= simd_waveslot[1] < max_waves_per_simd),
            sampled_simd_waveslots_pairs,
        )
    )

    # Apparently, not all dispatches must belong to the same VMID,
    # so the following check is temporarily disabled.
    # # all samples should belong to the same VMID
    # assert len(sampled_vmids) == 1
|
||||
|
||||
|
||||
def test_validate_pc_sampling_exec_mask_manipulation_json(
|
||||
input_json, input_csv: pd.DataFrame, all_sampled: bool
|
||||
):
|
||||
@@ -393,7 +52,13 @@ def test_validate_pc_sampling_exec_mask_manipulation_json(
|
||||
# The same amount of samples should be in both CSV and JSON files.
|
||||
assert len(input_csv) == len(data["buffer_records"]["pc_sample_host_trap"])
|
||||
# # validating JSON output
|
||||
validate_json_exec_mask_manipulation(data, all_sampled=all_sampled)
|
||||
from rocprofiler_sdk.pc_sampling.exec_mask_manipulation.json import (
|
||||
validate_json_exec_mask_manipulation,
|
||||
)
|
||||
|
||||
validate_json_exec_mask_manipulation(
|
||||
data, pc_sampling_method="host_trap", all_sampled=all_sampled
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
+5
-49
@@ -34,57 +34,13 @@ def test_multi_agent_support(
|
||||
input_kernel_trace_csv: pd.DataFrame,
|
||||
input_agent_info_csv: pd.DataFrame,
|
||||
):
|
||||
transpose_kernel_source_line_start = 137
|
||||
transpose_kernel_source_line_end = 145
|
||||
|
||||
mi2xx_mi3xx_agents_df = input_agent_info_csv[
|
||||
input_agent_info_csv["Name"].apply(
|
||||
lambda name: name == "gfx90a"
|
||||
or name.startswith("gfx94")
|
||||
or name.startswith("gfx95")
|
||||
)
|
||||
]
|
||||
|
||||
# Extract samples that originates from know code object it
|
||||
samples_df = input_samples_csv[input_samples_csv["Dispatch_Id"] != 0].copy()
|
||||
|
||||
# Determine the agent on which sample was generated
|
||||
samples_df["Agent_Id"] = (
|
||||
samples_df["Dispatch_Id"]
|
||||
.map(
|
||||
input_kernel_trace_csv.set_index("Dispatch_Id")["Agent_Id"]
|
||||
.str.split(" ")
|
||||
.str[1]
|
||||
)
|
||||
.astype(np.uint64)
|
||||
from rocprofiler_sdk.pc_sampling.transpose_multiple_agents.csv import (
|
||||
validate_all_agents_are_sampled,
|
||||
)
|
||||
sampled_agents = samples_df["Agent_Id"].unique()
|
||||
sampled_agents_num = len(sampled_agents)
|
||||
# all agents must be sampled
|
||||
assert sampled_agents_num == len(mi2xx_mi3xx_agents_df)
|
||||
|
||||
# separate samples per agents
|
||||
grouped_samples_per_agent = samples_df.groupby("Agent_Id")
|
||||
for agent_id, agent_samples_df in grouped_samples_per_agent:
|
||||
sampled_dispatches = agent_samples_df["Dispatch_Id"].unique()
|
||||
# at least 1 sampled dispatch per agent
|
||||
assert len(sampled_dispatches) >= 1
|
||||
|
||||
# extract decoded samples that are mapped to the transpose.cpp file
|
||||
transpose_samples_df = samples_df[
|
||||
samples_df["Instruction_Comment"].apply(
|
||||
lambda comment: "transpose-all-agents.cpp" in comment
|
||||
)
|
||||
].copy()
|
||||
# determine the line number for each sample
|
||||
transpose_samples_df["Source_Line_Num"] = transpose_samples_df[
|
||||
"Instruction_Comment"
|
||||
].apply(lambda source_line: int(source_line.split(":")[-1]))
|
||||
# assert that line belongs to a kernel range
|
||||
assert (
|
||||
(transpose_samples_df["Source_Line_Num"] >= transpose_kernel_source_line_start)
|
||||
& (transpose_samples_df["Source_Line_Num"] <= transpose_kernel_source_line_end)
|
||||
).all()
|
||||
validate_all_agents_are_sampled(
|
||||
input_samples_csv, input_kernel_trace_csv, input_agent_info_csv
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
#
|
||||
# PC sampling tests
|
||||
#
|
||||
|
||||
add_subdirectory(exec-mask-manipulation)
|
||||
add_subdirectory(transpose-multiple-agents)
|
||||
+153
@@ -0,0 +1,153 @@
|
||||
#
|
||||
# rocprofv3 tool test
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
project(
|
||||
rocprofiler-tests-pc-sampling
|
||||
LANGUAGES CXX
|
||||
VERSION 0.0.0)
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
|
||||
rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py
|
||||
input.json input.yml)
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-cmd-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --pc-sampling-unit cycles
|
||||
--pc-sampling-method stochastic --pc-sampling-interval 1048576 -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input -o out --output-format csv json
|
||||
-- $<TARGET_FILE:exec-mask-manipulation>)
|
||||
|
||||
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
|
||||
|
||||
set(pc-sampling-env-stochastic-exec-mask-manipulation "${PRELOAD_ENV}")
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-cmd-execute
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
ENVIRONMENT
|
||||
"${pc-sampling-env-stochastic-exec-mask-manipulation}"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-json-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/input.json -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input --
|
||||
$<TARGET_FILE:exec-mask-manipulation>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-json-execute
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
ENVIRONMENT
|
||||
"${pc-sampling-env-stochastic-exec-mask-manipulation}"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-yaml-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/input.yml -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input --
|
||||
$<TARGET_FILE:exec-mask-manipulation>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-yaml-execute
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
ENVIRONMENT
|
||||
"${pc-sampling-env-stochastic-exec-mask-manipulation}"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
|
||||
# ========================= Validation tests
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-cmd-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py -k
|
||||
test_validate_pc_sampling_ --input-csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input/out_pc_sampling_stochastic.csv
|
||||
--input-json ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input/out_results.json
|
||||
--all-sampled False)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-cmd-validate
|
||||
PROPERTIES
|
||||
TIMEOUT
|
||||
60
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
DEPENDS
|
||||
"rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-cmd-execute"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-json-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py -k
|
||||
test_validate_pc_sampling_ --input-csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input/out_pc_sampling_stochastic.csv
|
||||
--input-json ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input/out_results.json
|
||||
--all-sampled False)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-json-validate
|
||||
PROPERTIES
|
||||
TIMEOUT
|
||||
60
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
DEPENDS
|
||||
"rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-json-execute"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-yaml-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py -k
|
||||
test_validate_pc_sampling_ --input-csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input/out_pc_sampling_stochastic.csv
|
||||
--input-json ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input/out_results.json
|
||||
--all-sampled False)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-yaml-validate
|
||||
PROPERTIES
|
||||
TIMEOUT
|
||||
60
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
DEPENDS
|
||||
"rocprofv3-test-pc-sampling-stochastic-exec-mask-manipulation-input-yaml-execute"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
+70
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import os
|
||||
import pytest
|
||||
import pandas as pd
|
||||
|
||||
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
|
||||
from rocprofiler_sdk.pytest_utils import collapse_dict_list
|
||||
|
||||
|
||||
def pytest_addoption(parser):
    """Register the command-line options consumed by the fixtures of this test.

    Args:
        parser: pytest option parser; three string-valued options are added:
            ``--input-csv``, ``--input-json`` and ``--all-sampled``.
    """
    parser.addoption(
        "--input-csv",
        action="store",
        help="Path to CSV file.",
    )

    parser.addoption(
        "--input-json",
        action="store",
        # This option carries the JSON output, not a CSV file.
        help="Path to JSON file.",
    )

    parser.addoption(
        "--all-sampled",
        action="store",
        help="All SW and HW units must be sampled.",
    )
|
||||
|
||||
|
||||
@pytest.fixture
def input_csv(request):
    """Load the PC-sampling CSV named by ``--input-csv`` into a DataFrame.

    When the file does not exist, the text "PC sampling unavailable" is
    printed and ``None`` is returned instead of a DataFrame.
    """
    csv_path = request.config.getoption("--input-csv")

    if os.path.isfile(csv_path):
        column_types = {
            "Exec_Mask": "uint64",
            "Instruction": str,
            "Instruction_Comment": str,
            "Wave_Issued_Instruction": bool,
            "Instruction_Type": str,
            "Stall_Reason": str,
        }
        with open(csv_path, "r") as handle:
            return pd.read_csv(
                handle,
                na_filter=False,  # parse empty fields as ""
                keep_default_na=False,  # parse empty fields as ""
                dtype=column_types,
            )

    # The CSV file was not generated because the test responsible for
    # producing it was skipped or failed; emit the message so that this
    # test gets skipped as well.
    print("PC sampling unavailable")
    return None
|
||||
|
||||
|
||||
@pytest.fixture
def input_json(request):
    """Load the JSON file named by ``--input-json`` as plain Python containers."""
    json_path = request.config.getoption("--input-json")
    with open(json_path, "r") as handle:
        raw = json.load(handle)
        # A plain dict is used on purpose: feeding the data into dotdict
        # was observed to cost an extra 5-6 seconds.
        return collapse_dict_list(raw)
|
||||
|
||||
|
||||
@pytest.fixture
def all_sampled(request):
    """Return True only when ``--all-sampled`` was given as the exact string "True"."""
    return request.config.getoption("--all-sampled") == "True"
|
||||
+14
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"jobs": [
|
||||
{
|
||||
"pc_sampling_unit": "cycles",
|
||||
"pc_sampling_method": "stochastic",
|
||||
"pc_sampling_interval": 1048576,
|
||||
"output_file": "out",
|
||||
"output_format": [
|
||||
"csv",
|
||||
"json"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+8
@@ -0,0 +1,8 @@
|
||||
jobs:
|
||||
- pc_sampling_unit: "cycles"
|
||||
pc_sampling_method: "stochastic"
|
||||
pc_sampling_interval: 1048576
|
||||
output_file: "out"
|
||||
output_format:
|
||||
- "csv"
|
||||
- "json"
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
|
||||
[pytest]
|
||||
addopts = --durations=20 -rA -s -vv
|
||||
testpaths = validate.py
|
||||
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
|
||||
+62
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import pytest
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# =========================== Validating fields common for both host-trap and stochastic CSV output
|
||||
|
||||
|
||||
def test_validate_pc_sampling_exec_mask_manipulation_csv(
    input_csv: pd.DataFrame, all_sampled: bool
):
    """Run the shared exec-mask-manipulation CSV checks on the stochastic output.

    Args:
        input_csv: samples loaded by the ``input_csv`` fixture.
        all_sampled: forwarded unchanged to the shared validator.
    """
    # Imported lazily, inside the test, rather than at module import time.
    from rocprofiler_sdk.pc_sampling.exec_mask_manipulation.csv import (
        exec_mask_manipulation_validate_csv,
    )

    exec_mask_manipulation_validate_csv(input_csv, all_sampled=all_sampled)
|
||||
|
||||
|
||||
# # ========================= Validating fields common for both host-trap and stochastic JSON output
|
||||
|
||||
|
||||
def test_validate_pc_sampling_exec_mask_manipulation_json(
    input_json, input_csv: pd.DataFrame, all_sampled: bool
):
    """Cross-check CSV and JSON sample counts, then validate the JSON samples.

    Args:
        input_json: parsed tool output; the ``"rocprofiler-sdk-tool"`` entry is used.
        input_csv: samples loaded by the ``input_csv`` fixture.
        all_sampled: forwarded unchanged to the shared validator.
    """
    data = input_json["rocprofiler-sdk-tool"]
    # The same amount of samples should be in both CSV and JSON files.
    assert len(input_csv) == len(data["buffer_records"]["pc_sample_stochastic"])
    # Validate the JSON output with the shared validator, selecting the
    # stochastic sampling method.
    from rocprofiler_sdk.pc_sampling.exec_mask_manipulation.json import (
        validate_json_exec_mask_manipulation,
    )

    validate_json_exec_mask_manipulation(
        data, pc_sampling_method="stochastic", all_sampled=all_sampled
    )
|
||||
|
||||
|
||||
# ======================== Validating fields specific for stochastic sampling
|
||||
|
||||
|
||||
def test_validate_pc_sampling_stochastic_specific_csv(input_csv: pd.DataFrame):
    """Run the stochastic-specific CSV checks from the shared ``gfx9`` module.

    Args:
        input_csv: samples loaded by the ``input_csv`` fixture.
    """
    # Imported lazily, inside the test, rather than at module import time.
    from rocprofiler_sdk.pc_sampling.stochastic.csv.gfx9 import (
        validate_stochastic_samples_csv,
    )

    validate_stochastic_samples_csv(input_csv)
|
||||
|
||||
|
||||
def test_validate_pc_sampling_stochastic_specific_json(input_json):
    """Run the stochastic-specific JSON checks from the shared ``gfx9`` module.

    Args:
        input_json: parsed tool output; only its ``"rocprofiler-sdk-tool"``
            entry is handed to the validator.
    """
    # Imported lazily, inside the test, rather than at module import time.
    from rocprofiler_sdk.pc_sampling.stochastic.json.gfx9 import (
        validate_stochastic_samples_json,
    )

    validate_stochastic_samples_json(input_json["rocprofiler-sdk-tool"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
|
||||
sys.exit(exit_code)
|
||||
+171
@@ -0,0 +1,171 @@
|
||||
#
|
||||
# rocprofv3 tool test
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
project(
|
||||
rocprofiler-tests-pc-sampling
|
||||
LANGUAGES CXX
|
||||
VERSION 0.0.0)
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
|
||||
rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py
|
||||
input.json input.yml)
|
||||
|
||||
# To ensure we sample all agents, use 16 threads each assigned to one agent in a round
|
||||
# robin manner. To keep the job per device reasonable, each thread offloads 100 instances
|
||||
# of a transpose kernel.
|
||||
set(TRANSPOSE_NUM_THREADS 16)
|
||||
set(TRANSPOSE_NUM_ITERATIONS 100)
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-cmd-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --kernel-trace --pc-sampling-unit
|
||||
cycles --pc-sampling-method stochastic --pc-sampling-interval 1048576 -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input -o out --output-format csv json
|
||||
-- $<TARGET_FILE:transpose> ${TRANSPOSE_NUM_THREADS} ${TRANSPOSE_NUM_ITERATIONS})
|
||||
|
||||
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
|
||||
|
||||
set(pc-sampling-env-stochastic-transpose-multiple-agents "${PRELOAD_ENV}")
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-cmd-execute
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
ENVIRONMENT
|
||||
"${pc-sampling-env-stochastic-transpose-multiple-agents}"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-json-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/input.json -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input -- $<TARGET_FILE:transpose>
|
||||
${TRANSPOSE_NUM_THREADS} ${TRANSPOSE_NUM_ITERATIONS})
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-json-execute
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
ENVIRONMENT
|
||||
"${pc-sampling-env-stochastic-transpose-multiple-agents}"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-yaml-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/input.yml -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input -- $<TARGET_FILE:transpose>
|
||||
${TRANSPOSE_NUM_THREADS} ${TRANSPOSE_NUM_ITERATIONS})
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-yaml-execute
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
ENVIRONMENT
|
||||
"${pc-sampling-env-stochastic-transpose-multiple-agents}"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
|
||||
# ========================= Validation tests
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-cmd-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py -k test_
|
||||
--input-samples-csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input/out_pc_sampling_stochastic.csv
|
||||
--input-samples-json
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input/out_results.json
|
||||
--input-kernel-trace-csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input/out_kernel_trace.csv
|
||||
--input-agent-info-csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input/out_agent_info.csv)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-cmd-validate
|
||||
PROPERTIES
|
||||
TIMEOUT
|
||||
60
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
DEPENDS
|
||||
"rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-cmd-execute"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-json-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py -k test_
|
||||
--input-samples-csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input/out_pc_sampling_stochastic.csv
|
||||
--input-samples-json
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input/out_results.json
|
||||
--input-kernel-trace-csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input/out_kernel_trace.csv
|
||||
--input-agent-info-csv
|
||||
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_json_input/out_agent_info.csv)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-json-validate
|
||||
PROPERTIES
|
||||
TIMEOUT
|
||||
60
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
DEPENDS
|
||||
"rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-json-execute"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
|
||||
# Validate the output produced by the YAML-input run. Every input file must
# come from pc_sampling_yaml_input, the directory written by the
# ...-input-yaml-execute test this test DEPENDS on.
add_test(
    NAME rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-yaml-validate
    COMMAND
        ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py -k test_
        --input-samples-csv
        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input/out_pc_sampling_stochastic.csv
        --input-samples-json
        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input/out_results.json
        --input-kernel-trace-csv
        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input/out_kernel_trace.csv
        --input-agent-info-csv
        ${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_yaml_input/out_agent_info.csv)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-yaml-validate
|
||||
PROPERTIES
|
||||
TIMEOUT
|
||||
60
|
||||
LABELS
|
||||
"integration-tests;pc-sampling;stochastic"
|
||||
DEPENDS
|
||||
"rocprofv3-test-pc-sampling-stochastic-transpose-multiple-agents-input-yaml-execute"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"PC sampling unavailable")
|
||||
+84
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import os
|
||||
import pytest
|
||||
import pandas as pd
|
||||
|
||||
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
|
||||
from rocprofiler_sdk.pytest_utils import collapse_dict_list
|
||||
|
||||
|
||||
def pytest_addoption(parser):
    """Register the input-file options consumed by the fixtures of this test.

    Args:
        parser: pytest option parser; four string-valued options are added,
            in the order listed below.
    """
    # (option flag, help text) pairs, registered in this exact order
    option_table = (
        ("--input-samples-csv", "Path to CSV file containing PC samples."),
        ("--input-kernel-trace-csv", "Path to CSV file containing kernel trace."),
        ("--input-agent-info-csv", "Path to CSV file containing agents information."),
        ("--input-samples-json", "Path to JSON file containing PC samples."),
    )
    for flag, description in option_table:
        parser.addoption(flag, action="store", help=description)
|
||||
|
||||
|
||||
@pytest.fixture
def input_samples_csv(request):
    """Load the PC-sampling CSV named by ``--input-samples-csv`` into a DataFrame.

    When the file does not exist, the text "PC sampling unavailable" is
    printed and ``None`` is returned instead of a DataFrame.
    """
    samples_path = request.config.getoption("--input-samples-csv")

    if os.path.isfile(samples_path):
        column_types = {
            "Exec_Mask": "uint64",
            "Instruction": str,
            "Instruction_Comment": str,
            "Wave_Issued_Instruction": bool,
            "Instruction_Type": str,
            "Stall_Reason": str,
        }
        with open(samples_path, "r") as handle:
            return pd.read_csv(
                handle,
                na_filter=False,  # parse empty fields as ""
                keep_default_na=False,  # parse empty fields as ""
                dtype=column_types,
            )

    # The CSV file was not generated because the test responsible for
    # producing it was skipped or failed; emit the message so that this
    # test gets skipped as well.
    print("PC sampling unavailable")
    return None
|
||||
|
||||
|
||||
@pytest.fixture
def input_kernel_trace_csv(request):
    """Load the kernel-trace CSV named by ``--input-kernel-trace-csv``."""
    trace_path = request.config.getoption("--input-kernel-trace-csv")
    with open(trace_path, "r") as handle:
        kernel_trace_df = pd.read_csv(handle)
    return kernel_trace_df
|
||||
|
||||
|
||||
@pytest.fixture
def input_agent_info_csv(request):
    """Load the agent-info CSV named by ``--input-agent-info-csv``."""
    agents_path = request.config.getoption("--input-agent-info-csv")
    with open(agents_path, "r") as handle:
        agent_info_df = pd.read_csv(handle)
    return agent_info_df
|
||||
|
||||
|
||||
@pytest.fixture
def input_samples_json(request):
    """Load the JSON file named by ``--input-samples-json`` as plain Python containers."""
    json_path = request.config.getoption("--input-samples-json")
    with open(json_path, "r") as handle:
        raw = json.load(handle)
        # A plain dict is used on purpose: feeding the data into dotdict
        # was observed to cost an extra 5-6 seconds.
        return collapse_dict_list(raw)
|
||||
+15
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"jobs": [
|
||||
{
|
||||
"kernel_trace": true,
|
||||
"pc_sampling_unit": "cycles",
|
||||
"pc_sampling_method": "stochastic",
|
||||
"pc_sampling_interval": 1048576,
|
||||
"output_file": "out",
|
||||
"output_format": [
|
||||
"csv",
|
||||
"json"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+9
@@ -0,0 +1,9 @@
|
||||
jobs:
|
||||
- kernel_trace: true
|
||||
pc_sampling_unit: "cycles"
|
||||
pc_sampling_method: "stochastic"
|
||||
pc_sampling_interval: 1048576
|
||||
output_file: "out"
|
||||
output_format:
|
||||
- "csv"
|
||||
- "json"
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
|
||||
[pytest]
|
||||
addopts = --durations=20 -rA -s -vv
|
||||
testpaths = validate.py
|
||||
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
|
||||
+46
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import itertools
|
||||
import sys
|
||||
import pytest
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# ===================== validation common for both host-trap and stochastic sampling
|
||||
def test_multi_agent_support(
    input_samples_csv: pd.DataFrame,
    input_kernel_trace_csv: pd.DataFrame,
    input_agent_info_csv: pd.DataFrame,
):
    """Run the multi-agent validator on the samples, kernel-trace and agent-info CSV data."""
    # The validator is imported inside the test body, not at module level.
    from rocprofiler_sdk.pc_sampling.transpose_multiple_agents.csv import (
        validate_all_agents_are_sampled as check_all_agents_sampled,
    )

    check_all_agents_sampled(
        input_samples_csv,
        input_kernel_trace_csv,
        input_agent_info_csv,
    )
|
||||
|
||||
|
||||
# =================== validation specific to stochastic sampling
|
||||
|
||||
|
||||
def test_validate_pc_sampling_stochastic_specific_csv(input_samples_csv: pd.DataFrame):
    """Run the gfx9 stochastic-sampling CSV validator on the samples DataFrame."""
    # The validator is imported inside the test body, not at module level.
    from rocprofiler_sdk.pc_sampling.stochastic.csv.gfx9 import (
        validate_stochastic_samples_csv as check_stochastic_csv,
    )

    check_stochastic_csv(input_samples_csv)
|
||||
|
||||
|
||||
def test_validate_pc_sampling_stochastic_specific_json(input_samples_json):
    """Run the gfx9 stochastic-sampling JSON validator on the tool's JSON output."""
    # The validator is imported inside the test body, not at module level.
    from rocprofiler_sdk.pc_sampling.stochastic.json.gfx9 import (
        validate_stochastic_samples_json as check_stochastic_json,
    )

    # Only the "rocprofiler-sdk-tool" entry of the loaded data is validated.
    tool_section = input_samples_json["rocprofiler-sdk-tool"]
    check_stochastic_json(tool_section)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run this file through pytest with "-x", forwarding any extra
    # command-line arguments, and exit with pytest's return code.
    pytest_args = ["-x", __file__]
    pytest_args.extend(sys.argv[1:])
    sys.exit(pytest.main(pytest_args))
|
||||
Reference in New Issue
Block a user