Adding agent-index (#189)

* Adding agent-index

* review changes

* review comments addressed

* minor fix

* fix CI failure

* review comments

* Fix agent index test and address review comments

* Build Fixes

---------

Co-authored-by: Benjamin Welton <bewelton@amd.com>
This commit is contained in:
Nagaraj, Sriraksha
2025-03-14 02:51:32 -05:00
committed by GitHub
szülő 2fe63d873e
commit c30bb7cbda
32 fájl változott, egészen pontosan 604 új sor hozzáadva és 108 régi sor törölve
+4
Fájl megtekintése
@@ -158,6 +158,10 @@ Full documentation for ROCprofiler-SDK is available at [rocm.docs.amd.com/projec
- SDK: `rocprofiler_agent_v0_t` support for agent UUIDs
- SDK: `rocprofiler_agent_v0_t` support for agent visibility based on gpu isolation environment variables such as `ROCR_VISIBLE_DEVICES` and so on.
- Accumulation VGPR support for `rocprofv3`.
- Added `--agent-index` option in rocprofv3 to specify the agent naming convention in the output
- absolute == node_id
- relative == logical_node_id
- type-relative == logical_node_type_id
## ROCprofiler-SDK 0.7.0 for ROCm release 6.5
+17
Fájl megtekintése
@@ -568,6 +568,20 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
"--realpath",
help=argparse.SUPPRESS,
)
# -A / --agent-index: selects which agent identifier is used to name agents in
# the output. The accepted values map to agent fields as follows:
#   absolute      -> node_id
#   relative      -> logical_node_id
#   type-relative -> logical_node_type_id
# No argparse default is set on this option; the help text documents "relative"
# as the behaviour when the option is omitted.
advanced_options.add_argument(
"-A",
"--agent-index",
choices=("absolute", "relative", "type-relative"),
help="""absolute == node_id, e.g. Agent-0, Agent-2, Agent-4- absolute index of the agent regardless of cgroups masking.
This is a monotonically increasing number that is incremented for every folder in /sys/class/kfd/kfd/topology/nodes.
relative == logical_node_id,
e.g. Agent-0, Agent-1, Agent-2- relative index of the agent accounting for cgroups masking.
This is a monotonically increasing number which is incremented for every folder in /sys/class/kfd/kfd/topology/nodes/ whose properties file was non-empty.
type-relative == logical_node_type_id,
e.g. CPU-0, GPU-0, GPU-1- relative index of the agent accounting for cgroups masking where indexing starts at zero for each agent type.
It is a monotonically increasing number for each agent type and is incremented for every type folder in /sys/class/kfd/kfd/topology/nodes/ whose properties file is non-empty.
If agent-index is not provided then the default value for it is relative.""",
)
# below is available for CI because LD_PRELOADing a library linked to a sanitizer library
# causes issues in apps where HIP is part of shared library.
add_parser_bool_argument(
@@ -1202,6 +1216,9 @@ def run(app_args, args, **kwargs):
if args.kernel_iteration_range:
update_env("ROCPROF_KERNEL_FILTER_RANGE", ", ".join(args.kernel_iteration_range))
if args.agent_index:
update_env("ROCPROF_AGENT_INDEX", args.agent_index)
if args.extra_counters is not None:
with open(args.extra_counters, "r") as e_file:
e_file_contents = e_file.read()
@@ -954,6 +954,23 @@ save(ArchiveT& ar, rocprofiler_pc_sampling_configuration_t data)
ROCP_SDK_SAVE_DATA_FIELD(flags);
}
// Serializes an agent's runtime-visibility record into the archive `ar`.
// One entry is emitted per runtime, in this fixed order: "hsa", "hip", "rccl",
// "rocdecode".
// NOTE(review): ROCP_SDK_SAVE_DATA_BITFIELD is defined outside this view; it
// presumably writes the named bitfield member of `data` to `ar` under the given
// key -- confirm against the macro definition.
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_agent_runtime_visiblity_t data)
{
ROCP_SDK_SAVE_DATA_BITFIELD("hsa", hsa);
ROCP_SDK_SAVE_DATA_BITFIELD("hip", hip);
ROCP_SDK_SAVE_DATA_BITFIELD("rccl", rccl);
ROCP_SDK_SAVE_DATA_BITFIELD("rocdecode", rocdecode);
}
// Serializes an agent UUID into the archive `ar` by saving its `value` member.
// NOTE(review): ROCP_SDK_SAVE_DATA_FIELD is defined outside this view; it
// presumably writes `data.value` under the key "value" -- confirm against the
// macro definition.
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_uuid_t data)
{
ROCP_SDK_SAVE_DATA_FIELD(value);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_agent_v0_t& data)
@@ -1012,6 +1029,9 @@ save(ArchiveT& ar, const rocprofiler_agent_v0_t& data)
ROCP_SDK_SAVE_DATA_CSTR(model_name);
ROCP_SDK_SAVE_DATA_FIELD(node_id);
ROCP_SDK_SAVE_DATA_FIELD(logical_node_id);
ROCP_SDK_SAVE_DATA_FIELD(logical_node_type_id);
ROCP_SDK_SAVE_DATA_FIELD(runtime_visibility);
ROCP_SDK_SAVE_DATA_FIELD(uuid);
auto generate = [&](auto name, const auto* value, uint64_t size) {
using value_type = std::remove_const_t<std::remove_pointer_t<decltype(value)>>;
+25 -15
Fájl megtekintése
@@ -288,10 +288,12 @@ generate_csv(const output_config&
auto row_ss = std::stringstream{};
auto kernel_name = tool_metadata.get_kernel_name(record.dispatch_info.kernel_id,
record.correlation_id.external.value);
rocprofiler::tool::csv::kernel_trace_csv_encoder::write_row(
row_ss,
tool_metadata.get_kind_name(record.kind),
tool_metadata.get_node_id(record.dispatch_info.agent_id),
tool_metadata.get_agent_index(record.dispatch_info.agent_id, cfg.agent_index_value)
.as_string(),
record.dispatch_info.queue_id.handle,
record.thread_id,
record.dispatch_info.dispatch_id,
@@ -418,6 +420,7 @@ generate_csv(const output_config& c
"Correlation_Id",
"Start_Timestamp",
"End_Timestamp"}};
for(auto ditr : data)
{
for(auto record : data.get(ditr))
@@ -428,8 +431,10 @@ generate_csv(const output_config& c
row_ss,
tool_metadata.get_kind_name(record.kind),
api_name,
tool_metadata.get_node_id(record.src_agent_id),
tool_metadata.get_node_id(record.dst_agent_id),
tool_metadata.get_agent_index(record.src_agent_id, cfg.agent_index_value)
.as_string(),
tool_metadata.get_agent_index(record.dst_agent_id, cfg.agent_index_value)
.as_string(),
record.correlation_id.internal,
record.start_timestamp,
record.end_timestamp);
@@ -465,13 +470,14 @@ generate_csv(const output_config&
{
for(auto record : data.get(ditr))
{
uint64_t agent_info{0};
auto agent_info = std::string{};
// Free functions currently do not track agent information. Only set it on allocation
// operations, otherwise set it to 0 currently
if(record.operation == ROCPROFILER_MEMORY_ALLOCATION_ALLOCATE ||
record.operation == ROCPROFILER_MEMORY_ALLOCATION_VMEM_ALLOCATE)
{
agent_info = tool_metadata.get_node_id(record.agent_id);
agent_info = tool_metadata.get_agent_index(record.agent_id, cfg.agent_index_value)
.as_string();
}
auto api_name = tool_metadata.get_operation_name(record.kind, record.operation);
auto row_ss = std::stringstream{};
@@ -611,7 +617,10 @@ generate_csv(const output_config& cfg,
row_ss,
correlation_id.internal,
record.dispatch_data.dispatch_info.dispatch_id,
tool_metadata.get_node_id(record.dispatch_data.dispatch_info.agent_id),
tool_metadata
.get_agent_index(record.dispatch_data.dispatch_info.agent_id,
cfg.agent_index_value)
.as_string(),
record.dispatch_data.dispatch_info.queue_id.handle,
tool_metadata.process_id,
record.thread_id,
@@ -667,15 +676,16 @@ generate_csv(const output_config&
auto kind_name = tool_metadata.get_kind_name(record.kind);
auto op_name = tool_metadata.get_operation_name(record.kind, record.operation);
tool::csv::scratch_memory_encoder::write_row(row_ss,
kind_name,
op_name,
tool_metadata.get_node_id(record.agent_id),
record.queue_id.handle,
record.thread_id,
record.flags,
record.start_timestamp,
record.end_timestamp);
tool::csv::scratch_memory_encoder::write_row(
row_ss,
kind_name,
op_name,
tool_metadata.get_agent_index(record.agent_id, cfg.agent_index_value).as_string(),
record.queue_id.handle,
record.thread_id,
record.flags,
record.start_timestamp,
record.end_timestamp);
ofs << row_ss.str();
}
+26 -28
Fájl megtekintése
@@ -476,15 +476,13 @@ write_otf2(
{
for(auto& [agent, evt] : itr)
{
const auto* _agent = _get_agent(agent);
auto _type_name = std::string_view{"UNK"};
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
_type_name = "CPU";
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
_type_name = "GPU";
evt.name = fmt::format(
"Thread {}, Copy to {} {}", tid, _type_name, _agent->logical_node_type_id);
const auto* _agent = _get_agent(agent);
auto agent_index_info =
tool_metadata.get_agent_index(_agent->id, cfg.agent_index_value);
evt.name = fmt::format("Thread {}, Copy to {} {}",
tid,
agent_index_info.type,
agent_index_info.as_string("-"));
}
}
@@ -501,16 +499,20 @@ write_otf2(
{
_agent = _get_agent(agent);
}
auto _type_name = std::string_view{"UNK"};
if(_agent != nullptr && _agent->type == ROCPROFILER_AGENT_TYPE_CPU)
_type_name = "CPU";
else if(_agent != nullptr && _agent->type == ROCPROFILER_AGENT_TYPE_GPU)
_type_name = "GPU";
evt.name = fmt::format("Thread {}, Memory Operation at {} {}",
tid,
_type_name,
_agent == nullptr ? 0 : _agent->logical_node_type_id);
if(_agent)
{
auto agent_index_info =
tool_metadata.get_agent_index(_agent->id, cfg.agent_index_value);
evt.name = fmt::format("Thread {}, Memory Operation at {} {}",
tid,
agent_index_info.type,
agent_index_info.as_string("-"));
}
else
{
auto _type_name = std::string_view{"UNK"};
evt.name = fmt::format("Thread {}, Memory Operation at {} {}", tid, _type_name, 0);
}
}
}
@@ -532,17 +534,13 @@ write_otf2(
{
for(auto& [queue, evt] : qitr)
{
const auto* _agent = _get_agent(agent);
auto _type_name = std::string_view{"UNK"};
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
_type_name = "CPU";
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
_type_name = "GPU";
const auto* _agent = _get_agent(agent);
auto agent_index_info =
tool_metadata.get_agent_index(_agent->id, cfg.agent_index_value);
evt.name = fmt::format("Thread {}, Compute on {} {}, Queue {}",
tid,
_type_name,
_agent->logical_node_type_id,
agent_index_info.type,
agent_index_info.as_string("-"),
_queue_ids.at(queue));
}
}
@@ -260,15 +260,11 @@ write_perfetto(
const auto* _agent = _get_agent(aitr.first);
auto _namess = std::stringstream{};
_namess << "COMPUTE AGENT [" << _agent->logical_node_id << "] QUEUE [" << nqueue++
<< "] ";
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
_namess << "(CPU)";
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
_namess << "(GPU)";
else
_namess << "(UNK)";
auto agent_index_info =
tool_metadata.get_agent_index(_agent->id, ocfg.agent_index_value);
_namess << "COMPUTE " << agent_index_info.label << " [" << agent_index_info.index
<< "] QUEUE [" << nqueue++ << "] ";
_namess << agent_index_info.type;
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
auto _desc = _track.Serialize();
@@ -478,31 +474,34 @@ write_perfetto(
auto name = buffer_names.at(itr.kind, itr.operation);
auto& track = agent_thread_tracks.at(itr.dst_agent_id).at(itr.thread_id);
TRACE_EVENT_BEGIN(sdk::perfetto_category<sdk::category::memory_copy>::name,
::perfetto::StaticString(name.data()),
track,
itr.start_timestamp,
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
"begin_ns",
itr.start_timestamp,
"end_ns",
itr.end_timestamp,
"delta_ns",
(itr.end_timestamp - itr.start_timestamp),
"kind",
itr.kind,
"operation",
itr.operation,
"src_agent",
agents_map.at(itr.src_agent_id).logical_node_id,
"dst_agent",
agents_map.at(itr.dst_agent_id).logical_node_id,
"copy_bytes",
itr.bytes,
"corr_id",
itr.correlation_id.internal,
"tid",
itr.thread_id);
TRACE_EVENT_BEGIN(
sdk::perfetto_category<sdk::category::memory_copy>::name,
::perfetto::StaticString(name.data()),
track,
itr.start_timestamp,
::perfetto::Flow::ProcessScoped(itr.correlation_id.internal),
"begin_ns",
itr.start_timestamp,
"end_ns",
itr.end_timestamp,
"delta_ns",
(itr.end_timestamp - itr.start_timestamp),
"kind",
itr.kind,
"operation",
itr.operation,
"src_agent",
tool_metadata.get_agent_index(itr.src_agent_id, ocfg.agent_index_value)
.as_string("-"),
"dst_agent",
tool_metadata.get_agent_index(itr.dst_agent_id, ocfg.agent_index_value)
.as_string("-"),
"copy_bytes",
itr.bytes,
"corr_id",
itr.correlation_id.internal,
"tid",
itr.thread_id);
TRACE_EVENT_END(sdk::perfetto_category<sdk::category::memory_copy>::name,
track,
itr.end_timestamp);
@@ -592,7 +591,10 @@ write_perfetto(
"kind",
current.kind,
"agent",
tool_metadata.get_node_id(info.agent_id),
tool_metadata
.get_agent_index(agents_map.at(info.agent_id).id,
ocfg.agent_index_value)
.as_string("-"),
"corr_id",
current.correlation_id.internal,
"queue",
@@ -676,11 +678,10 @@ write_perfetto(
auto _track_name = std::stringstream{};
const auto* _agent = _get_agent(mitr.first);
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
_track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (CPU)";
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
_track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (GPU)";
auto agent_index_info =
tool_metadata.get_agent_index(_agent->id, ocfg.agent_index_value);
_track_name << "COPY BYTES to " << agent_index_info.label << " ["
<< agent_index_info.index << "] (" << agent_index_info.type << ")";
constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES;
auto& _name = mem_cpy_cnt_names.emplace_back(_track_name.str());
@@ -847,10 +848,15 @@ write_perfetto(
auto _track_name = std::stringstream{};
const rocprofiler_agent_t* _agent = _get_agent(alloc_itr.first);
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
_track_name << "ALLOCATE BYTES on AGENT [" << _agent->logical_node_id << "] (CPU)";
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
_track_name << "ALLOCATE BYTES on AGENT [" << _agent->logical_node_id << "] (GPU)";
if(_agent != nullptr)
{
auto agent_index_info =
tool_metadata.get_agent_index(_agent->id, ocfg.agent_index_value);
_track_name << "ALLOCATE BYTES on " << agent_index_info.label << " ["
<< agent_index_info.index << "] (" << agent_index_info.type << ")";
}
else
_track_name << "FREE BYTES";
constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES;
auto& _name = mem_alloc_cnt_names.emplace_back(_track_name.str());
+30 -3
Fájl megtekintése
@@ -33,6 +33,7 @@
#include <rocprofiler-sdk/rocprofiler.h>
#include <rocprofiler-sdk/cxx/details/tokenize.hpp>
#include <fmt/core.h>
#include <fmt/format.h>
#include <unistd.h>
@@ -534,10 +535,36 @@ metadata::get_operation_name(rocprofiler_buffer_tracing_kind_t kind,
return buffer_names.at(kind, op);
}
uint64_t
metadata::get_node_id(rocprofiler_agent_id_t _val) const
agent_index
metadata::get_agent_index(rocprofiler_agent_id_t id, agent_indexing index) const
{
return CHECK_NOTNULL(get_agent(_val))->logical_node_id;
const auto* _agent = get_agent(id);
ROCP_FATAL_IF(!_agent) << "Information of the agent with handle: " << id.handle
<< " is not present";
// stringify agent type
auto get_type = [_agent]() -> std::string_view {
switch(_agent->type)
{
case ROCPROFILER_AGENT_TYPE_CPU: return "CPU";
case ROCPROFILER_AGENT_TYPE_GPU: return "GPU";
case ROCPROFILER_AGENT_TYPE_NONE:
case ROCPROFILER_AGENT_TYPE_LAST: break;
}
return "UNK";
};
switch(index)
{
case agent_indexing::node: return agent_index{"Agent", _agent->node_id, get_type()};
case agent_indexing::logical_node_type:
return agent_index{
get_type(), static_cast<uint32_t>(_agent->logical_node_type_id), get_type()};
case agent_indexing::logical_node:
default:
return agent_index{"Agent", static_cast<uint32_t>(_agent->logical_node_id), get_type()};
}
}
const std::string*
+23 -1
Fájl megtekintése
@@ -44,6 +44,8 @@
#include <rocprofiler-sdk/cxx/name_info.hpp>
#include <rocprofiler-sdk/cxx/operators.hpp>
#include <fmt/core.h>
#include <cstdint>
#include <map>
#include <string>
@@ -86,6 +88,26 @@ using code_object_load_info_vec_t = std::vector<rocprofiler::att_wrapper::Codeob
template <typename Tp>
using synced_map = common::Synchronized<Tp, true>;
/// Selects which agent identifier metadata::get_agent_index reports.
enum class agent_indexing
{
    node              = 0,  ///< use the agent's node_id
    logical_node      = 1,  ///< use the agent's logical_node_id
    logical_node_type = 2,  ///< use the agent's logical_node_type_id
};
/// Printable identification of an agent: a textual label, a numeric index and
/// the agent type. The members are string views, so whatever they refer to must
/// outlive this object.
struct agent_index
{
    std::string_view label{};   ///< e.g. "Agent", "CPU", "GPU"
    uint32_t         index{0};  ///< the numerical index
    std::string_view type{};    ///< e.g. "CPU", "GPU", etc.

    /// Returns the label and the index joined by @p sep (a single space by
    /// default), e.g. "Agent 0" or, with sep == "-", "GPU-1".
    std::string as_string(std::string_view sep = " ") const
    {
        return fmt::format("{}{}{}", label, sep, index);
    }
};
struct metadata
{
using agent_info_ptr_vec_t = common::container::small_vector<const agent_info*, 16>;
@@ -165,7 +187,7 @@ struct metadata
rocprofiler_tracing_operation_t op) const;
std::string_view get_operation_name(rocprofiler_buffer_tracing_kind_t kind,
rocprofiler_tracing_operation_t op) const;
uint64_t get_node_id(rocprofiler_agent_id_t _val) const;
agent_index get_agent_index(rocprofiler_agent_id_t agent, agent_indexing index) const;
const std::string* get_string_entry(size_t key) const;
private:
@@ -87,6 +87,14 @@ output_config::parse_env()
<< "Unsupported output format type: " << itr;
}
std::string agent_index = common::get_env("ROCPROF_AGENT_INDEX", "relative");
if(agent_index == "type-relative")
agent_index_value = agent_indexing::logical_node_type;
else if(agent_index == "absolute")
agent_index_value = agent_indexing::node;
else
agent_index_value = agent_indexing::logical_node;
const auto supported_perfetto_backends = std::set<std::string_view>{"inprocess", "system"};
LOG_IF(FATAL, supported_perfetto_backends.count(perfetto_backend) == 0)
<< "Unsupported perfetto backend type: " << perfetto_backend;
@@ -23,6 +23,7 @@
#pragma once
#include "format_path.hpp"
#include "metadata.hpp"
#include "lib/common/environment.hpp"
#include "lib/common/filesystem.hpp"
@@ -71,6 +72,7 @@ struct output_config
uint64_t stats_summary_unit_value = 1;
size_t perfetto_shmem_size_hint = defaults::perfetto_shmem_size_hint_kb;
size_t perfetto_buffer_size = defaults::perfetto_buffer_size_kb;
agent_indexing agent_index_value = agent_indexing::logical_node;
std::string stats_summary_unit = "nsec";
std::string output_path = "%cwd%";
std::string output_file = "%hostname%/%pid%";
@@ -38,7 +38,8 @@ counters_list = ["SQ_WAVES", "GRBM_GUI_ACTIVE"]
def test_validate_counter_collection_pmc1(input_data: pd.DataFrame):
df = input_data
assert not df.empty
assert (df["Agent_Id"].astype(int).values > 0).all()
df_agent_id = df["Agent_Id"].str.split(" ").str[-1]
assert (df_agent_id.astype(int).values >= 0).all()
assert (df["Queue_Id"].astype(int).values > 0).all()
assert len(df["Kernel_Name"]) > 0
+1
Fájl megtekintése
@@ -41,3 +41,4 @@ add_subdirectory(rocjpeg-trace)
if(TARGET att_decoder_testing)
add_subdirectory(advanced-thread-trace)
endif()
add_subdirectory(agent-index)
@@ -0,0 +1,124 @@
#
# rocprofv3 tool test: --agent-index option (absolute / relative / type-relative)
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(
rocprofiler-tests-rocprofv3-agent-index
LANGUAGES CXX
VERSION 0.0.0)
find_package(rocprofiler-sdk REQUIRED)
# Pass the memcheck preload setting to the tests as ROCPROF_PRELOAD=... instead
# of LD_PRELOAD=...; the rewritten value becomes the tests' ENVIRONMENT.
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
set(tracing-env "${PRELOAD_ENV}")
# NOTE(review): rocprofiler_configure_pytest_files is a project helper defined
# outside this file; presumably it configures pytest.ini and copies the listed
# files into CMAKE_CURRENT_BINARY_DIR, which is where the tests below look for
# them -- confirm against the helper.
rocprofiler_configure_pytest_files(
CONFIG pytest.ini
COPY validate.py conftest.py input_absolute_agent_index.yaml
input_type_relative_agent_index.yaml input_relative_agent_index.yaml)
# Log level handed to rocprofv3 via --log-level in the execute tests.
if(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer")
set(LOG_LEVEL "warning") # info produces memory leak
else()
set(LOG_LEVEL "info")
endif()
# Profiling runs: rocprofv3 is launched on the transpose application once per
# agent-index mode. The three lists are parallel: the same position in each list
# describes the same test.
set(execute_tests
    rocprofv3-test-agent-index-json-absolute-execute
    rocprofv3-test-agent-index-json-type-relative-execute
    rocprofv3-test-agent-index-json-relative-execute)

set(input_files
    ${CMAKE_CURRENT_BINARY_DIR}/input_absolute_agent_index.yaml
    ${CMAKE_CURRENT_BINARY_DIR}/input_type_relative_agent_index.yaml
    ${CMAKE_CURRENT_BINARY_DIR}/input_relative_agent_index.yaml)

set(output_dir
    ${CMAKE_CURRENT_BINARY_DIR}/%argt%-agent-index-absolute
    ${CMAKE_CURRENT_BINARY_DIR}/%argt%-agent-index-type-relative
    ${CMAKE_CURRENT_BINARY_DIR}/%argt%-agent-index-relative)

# Walk the three lists in lock-step: one test per (name, input file, output dir).
foreach(test_name input_file output IN ZIP_LISTS execute_tests input_files output_dir)
    add_test(
        NAME ${test_name}
        COMMAND
            $<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i ${input_file} -d ${output}
            --log-level ${LOG_LEVEL} -- $<TARGET_FILE:transpose>)

    set_tests_properties(
        ${test_name}
        PROPERTIES TIMEOUT 45
                   LABELS "integration-tests"
                   ENVIRONMENT "${tracing-env}"
                   FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}")
endforeach()
# Validation runs: validate.py checks the CSV traces of each agent-index mode
# against the agent table stored in the JSON output. Only the absolute input
# file requests JSON output, so every validation reads out_results.json from
# the absolute run. The three lists are parallel (same position == same test).
set(agent_index_types absolute relative type-relative)

set(validate_tests
    rocprofv3-test-agent-index-json-absolute-validate
    rocprofv3-test-agent-index-json-relative-validate
    rocprofv3-test-agent-index-json-type-relative-validate)

set(output_dir_name
    transpose-agent-index-absolute
    transpose-agent-index-relative
    transpose-agent-index-type-relative)

# number of validation tests
list(LENGTH validate_tests validate_test_length)

foreach(test_name agent_index output IN ZIP_LISTS validate_tests agent_index_types
                                                  output_dir_name)
    add_test(
        NAME ${test_name}
        COMMAND
            ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py
            --agent-index ${agent_index}
            --csv-kernel-input
            ${CMAKE_CURRENT_BINARY_DIR}/${output}/out_kernel_trace.csv
            --csv-memory-allocation-input
            ${CMAKE_CURRENT_BINARY_DIR}/${output}/out_memory_allocation_trace.csv
            --csv-memory-copy-input
            ${CMAKE_CURRENT_BINARY_DIR}/${output}/out_memory_copy_trace.csv
            --json-input
            ${CMAKE_CURRENT_BINARY_DIR}/transpose-agent-index-absolute/out_results.json)
endforeach()
# Files attached to the CTest report when a validation test fails.
set(VALIDATION_FILES
    ${CMAKE_CURRENT_BINARY_DIR}/transpose-agent-index-absolute/out_results.json
    ${CMAKE_CURRENT_BINARY_DIR}/transpose-agent-index-absolute/out_kernel_trace.csv
    ${CMAKE_CURRENT_BINARY_DIR}/transpose-agent-index-absolute/out_memory_allocation_trace.csv
    ${CMAKE_CURRENT_BINARY_DIR}/transpose-agent-index-absolute/out_memory_copy_trace.csv
    ${CMAKE_CURRENT_BINARY_DIR}/transpose-agent-index-relative/out_kernel_trace.csv
    ${CMAKE_CURRENT_BINARY_DIR}/transpose-agent-index-relative/out_memory_allocation_trace.csv
    ${CMAKE_CURRENT_BINARY_DIR}/transpose-agent-index-relative/out_memory_copy_trace.csv
    ${CMAKE_CURRENT_BINARY_DIR}/transpose-agent-index-type-relative/out_kernel_trace.csv
    ${CMAKE_CURRENT_BINARY_DIR}/transpose-agent-index-type-relative/out_memory_allocation_trace.csv
    ${CMAKE_CURRENT_BINARY_DIR}/transpose-agent-index-type-relative/out_memory_copy_trace.csv
    )

# Each validation test needs two profiling runs to have finished first:
#   * the run for its own agent-index mode (produces the CSV traces), and
#   * the absolute run (produces out_results.json, which every validation reads).
# The dependency list is built per test instead of being looked up by position
# in one combined list: an unquoted "a;b" argument to set() is split into two
# list elements, so positional lookup in such a list returns the wrong entries.
foreach(test_name agent_index IN ZIP_LISTS validate_tests agent_index_types)
    set(_agent_index_deps
        rocprofv3-test-agent-index-json-${agent_index}-execute
        rocprofv3-test-agent-index-json-absolute-execute)
    # for the absolute mode both entries name the same test
    list(REMOVE_DUPLICATES _agent_index_deps)

    set_tests_properties(
        ${test_name}
        PROPERTIES TIMEOUT 45
                   LABELS "integration-tests"
                   DEPENDS "${_agent_index_deps}"
                   FAIL_REGULAR_EXPRESSION "AssertionError"
                   ATTACHED_FILES_ON_FAIL "${VALIDATION_FILES}")
endforeach()
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import csv
import pytest
import json
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
from rocprofiler_sdk.pytest_utils import collapse_dict_list
from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader
def pytest_addoption(parser):
    """Register the command-line options used by the agent-index validation.

    Options:
        --agent-index: agent indexing mode of the run being validated
            (one of "absolute", "relative", "type-relative").
        --csv-kernel-input: kernel tracing CSV file.
        --csv-memory-allocation-input: memory allocation tracing CSV file.
        --csv-memory-copy-input: memory copy tracing CSV file.
        --json-input: JSON output file.
    """
    parser.addoption(
        "--agent-index",
        action="store",
        choices=("absolute", "relative", "type-relative"),
        help="Agent indexing mode used for the profiler output being validated",
    )
    parser.addoption(
        "--csv-kernel-input",
        action="store",
        help="Path to kernel tracing CSV file",
    )
    parser.addoption(
        "--csv-memory-allocation-input",
        action="store",
        help="Path to memory allocation tracing CSV file",
    )
    parser.addoption(
        "--csv-memory-copy-input",
        action="store",
        help="Path to memory copy tracing CSV file",
    )
    parser.addoption(
        "--json-input",
        action="store",
        help="Path to JSON file.",
    )
def read_csv(filename):
    """Read a CSV file that has a header row.

    Args:
        filename: path of the CSV file.

    Returns:
        A list with one dict per data row, keyed by the header names.
    """
    # newline="" lets the csv module do its own newline handling, so line
    # breaks inside quoted fields are returned unchanged.
    with open(filename, "r", newline="") as inp:
        return list(csv.DictReader(inp))
@pytest.fixture
def csv_kernel_input(request):
    """Rows of the CSV file given via ``--csv-kernel-input``."""
    return read_csv(request.config.getoption("--csv-kernel-input"))
@pytest.fixture
def csv_memory_copy_input(request):
    """Rows of the CSV file given via ``--csv-memory-copy-input``."""
    return read_csv(request.config.getoption("--csv-memory-copy-input"))
@pytest.fixture
def csv_memory_allocation_input(request):
    """Rows of the CSV file given via ``--csv-memory-allocation-input``."""
    return read_csv(request.config.getoption("--csv-memory-allocation-input"))
@pytest.fixture
def json_data(request):
    """Contents of the JSON file given via ``--json-input``.

    The parsed document is passed through ``collapse_dict_list`` and wrapped in
    a ``dotdict`` before being returned.
    """
    json_path = request.config.getoption("--json-input")
    with open(json_path, "r") as stream:
        parsed = json.load(stream)
    return dotdict(collapse_dict_list(parsed))
@pytest.fixture
def agent_index(request):
    """Agent indexing mode given via ``--agent-index``."""
    config = request.config
    return config.getoption("--agent-index")
@@ -0,0 +1,7 @@
jobs:
- output_file: "out"
output_format: [csv, json]
sys_trace: True
agent_index: absolute
@@ -0,0 +1,6 @@
jobs:
- output_file: "out"
output_format: [csv]
sys_trace: True
agent_index: relative
@@ -0,0 +1,6 @@
jobs:
- output_file: "out"
output_format: [csv]
sys_trace: True
agent_index: type-relative
@@ -0,0 +1,5 @@
[pytest]
addopts = --durations=20 -rA -s -vv
testpaths = validate.py
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
@@ -0,0 +1,124 @@
#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import sys
import pytest
import re
def get_type(id_type):
    """Return the name of the agent field that holds the index for ``id_type``.

    ``"absolute"`` maps to ``"node_id"`` and ``"type-relative"`` maps to
    ``"logical_node_type_id"``. Every other value (including ``"relative"``)
    maps to ``"logical_node_id"``.
    """
    known_modes = (
        ("absolute", "node_id"),
        ("type-relative", "logical_node_type_id"),
    )
    for mode, field in known_modes:
        if id_type == mode:
            return field
    return "logical_node_id"
def check_kernel_traces(json_data, kernels_data, id_type):
    """Check that every kernel-trace row names an agent listed in the JSON output.

    Args:
        json_data: parsed JSON output; the agent table is read from
            ``json_data["rocprofiler-sdk-tool"]["agents"]``.
        kernels_data: rows of the kernel-trace CSV; ``Agent_Id`` has the form
            ``"<label> <index>"``.
        id_type: agent indexing mode; selects the agent field to compare
            against (see ``get_type``).

    Raises:
        AssertionError: if a row's index matches no agent.
    """
    agents = json_data["rocprofiler-sdk-tool"]["agents"]
    field = get_type(id_type)
    # The agent table does not change per row, so collect the valid indices once.
    known_ids = {int(agent[field]) for agent in agents}

    for row in kernels_data:
        # the numeric index is the last space-separated token of the column
        agent_id = int(row["Agent_Id"].split(" ")[-1])
        assert (
            agent_id in known_ids
        ), f"Agent ID {row['Agent_Id']} of Type {id_type} not found"
def check_memory_allocation_traces(json_data, mem_alloc, id_type):
    """Check that memory-allocation rows name agents listed in the JSON output.

    Rows whose ``Agent_Id`` column is empty are skipped.

    Args:
        json_data: parsed JSON output; the agent table is read from
            ``json_data["rocprofiler-sdk-tool"]["agents"]``.
        mem_alloc: rows of the memory-allocation CSV; a non-empty ``Agent_Id``
            has the form ``"<label> <index>"``.
        id_type: agent indexing mode; selects the agent field to compare
            against (see ``get_type``).

    Raises:
        AssertionError: if a non-empty ``Agent_Id`` matches no agent.
    """
    agents = json_data["rocprofiler-sdk-tool"]["agents"]
    field = get_type(id_type)
    # The agent table does not change per row, so collect the valid indices once.
    known_ids = {int(agent[field]) for agent in agents}

    for row in mem_alloc:
        if row["Agent_Id"] == "":
            # no agent recorded for this row, nothing to validate
            continue
        agent_id = int(row["Agent_Id"].split(" ")[-1])
        assert (
            agent_id in known_ids
        ), f"Agent ID {row['Agent_Id']} of Type {id_type} not found"
def check_memory_copy_traces(json_data, mem_copy, id_type):
    """Check the source and destination agents of every memory-copy row.

    Both ``Source_Agent_Id`` and ``Destination_Agent_Id`` (of the form
    ``"<label> <index>"``) must carry an index that belongs to an agent listed
    in the JSON output. In type-relative mode the labels are also checked
    against the copy direction: host-to-device must be CPU -> GPU and
    device-to-host must be GPU -> CPU.

    Args:
        json_data: parsed JSON output; the agent table is read from
            ``json_data["rocprofiler-sdk-tool"]["agents"]``.
        mem_copy: rows of the memory-copy CSV.
        id_type: agent indexing mode; selects the agent field to compare
            against (see ``get_type``).

    Raises:
        AssertionError: if an agent index is unknown or a label contradicts
            the copy direction.
    """
    agents = json_data["rocprofiler-sdk-tool"]["agents"]
    field = get_type(id_type)
    # The agent table does not change per row, so collect the valid indices once.
    known_ids = {int(agent[field]) for agent in agents}

    # copy direction -> (expected source label, expected destination label)
    expected_labels = {
        "MEMORY_COPY_HOST_TO_DEVICE": ("CPU", "GPU"),
        "MEMORY_COPY_DEVICE_TO_HOST": ("GPU", "CPU"),
    }

    for row in mem_copy:
        src = row["Source_Agent_Id"]
        dst = row["Destination_Agent_Id"]

        assert (
            int(src.split(" ")[-1]) in known_ids
        ), f"Src Agent ID {src} of Type {id_type} not found"
        assert (
            int(dst.split(" ")[-1]) in known_ids
        ), f"Destination Agent ID {dst} of Type {id_type} not found"

        # Only the type-relative mode uses the agent type as the label.
        if field == "logical_node_type_id" and row["Direction"] in expected_labels:
            src_label, dst_label = expected_labels[row["Direction"]]
            assert src.split(" ")[0] == src_label
            assert dst.split(" ")[0] == dst_label
def test_validate(
    csv_kernel_input,
    json_data,
    agent_index,
    csv_memory_allocation_input,
    csv_memory_copy_input,
):
    """Validate the agent identifiers of all three CSV traces against the JSON.

    The traces are checked in this order: kernel, memory copy, memory allocation.
    """
    check_kernel_traces(
        json_data=json_data, kernels_data=csv_kernel_input, id_type=agent_index
    )
    check_memory_copy_traces(
        json_data=json_data, mem_copy=csv_memory_copy_input, id_type=agent_index
    )
    check_memory_allocation_traces(
        json_data=json_data, mem_alloc=csv_memory_allocation_input, id_type=agent_index
    )
if __name__ == "__main__":
    # Run this file through pytest, stopping at the first failure (-x) and
    # forwarding any extra command-line arguments; pytest's result is the
    # process exit code.
    pytest_args = ["-x", __file__, *sys.argv[1:]]
    sys.exit(pytest.main(pytest_args))
@@ -41,7 +41,8 @@ def test_validate_counter_collection_pmc1_extra_counters(input_data: pd.DataFram
df = input_data
assert not df.empty
assert (df["Agent_Id"].astype(int).values > 0).all()
df_agent_id = df["Agent_Id"].str.split(" ").str[-1]
assert (df_agent_id.astype(int).values >= 0).all()
assert (df["Queue_Id"].astype(int).values > 0).all()
assert (df["Process_Id"].astype(int).values > 0).all()
assert len(df["Kernel_Name"]) > 0
@@ -41,7 +41,8 @@ def test_validate_counter_collection_pmc1(input_data: pd.DataFrame):
df = input_data
assert not df.empty
assert (df["Agent_Id"].astype(int).values > 0).all()
df_agent_id = df["Agent_Id"].str.split(" ").str[-1]
assert (df_agent_id.astype(int).values >= 0).all()
assert (df["Queue_Id"].astype(int).values > 0).all()
assert (df["Process_Id"].astype(int).values > 0).all()
assert len(df["Kernel_Name"]) > 0
@@ -49,7 +49,7 @@ def test_validate_counter_collection_pmc2(counter_input_data):
di_list = []
for row in counter_input_data:
assert int(row["Agent_Id"]) > 0
assert int(row["Agent_Id"].split(" ")[-1]) >= 0
assert int(row["Queue_Id"]) > 0
assert int(row["Process_Id"]) > 0
assert len(row["Kernel_Name"]) > 0
@@ -51,7 +51,7 @@ def test_validate_counter_collection_yml_pmc(counter_input_data):
di_list = []
for row in counter_input_data:
assert int(row["Agent_Id"]) > 0
assert int(row["Agent_Id"].split(" ")[-1]) >= 0
assert int(row["Queue_Id"]) > 0
assert int(row["Process_Id"]) > 0
assert len(row["Kernel_Name"]) > 0
@@ -36,7 +36,8 @@ def unique(lst):
def validate_csv(df, kernel_list, counter_name):
assert not df.empty
assert (df["Agent_Id"].astype(int).values > 0).all()
df_Agent_id = df["Agent_Id"].str.split(" ").str[-1]
assert (df_Agent_id.astype(int).values >= 0).all()
assert (df["Queue_Id"].astype(int).values > 0).all()
assert (df["Process_Id"].astype(int).values > 0).all()
assert len(df["Kernel_Name"]) > 0
@@ -73,7 +73,7 @@ def test_kernel_trace(kernel_trace_input_data):
assert len(kernel_trace_input_data) == 3
for row in kernel_trace_input_data:
assert row["Kind"] == "KERNEL_DISPATCH"
assert int(row["Agent_Id"]) > 0
assert int(row["Agent_Id"].split(" ")[-1]) >= 0
assert int(row["Queue_Id"]) > 0
assert int(row["Kernel_Id"]) > 0
assert row["Kernel_Name"] in valid_kernel_names
@@ -23,7 +23,7 @@ add_test(
NAME rocprofv3-test-pc-sampling-host-trap-transpose-multiple-agents-input-cmd-execute
COMMAND
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --kernel-trace --pc-sampling-unit time
--pc-sampling-method host_trap --pc-sampling-interval 1 -d
--pc-sampling-method host_trap --pc-sampling-interval 1 --agent-index absolute -d
${CMAKE_CURRENT_BINARY_DIR}/pc_sampling_cmd_input -o out --output-format csv --
$<TARGET_FILE:transpose> ${TRANSPOSE_NUM_THREADS} ${TRANSPOSE_NUM_ITERATIONS})
@@ -8,7 +8,8 @@
"output_file": "out",
"output_format": [
"csv"
]
],
"agent_index": "type-relative"
}
]
}
@@ -3,3 +3,4 @@ jobs:
pc_sampling_unit: "time"
pc_sampling_method: "host_trap"
pc_sampling_interval: 1
agent-index: relative
@@ -49,7 +49,11 @@ def test_multi_agent_support(
# Determine the agent on which sample was generated
samples_df["Agent_Id"] = (
samples_df["Dispatch_Id"]
.map(input_kernel_trace_csv.set_index("Dispatch_Id")["Agent_Id"])
.map(
input_kernel_trace_csv.set_index("Dispatch_Id")["Agent_Id"]
.str.split(" ")
.str[1]
)
.astype(np.uint64)
)
sampled_agents = samples_df["Agent_Id"].unique()
@@ -127,7 +127,7 @@ def test_scratch_memory(json_input_data, csv_input_data):
assert row["Kind"] == "SCRATCH_MEMORY"
assert row["Operation"] in ["SCRATCH_MEMORY_ALLOC", "SCRATCH_MEMORY_FREE"]
assert int(row["Agent_Id"]) > 0
assert int(row["Agent_Id"].split(" ")[-1]) >= 0
assert int(row["Queue_Id"]) > 0
assert int(row["Thread_Id"]) > 0
assert int(row["Start_Timestamp"]) > 0
@@ -300,7 +300,7 @@ def test_kernel_trace(kernel_input_data, kernel_stats_data):
continue
assert row["Kind"] == "KERNEL_DISPATCH"
assert int(row["Agent_Id"]) > 0
assert int(row["Agent_Id"].split(" ")[-1]) >= 0
assert int(row["Queue_Id"]) > 0
assert int(row["Kernel_Id"]) > 0
assert int(row["Correlation_Id"]) > 0
@@ -419,8 +419,8 @@ def test_memory_copy_trace(
"MEMORY_COPY_DEVICE_TO_HOST",
)
src_agent = get_agent(row["Source_Agent_Id"])
dst_agent = get_agent(row["Destination_Agent_Id"])
src_agent = get_agent(row["Source_Agent_Id"].split(" ")[-1])
dst_agent = get_agent(row["Destination_Agent_Id"].split(" ")[-1])
assert src_agent is not None and dst_agent is not None, f"{agent_info_input_data}"
if row["Direction"] == "MEMORY_COPY_HOST_TO_DEVICE":
@@ -129,7 +129,7 @@ def test_kernel_trace(kernel_input_data):
assert len(kernel_input_data) == 1
for row in kernel_input_data:
assert row["Kind"] == "KERNEL_DISPATCH"
assert int(row["Agent_Id"]) > 0
assert int(row["Agent_Id"].split(" ")[-1]) >= 0
assert int(row["Queue_Id"]) > 0
assert int(row["Kernel_Id"]) > 0
assert row["Kernel_Name"] in valid_kernel_names
@@ -223,8 +223,8 @@ def test_memory_copy_trace(agent_info_input_data, memory_copy_input_data):
assert direction in ("MEMORY_COPY_HOST_TO_DEVICE", "MEMORY_COPY_DEVICE_TO_HOST")
row = memory_copy_input_data[idx]
assert row["Direction"] == direction
src_agent = get_agent(row["Source_Agent_Id"])
dst_agent = get_agent(row["Destination_Agent_Id"])
src_agent = get_agent(row["Source_Agent_Id"].split(" ")[-1])
dst_agent = get_agent(row["Destination_Agent_Id"].split(" ")[-1])
assert src_agent is not None and dst_agent is not None, f"{agent_info_input_data}"
if direction == "MEMORY_COPY_HOST_TO_DEVICE":
assert src_agent["Agent_Type"] == "CPU"