rocprofv3 kernel renaming support + misc rocprofv3 updates (#992)
* Increase rocprofv3 tool buffer size - 32 pages instead of 1 page * Improve rocprofv3 perfetto track labels * Preliminary kernel renaming support + misc rocprofv3 updates - add rocprofv3 option --kernel-rename - add rocprofv3 options for perfetto settings (buffer size, etc.) - add CSV columns for kernel trace - Thread_Id - Dispatch_Id - add CSV column for counter_collection - Kernel_Id
Αυτή η υποβολή περιλαμβάνεται σε:
υποβλήθηκε από
GitHub
γονέας
41f024475a
υποβολή
ebb021c59f
@@ -168,6 +168,10 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
|
||||
"--list-metrics",
|
||||
help="List metrics for counter collection",
|
||||
)
|
||||
add_parser_bool_argument(
|
||||
"--kernel-rename",
|
||||
help="Use region names defined by roctxRangePush/roctxRangePop regions to rename the kernels",
|
||||
)
|
||||
parser.add_argument(
|
||||
"-i",
|
||||
"--input",
|
||||
@@ -232,6 +236,35 @@ For MPI applications (or other job launchers such as SLURM), place rocprofv3 ins
|
||||
default=os.environ.get("ROCPROF_PRELOAD", "").split(":"),
|
||||
nargs="*",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--perfetto-backend",
|
||||
help="Perfetto data collection backend. 'system' mode requires starting traced and perfetto daemons",
|
||||
default=None,
|
||||
type=str,
|
||||
nargs=1,
|
||||
choices=("inprocess", "system"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--perfetto-buffer-size",
|
||||
help="Size of buffer for perfetto output in KB. default: 1 GB",
|
||||
default=None,
|
||||
type=int,
|
||||
metavar="KB",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--perfetto-buffer-fill-policy",
|
||||
help="Policy for handling new records when perfetto has reached the buffer limit",
|
||||
default=None,
|
||||
type=str,
|
||||
choices=("discard", "ring_buffer"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--perfetto-shmem-size-hint",
|
||||
help="Perfetto shared memory size hint in KB. default: 64 KB",
|
||||
default=None,
|
||||
type=int,
|
||||
metavar="KB",
|
||||
)
|
||||
|
||||
if args is None:
|
||||
args = sys.argv[1:]
|
||||
@@ -582,6 +615,29 @@ def run(app_args, args, **kwargs):
|
||||
args.log_level,
|
||||
)
|
||||
|
||||
for opt, env_val in dict(
|
||||
[
|
||||
["kernel_rename", "KERNEL_RENAME"],
|
||||
]
|
||||
).items():
|
||||
val = getattr(args, f"{opt}")
|
||||
if val is not None:
|
||||
update_env(f"ROCPROF_{env_val}", val, overwrite_if_true=True)
|
||||
|
||||
for opt, env_val in dict(
|
||||
[
|
||||
["perfetto_buffer_size", "PERFETTO_BUFFER_SIZE_KB"],
|
||||
["perfetto_shmem_size_hint", "PERFETTO_SHMEM_SIZE_HINT_KB"],
|
||||
["perfetto_fill_policy", "PERFETTO_BUFFER_FILL_POLICY"],
|
||||
["perfetto_backend", "PERFETTO_BACKEND"],
|
||||
]
|
||||
).items():
|
||||
val = getattr(args, f"{opt}")
|
||||
if val is not None:
|
||||
if isinstance(val, (list, tuple, set)):
|
||||
val = ", ".join(val)
|
||||
update_env(f"ROCPROF_{env_val}", val, overwrite=True)
|
||||
|
||||
def log_config(_env):
|
||||
existing_env = dict(os.environ)
|
||||
init_message = "\n- rocprofv3 configuration{}:\n".format(
|
||||
|
||||
@@ -70,5 +70,33 @@ get_string_entry(std::string_view name)
|
||||
->emplace(_hash_v, std::make_unique<std::string>(name))
|
||||
.first->second.get();
|
||||
}
|
||||
|
||||
const std::string*
|
||||
get_string_entry(size_t _hash_v)
|
||||
{
|
||||
if(!get_string_array()) return nullptr;
|
||||
|
||||
auto _lk = std::shared_lock<std::shared_mutex>{get_sync()};
|
||||
if(get_string_array()->count(_hash_v) > 0) return get_string_array()->at(_hash_v).get();
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
size_t
|
||||
add_string_entry(std::string_view name)
|
||||
{
|
||||
if(!get_string_array()) return 0;
|
||||
|
||||
auto _hash_v = std::hash<std::string_view>{}(name);
|
||||
{
|
||||
auto _lk = std::shared_lock<std::shared_mutex>{get_sync()};
|
||||
if(get_string_array()->count(_hash_v) > 0) return _hash_v;
|
||||
}
|
||||
|
||||
auto _lk = std::unique_lock<std::shared_mutex>{get_sync()};
|
||||
get_string_array()->emplace(_hash_v, std::make_unique<std::string>(name));
|
||||
|
||||
return _hash_v;
|
||||
}
|
||||
} // namespace common
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
@@ -31,5 +32,11 @@ namespace common
|
||||
{
|
||||
const std::string*
|
||||
get_string_entry(std::string_view name);
|
||||
|
||||
const std::string*
|
||||
get_string_entry(size_t hash);
|
||||
|
||||
size_t
|
||||
add_string_entry(std::string_view name);
|
||||
} // namespace common
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -284,6 +284,10 @@ config::config()
|
||||
<< "Unsupported output format type: " << itr;
|
||||
}
|
||||
if(kernel_filter_include.empty()) kernel_filter_include = std::string(".*");
|
||||
|
||||
const auto supported_perfetto_backends = std::set<std::string_view>{"inprocess", "system"};
|
||||
LOG_IF(FATAL, supported_perfetto_backends.count(perfetto_backend) == 0)
|
||||
<< "Unsupported perfetto backend type: " << perfetto_backend;
|
||||
}
|
||||
|
||||
std::vector<output_key>
|
||||
|
||||
@@ -77,8 +77,11 @@ struct config
|
||||
bool csv_output = false;
|
||||
bool json_output = false;
|
||||
bool pftrace_output = false;
|
||||
bool kernel_rename = get_env("ROCPROF_KERNEL_RENAME", false);
|
||||
int mpi_size = get_mpi_size();
|
||||
int mpi_rank = get_mpi_rank();
|
||||
size_t perfetto_shmem_size_hint = get_env("ROCPROF_PERFETTO_SHMEM_SIZE_HINT_KB", 64);
|
||||
size_t perfetto_buffer_size = get_env("ROCPROF_PERFETTO_BUFFER_SIZE_KB", 1024000);
|
||||
std::string output_path = get_env("ROCPROF_OUTPUT_PATH", fs::current_path().string());
|
||||
std::string output_file = get_env("ROCPROF_OUTPUT_FILE_NAME", std::to_string(getpid()));
|
||||
std::string tmp_directory = get_env("ROCPROF_TMPDIR", output_path);
|
||||
@@ -87,6 +90,9 @@ struct config
|
||||
get_env("ROCPROF_KERNEL_FILTER_INCLUDE_REGEX", std::string{".*"});
|
||||
std::string kernel_filter_exclude =
|
||||
get_env("ROCPROF_KERNEL_FILTER_EXCLUDE_REGEX", std::string{});
|
||||
std::string perfetto_buffer_fill_policy =
|
||||
get_env("ROCPROF_PERFETTO_BUFFER_FILL_POLICY", std::string{"discard"});
|
||||
std::string perfetto_backend = get_env("ROCPROF_PERFETTO_BACKEND", std::string{"inprocess"});
|
||||
std::unordered_set<uint32_t> kernel_filter_range = {};
|
||||
std::set<std::string> counters = {};
|
||||
};
|
||||
|
||||
@@ -101,8 +101,8 @@ struct csv_encoder
|
||||
|
||||
using api_csv_encoder = csv_encoder<7>;
|
||||
using agent_info_csv_encoder = csv_encoder<53>;
|
||||
using kernel_trace_csv_encoder = csv_encoder<16>;
|
||||
using counter_collection_csv_encoder = csv_encoder<15>;
|
||||
using kernel_trace_csv_encoder = csv_encoder<18>;
|
||||
using counter_collection_csv_encoder = csv_encoder<16>;
|
||||
using memory_copy_csv_encoder = csv_encoder<7>;
|
||||
using marker_csv_encoder = csv_encoder<7>;
|
||||
using list_basic_metrics_csv_encoder = csv_encoder<5>;
|
||||
|
||||
@@ -287,6 +287,8 @@ generate_csv(tool_table*
|
||||
{"Kind",
|
||||
"Agent_Id",
|
||||
"Queue_Id",
|
||||
"Thread_Id",
|
||||
"Dispatch_Id",
|
||||
"Kernel_Id",
|
||||
"Kernel_Name",
|
||||
"Correlation_Id",
|
||||
@@ -304,12 +306,15 @@ generate_csv(tool_table*
|
||||
for(const auto& record : data)
|
||||
{
|
||||
auto row_ss = std::stringstream{};
|
||||
auto kernel_name = tool_functions->tool_get_kernel_name_fn(record.dispatch_info.kernel_id);
|
||||
auto kernel_name = tool_functions->tool_get_kernel_name_fn(
|
||||
record.dispatch_info.kernel_id, record.correlation_id.external.value);
|
||||
rocprofiler::tool::csv::kernel_trace_csv_encoder::write_row(
|
||||
row_ss,
|
||||
tool_functions->tool_get_domain_name_fn(record.kind),
|
||||
tool_functions->tool_get_agent_node_id_fn(record.dispatch_info.agent_id),
|
||||
record.dispatch_info.queue_id.handle,
|
||||
record.thread_id,
|
||||
record.dispatch_info.dispatch_id,
|
||||
record.dispatch_info.kernel_id,
|
||||
kernel_name,
|
||||
record.correlation_id.internal,
|
||||
@@ -542,6 +547,7 @@ generate_csv(tool_table* too
|
||||
"Process_Id",
|
||||
"Thread_Id",
|
||||
"Grid_Size",
|
||||
"Kernel_Id",
|
||||
"Kernel_Name",
|
||||
"Workgroup_Size",
|
||||
"LDS_Block_Size",
|
||||
@@ -583,7 +589,8 @@ generate_csv(tool_table* too
|
||||
getpid(),
|
||||
record.thread_id,
|
||||
magnitude(record.dispatch_data.dispatch_info.grid_size),
|
||||
tool_functions->tool_get_kernel_name_fn(kernel_id),
|
||||
record.dispatch_data.dispatch_info.kernel_id,
|
||||
tool_functions->tool_get_kernel_name_fn(kernel_id, correlation_id.external.value),
|
||||
magnitude(record.dispatch_data.dispatch_info.workgroup_size),
|
||||
record.lds_block_size_v,
|
||||
record.dispatch_data.dispatch_info.private_segment_size,
|
||||
|
||||
@@ -25,6 +25,8 @@
|
||||
#include "helper.hpp"
|
||||
#include "output_file.hpp"
|
||||
|
||||
#include "lib/common/string_entry.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/marker/api_id.h>
|
||||
|
||||
@@ -90,6 +92,27 @@ write_json(tool_table* tool
|
||||
json_ar(cereal::make_nvp("buffer_records", buffer_name_info));
|
||||
json_ar(cereal::make_nvp("marker_api", marker_msg_data));
|
||||
|
||||
{
|
||||
auto _extern_corr_id_strings = std::map<size_t, std::string>{};
|
||||
if(tool::get_config().kernel_rename)
|
||||
{
|
||||
for(auto itr : *kernel_dispatch_deque)
|
||||
{
|
||||
auto _value = itr.correlation_id.external.value;
|
||||
if(_value > 0)
|
||||
{
|
||||
const auto* _str = common::get_string_entry(_value);
|
||||
if(_str) _extern_corr_id_strings.emplace(_value, *_str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json_ar.setNextName("correlation_id");
|
||||
json_ar.startNode();
|
||||
json_ar(cereal::make_nvp("external", _extern_corr_id_strings));
|
||||
json_ar.finishNode();
|
||||
}
|
||||
|
||||
{
|
||||
json_ar.setNextName("counters");
|
||||
json_ar.startNode();
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "output_file.hpp"
|
||||
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/rocprofiler-sdk-tool/config.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/marker/api_id.h>
|
||||
@@ -83,20 +84,37 @@ write_perfetto(
|
||||
auto cfg = ::perfetto::TraceConfig{};
|
||||
|
||||
// environment settings
|
||||
auto shmem_size_hint = size_t{64};
|
||||
auto buffer_size_kb = size_t{1024000};
|
||||
auto shmem_size_hint = get_config().perfetto_shmem_size_hint;
|
||||
auto buffer_size_kb = get_config().perfetto_buffer_size;
|
||||
|
||||
auto* buffer_config = cfg.add_buffers();
|
||||
buffer_config->set_size_kb(buffer_size_kb);
|
||||
buffer_config->set_fill_policy(
|
||||
::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD);
|
||||
|
||||
if(get_config().perfetto_buffer_fill_policy == "discard" ||
|
||||
get_config().perfetto_buffer_fill_policy.empty())
|
||||
buffer_config->set_fill_policy(
|
||||
::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD);
|
||||
else if(get_config().perfetto_buffer_fill_policy == "ring_buffer")
|
||||
buffer_config->set_fill_policy(
|
||||
::perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_RING_BUFFER);
|
||||
else
|
||||
ROCP_FATAL << "Unsupport perfetto buffer fill policy: '"
|
||||
<< get_config().perfetto_buffer_fill_policy
|
||||
<< "'. Supported: discard, ring_buffer";
|
||||
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("track_event"); // this MUST be track_event
|
||||
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
|
||||
args.shmem_size_hint_kb = shmem_size_hint;
|
||||
args.backends |= ::perfetto::kInProcessBackend;
|
||||
|
||||
if(get_config().perfetto_backend == "inprocess" || get_config().perfetto_backend.empty())
|
||||
args.backends |= ::perfetto::kInProcessBackend;
|
||||
else if(get_config().perfetto_backend == "system")
|
||||
args.backends |= ::perfetto::kSystemBackend;
|
||||
else
|
||||
ROCP_FATAL << "Unsupport perfetto backend: '" << get_config().perfetto_backend
|
||||
<< "'. Supported: inprocess, system";
|
||||
|
||||
::perfetto::Tracing::Initialize(args);
|
||||
::perfetto::TrackEvent::Register();
|
||||
@@ -175,13 +193,15 @@ write_perfetto(
|
||||
for(auto titr : itr.second)
|
||||
{
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "COPY to [" << _agent->logical_node_id << "] THREAD ["
|
||||
<< thread_indexes.at(titr) << "]";
|
||||
_namess << "COPY to AGENT [" << _agent->logical_node_id << "] THREAD ["
|
||||
<< thread_indexes.at(titr) << "] ";
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << " CPU";
|
||||
_namess << "(CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << " GPU";
|
||||
_namess << "(GPU)";
|
||||
else
|
||||
_namess << "(UNK)";
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
@@ -201,12 +221,15 @@ write_perfetto(
|
||||
const auto* _agent = _get_agent(aitr.first);
|
||||
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "COMPUTE [" << _agent->logical_node_id << "] QUEUE [" << nqueue++ << "] ";
|
||||
_namess << "COMPUTE AGENT [" << _agent->logical_node_id << "] QUEUE [" << nqueue++
|
||||
<< "] ";
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << "CPU";
|
||||
_namess << "(CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << "GPU";
|
||||
_namess << "(GPU)";
|
||||
else
|
||||
_namess << "(UNK)";
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
@@ -451,9 +474,9 @@ write_perfetto(
|
||||
const auto* _agent = _get_agent(mitr.first);
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_track_name << "COPY BYTES to [" << _agent->logical_node_id << "] CPU";
|
||||
_track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_track_name << "COPY BYTES to [" << _agent->logical_node_id << "] GPU";
|
||||
_track_name << "COPY BYTES to AGENT [" << _agent->logical_node_id << "] (GPU)";
|
||||
|
||||
constexpr auto _unit = ::perfetto::CounterTrack::Unit::UNIT_SIZE_BYTES;
|
||||
auto& _name = mem_cpy_cnt_names.emplace_back(_track_name.str());
|
||||
|
||||
@@ -333,7 +333,7 @@ using scratch_memory_buffered_output_t =
|
||||
|
||||
using tool_get_agent_node_id_fn_t = uint64_t (*)(rocprofiler_agent_id_t);
|
||||
using tool_get_app_timestamps_fn_t = timestamps_t* (*) ();
|
||||
using tool_get_kernel_name_fn_t = std::string_view (*)(uint64_t);
|
||||
using tool_get_kernel_name_fn_t = std::string_view (*)(uint64_t, uint64_t);
|
||||
using tool_get_domain_name_fn_t = std::string_view (*)(rocprofiler_buffer_tracing_kind_t);
|
||||
using tool_get_operation_name_fn_t = std::string_view (*)(rocprofiler_buffer_tracing_kind_t,
|
||||
rocprofiler_tracing_operation_t);
|
||||
|
||||
@@ -34,7 +34,10 @@
|
||||
#include "lib/common/environment.hpp"
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/logging.hpp"
|
||||
#include "lib/common/scope_destructor.hpp"
|
||||
#include "lib/common/string_entry.hpp"
|
||||
#include "lib/common/synchronized.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/agent.h>
|
||||
@@ -181,8 +184,10 @@ using targeted_kernels_map_t =
|
||||
std::unordered_map<rocprofiler_kernel_id_t, std::unordered_set<uint32_t>>;
|
||||
using counter_dimension_info_map_t =
|
||||
std::unordered_map<uint64_t, std::vector<rocprofiler_record_dimension_info_t>>;
|
||||
using agent_info_map_t = std::unordered_map<rocprofiler_agent_id_t, rocprofiler_agent_t>;
|
||||
using kernel_iteration_t = std::unordered_map<rocprofiler_kernel_id_t, uint32_t>;
|
||||
using agent_info_map_t = std::unordered_map<rocprofiler_agent_id_t, rocprofiler_agent_t>;
|
||||
using kernel_iteration_t = std::unordered_map<rocprofiler_kernel_id_t, uint32_t>;
|
||||
using kernel_rename_map_t = std::unordered_map<uint64_t, uint64_t>;
|
||||
using kernel_rename_stack_t = std::stack<uint64_t>;
|
||||
|
||||
auto code_obj_data = as_pointer<common::Synchronized<code_object_data_map_t, true>>();
|
||||
auto* kernel_data = as_pointer<common::Synchronized<kernel_symbol_data_map_t, true>>();
|
||||
@@ -196,14 +201,20 @@ auto* tool_functions = as_pointer(tool_table{});
|
||||
auto* stats_timestamp = as_pointer(timestamps_t{});
|
||||
auto kernel_iteration = common::Synchronized<kernel_iteration_t, true>{};
|
||||
|
||||
thread_local auto thread_dispatch_rename = as_pointer<kernel_rename_stack_t>();
|
||||
thread_local auto thread_dispatch_rename_dtor = common::scope_destructor{[]() {
|
||||
delete thread_dispatch_rename;
|
||||
thread_dispatch_rename = nullptr;
|
||||
}};
|
||||
|
||||
bool
|
||||
add_kernel_target(uint64_t _kern_id, const std::unordered_set<uint32_t>& range)
|
||||
{
|
||||
return target_kernels
|
||||
.wlock(
|
||||
[](targeted_kernels_map_t& _targets_v,
|
||||
uint64_t _kern_id_v,
|
||||
std::unordered_set<uint32_t> _range) {
|
||||
[](targeted_kernels_map_t& _targets_v,
|
||||
uint64_t _kern_id_v,
|
||||
const std::unordered_set<uint32_t>& _range) {
|
||||
return _targets_v.emplace(_kern_id_v, _range);
|
||||
},
|
||||
_kern_id,
|
||||
@@ -214,42 +225,33 @@ add_kernel_target(uint64_t _kern_id, const std::unordered_set<uint32_t>& range)
|
||||
bool
|
||||
is_targeted_kernel(uint64_t _kern_id)
|
||||
{
|
||||
bool is_target_kernel = false;
|
||||
std::unordered_set<uint32_t> range = {};
|
||||
is_target_kernel = target_kernels.rlock(
|
||||
[&range](const auto& _targets_v, uint64_t _kern_id_v) {
|
||||
if(_targets_v.find(_kern_id_v) != _targets_v.end())
|
||||
{
|
||||
range = _targets_v.at(_kern_id_v);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
const std::unordered_set<uint32_t>* range = target_kernels.rlock(
|
||||
[](const auto& _targets_v, uint64_t _kern_id_v) -> const std::unordered_set<uint32_t>* {
|
||||
if(_targets_v.find(_kern_id_v) != _targets_v.end()) return &_targets_v.at(_kern_id_v);
|
||||
return nullptr;
|
||||
},
|
||||
_kern_id);
|
||||
|
||||
if(is_target_kernel)
|
||||
if(range)
|
||||
{
|
||||
kernel_iteration.rlock(
|
||||
[&](const auto& _kernel_iter,
|
||||
uint64_t _kernel_id,
|
||||
std::unordered_set<uint32_t> _range) {
|
||||
return kernel_iteration.rlock(
|
||||
[](const auto& _kernel_iter,
|
||||
uint64_t _kernel_id,
|
||||
const std::unordered_set<uint32_t>& _range) {
|
||||
auto itr = _kernel_iter.at(_kernel_id);
|
||||
|
||||
// If the iteration range is not given then all iterations of the kernel is profiled
|
||||
if(_range.empty())
|
||||
is_target_kernel = true;
|
||||
|
||||
return true;
|
||||
else if(_range.find(itr) != _range.end())
|
||||
{
|
||||
is_target_kernel = true;
|
||||
}
|
||||
else
|
||||
is_target_kernel = false;
|
||||
return true;
|
||||
return false;
|
||||
},
|
||||
_kern_id,
|
||||
range);
|
||||
*range);
|
||||
}
|
||||
return is_target_kernel;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
auto&
|
||||
@@ -295,6 +297,26 @@ get_roctx_msg(uint64_t cid)
|
||||
cid);
|
||||
}
|
||||
|
||||
int
|
||||
set_kernel_rename_correlation_id(rocprofiler_thread_id_t thr_id,
|
||||
rocprofiler_context_id_t ctx_id,
|
||||
rocprofiler_external_correlation_id_request_kind_t kind,
|
||||
rocprofiler_tracing_operation_t op,
|
||||
uint64_t internal_corr_id,
|
||||
rocprofiler_user_data_t* external_corr_id,
|
||||
void* user_data)
|
||||
{
|
||||
ROCP_FATAL_IF(kind != ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH)
|
||||
<< "unexpected kind: " << kind;
|
||||
|
||||
if(thread_dispatch_rename != nullptr && !thread_dispatch_rename->empty())
|
||||
external_corr_id->value = thread_dispatch_rename->top();
|
||||
|
||||
common::consume_args(thr_id, ctx_id, kind, op, internal_corr_id, user_data);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
cntrl_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* user_data,
|
||||
@@ -337,6 +359,45 @@ cntrl_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
kernel_rename_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* user_data,
|
||||
void* data)
|
||||
{
|
||||
if(!rocprofiler::tool::get_config().kernel_rename || thread_dispatch_rename == nullptr) return;
|
||||
|
||||
if(record.kind == ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API)
|
||||
{
|
||||
auto* marker_data =
|
||||
static_cast<rocprofiler_callback_tracing_marker_api_data_t*>(record.payload);
|
||||
|
||||
if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA &&
|
||||
record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT && marker_data->args.roctxMarkA.message)
|
||||
{
|
||||
thread_dispatch_rename->emplace(
|
||||
common::add_string_entry(marker_data->args.roctxMarkA.message));
|
||||
}
|
||||
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA &&
|
||||
record.phase == ROCPROFILER_CALLBACK_PHASE_EXIT &&
|
||||
marker_data->args.roctxRangePushA.message)
|
||||
{
|
||||
thread_dispatch_rename->emplace(
|
||||
common::add_string_entry(marker_data->args.roctxRangePushA.message));
|
||||
}
|
||||
else if(record.operation == ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop &&
|
||||
record.phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
||||
{
|
||||
ROCP_FATAL_IF(thread_dispatch_rename->empty())
|
||||
<< "roctxRangePop invoked more times than roctxRangePush on thread "
|
||||
<< rocprofiler::common::get_tid();
|
||||
|
||||
thread_dispatch_rename->pop();
|
||||
}
|
||||
}
|
||||
|
||||
common::consume_args(user_data, data);
|
||||
}
|
||||
|
||||
void
|
||||
callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* user_data,
|
||||
@@ -485,8 +546,6 @@ callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
}
|
||||
}
|
||||
|
||||
(void) record;
|
||||
(void) user_data;
|
||||
(void) data;
|
||||
}
|
||||
|
||||
@@ -544,9 +603,14 @@ code_object_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
std::regex exclude_regex(kernel_filter_exclude);
|
||||
if(std::regex_search(kernel_info.formatted_kernel_name, include_regex))
|
||||
{
|
||||
if(kernel_filter_exclude.empty())
|
||||
if(kernel_filter_exclude.empty() ||
|
||||
!std::regex_search(kernel_info.formatted_kernel_name, exclude_regex))
|
||||
add_kernel_target(sym_data->kernel_id, kernel_filter_range);
|
||||
else if(!std::regex_search(kernel_info.formatted_kernel_name, exclude_regex))
|
||||
}
|
||||
else
|
||||
{
|
||||
if(kernel_filter_exclude.empty() ||
|
||||
!std::regex_search(kernel_info.formatted_kernel_name, exclude_regex))
|
||||
add_kernel_target(sym_data->kernel_id, kernel_filter_range);
|
||||
}
|
||||
}
|
||||
@@ -558,8 +622,13 @@ code_object_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
}
|
||||
|
||||
std::string_view
|
||||
get_kernel_name(uint64_t kernel_id)
|
||||
get_kernel_name(uint64_t kernel_id, uint64_t rename_id)
|
||||
{
|
||||
if(rename_id > 0)
|
||||
{
|
||||
if(const auto* _name = common::get_string_entry(rename_id)) return std::string_view{*_name};
|
||||
}
|
||||
|
||||
return CHECK_NOTNULL(kernel_data)->rlock([kernel_id](const auto& _data) -> std::string_view {
|
||||
return _data.at(kernel_id).formatted_kernel_name;
|
||||
});
|
||||
@@ -1180,8 +1249,8 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
{
|
||||
client_finalizer = fini_func;
|
||||
|
||||
constexpr uint64_t buffer_size = 4096;
|
||||
constexpr uint64_t buffer_watermark = 4096;
|
||||
const uint64_t buffer_size = 32 * common::units::get_page_size();
|
||||
const uint64_t buffer_watermark = 31 * common::units::get_page_size();
|
||||
|
||||
rocprofiler_get_timestamp(&(stats_timestamp->app_start_time));
|
||||
|
||||
@@ -1362,6 +1431,40 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
"Could not setup counting service");
|
||||
}
|
||||
|
||||
if(tool::get_config().kernel_rename)
|
||||
{
|
||||
auto rename_ctx = rocprofiler_context_id_t{};
|
||||
auto marker_core_api_kinds = std::array<rocprofiler_tracing_operation_t, 3>{
|
||||
ROCPROFILER_MARKER_CORE_API_ID_roctxMarkA,
|
||||
ROCPROFILER_MARKER_CORE_API_ID_roctxRangePushA,
|
||||
ROCPROFILER_MARKER_CORE_API_ID_roctxRangePop};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&rename_ctx), "failed to create context");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
|
||||
rename_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_MARKER_CORE_API,
|
||||
marker_core_api_kinds.data(),
|
||||
marker_core_api_kinds.size(),
|
||||
kernel_rename_callback,
|
||||
nullptr),
|
||||
"callback tracing service failed to configure");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(rename_ctx), "start context failed");
|
||||
|
||||
auto external_corr_id_request_kinds =
|
||||
std::array<rocprofiler_external_correlation_id_request_kind_t, 1>{
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_external_correlation_id_request_service(
|
||||
get_client_ctx(),
|
||||
external_corr_id_request_kinds.data(),
|
||||
external_corr_id_request_kinds.size(),
|
||||
set_kernel_rename_correlation_id,
|
||||
nullptr),
|
||||
"Could not configure external correlation id request service");
|
||||
}
|
||||
|
||||
for(auto itr : get_buffers().as_array())
|
||||
{
|
||||
if(itr.handle > 0)
|
||||
|
||||
@@ -206,8 +206,8 @@ run(int rank, int tid, int devid, int argc, char** argv)
|
||||
int* in = nullptr;
|
||||
int* out = nullptr;
|
||||
|
||||
HIP_API_CALL(hipMalloc(&in, size));
|
||||
HIP_API_CALL(hipMalloc(&out, size));
|
||||
HIP_API_CALL(hipMallocAsync(&in, size, stream));
|
||||
HIP_API_CALL(hipMallocAsync(&out, size, stream));
|
||||
HIP_API_CALL(hipMemsetAsync(in, 0, size, stream));
|
||||
HIP_API_CALL(hipMemsetAsync(out, 0, size, stream));
|
||||
HIP_API_CALL(hipMemcpyAsync(in, inp_matrix, size, hipMemcpyHostToDevice, stream));
|
||||
@@ -238,13 +238,15 @@ run(int rank, int tid, int devid, int argc, char** argv)
|
||||
print_lock.unlock();
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
HIP_API_CALL(hipStreamDestroy(stream));
|
||||
|
||||
// cpu_transpose(matrix, out_matrix, M, N);
|
||||
verify(inp_matrix, out_matrix, M, N);
|
||||
|
||||
HIP_API_CALL(hipFree(in));
|
||||
HIP_API_CALL(hipFree(out));
|
||||
HIP_API_CALL(hipFreeAsync(in, stream));
|
||||
HIP_API_CALL(hipFreeAsync(out, stream));
|
||||
|
||||
HIP_API_CALL(hipStreamSynchronize(stream));
|
||||
HIP_API_CALL(hipStreamDestroy(stream));
|
||||
|
||||
delete[] inp_matrix;
|
||||
delete[] out_matrix;
|
||||
|
||||
@@ -28,3 +28,4 @@ add_subdirectory(tracing-plus-counter-collection)
|
||||
add_subdirectory(tracing-hip-in-libraries)
|
||||
add_subdirectory(counter-collection)
|
||||
add_subdirectory(hsa-queue-dependency)
|
||||
add_subdirectory(kernel-rename)
|
||||
|
||||
@@ -0,0 +1,114 @@
|
||||
#
|
||||
# rocprofv3 tool tests for kernel renaming
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
project(
|
||||
rocprofiler-tests-rocprofv3-kernel-rename
|
||||
LANGUAGES CXX
|
||||
VERSION 0.0.0)
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
|
||||
if(ROCPROFILER_MEMCHECK STREQUAL "LeakSanitizer")
|
||||
set(LOG_LEVEL "warning") # info produces memory leak
|
||||
else()
|
||||
set(LOG_LEVEL "info")
|
||||
endif()
|
||||
|
||||
string(REPLACE "LD_PRELOAD=" "ROCPROF_PRELOAD=" PRELOAD_ENV
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}")
|
||||
|
||||
set(kernel-rename-env "${PRELOAD_ENV}")
|
||||
|
||||
rocprofiler_configure_pytest_files(CONFIG pytest.ini input-kernel-rename.yml
|
||||
COPY validate.py conftest.py)
|
||||
|
||||
##########################################################################################
|
||||
#
|
||||
# Command line input
|
||||
#
|
||||
##########################################################################################
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-kernel-rename-cmd-line-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -M --sys-trace no --hsa-trace=0
|
||||
--hsa-core-trace=1 --hip-compiler-trace False --hip-runtime-trace --kernel-trace
|
||||
--memory-copy-trace -d ${CMAKE_CURRENT_BINARY_DIR}/%argt%-trace-cmd-line -o out
|
||||
--output-format pftrace json --log-level env --kernel-rename
|
||||
--perfetto-shmem-size-hint 128 --perfetto-buffer-size 2048000
|
||||
--perfetto-buffer-fill-policy ring_buffer --perfetto-backend inprocess --
|
||||
$<TARGET_FILE:transpose>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-kernel-rename-cmd-line-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT "${kernel-rename-env}"
|
||||
FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-kernel-rename-cmd-line-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/transpose-trace-cmd-line/out_results.json
|
||||
--pftrace-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/transpose-trace-cmd-line/out_results.pftrace)
|
||||
|
||||
set(VALIDATION_FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/transpose-trace-cmd-line/out_results.pftrace
|
||||
${CMAKE_CURRENT_BINARY_DIR}/transpose-trace-cmd-line/out_results.json)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-kernel-rename-cmd-line-validate
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DEPENDS
|
||||
"rocprofv3-test-kernel-rename-execute"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError"
|
||||
ATTACHED_FILES_ON_FAIL
|
||||
"${VALIDATION_FILES}")
|
||||
|
||||
##########################################################################################
|
||||
#
|
||||
# YAML input
|
||||
#
|
||||
##########################################################################################
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-kernel-rename-inp-yaml-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> -i
|
||||
${CMAKE_CURRENT_BINARY_DIR}/input-kernel-rename.yml -- $<TARGET_FILE:transpose>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-kernel-rename-inp-yaml-execute
|
||||
PROPERTIES TIMEOUT 45 LABELS "integration-tests" ENVIRONMENT "${kernel-rename-env}"
|
||||
FAIL_REGULAR_EXPRESSION "${ROCPROFILER_DEFAULT_FAIL_REGEX}")
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-kernel-rename-inp-yaml-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/transpose-trace-inp-yaml/out_results.json
|
||||
--pftrace-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/transpose-trace-inp-yaml/out_results.pftrace)
|
||||
|
||||
set(VALIDATION_FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/transpose-trace-inp-yaml/out_results.pftrace
|
||||
${CMAKE_CURRENT_BINARY_DIR}/transpose-trace-inp-yaml/out_results.json)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-kernel-rename-inp-yaml-validate
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DEPENDS
|
||||
"rocprofv3-test-kernel-rename-inp-yaml-execute"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError"
|
||||
ATTACHED_FILES_ON_FAIL
|
||||
"${VALIDATION_FILES}")
|
||||
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import pytest
|
||||
import json
|
||||
|
||||
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
|
||||
from rocprofiler_sdk.pytest_utils import collapse_dict_list
|
||||
from rocprofiler_sdk.pytest_utils.perfetto_reader import PerfettoReader
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
"--json-input",
|
||||
action="store",
|
||||
help="Path to JSON file.",
|
||||
)
|
||||
parser.addoption(
|
||||
"--pftrace-input",
|
||||
action="store",
|
||||
help="Path to Perfetto trace file.",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def json_data(request):
|
||||
filename = request.config.getoption("--json-input")
|
||||
with open(filename, "r") as inp:
|
||||
return dotdict(collapse_dict_list(json.load(inp)))
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pftrace_data(request):
|
||||
filename = request.config.getoption("--pftrace-input")
|
||||
return PerfettoReader(filename).read()[0]
|
||||
@@ -0,0 +1,20 @@
|
||||
jobs:
|
||||
- mangled_kernels: True
|
||||
sys_trace: False
|
||||
hsa_trace: False
|
||||
hsa_core_trace: True
|
||||
hsa_amd_trace: False
|
||||
hip_compiler_trace: False
|
||||
hip_runtime_trace: True
|
||||
kernel_trace: True
|
||||
memory_copy_trace: True
|
||||
marker_trace: False
|
||||
output_directory: "@CMAKE_CURRENT_BINARY_DIR@/%argt%-trace-inp-yaml"
|
||||
output_file: out
|
||||
output_format: [pftrace, json]
|
||||
log_level: env
|
||||
kernel_rename: True
|
||||
perfetto_shmem_size_hint: 128
|
||||
perfetto_buffer_size: 2048000
|
||||
perfetto_buffer_fill_policy: ring_buffer
|
||||
perfetto_backend: inprocess
|
||||
@@ -0,0 +1,5 @@
|
||||
|
||||
[pytest]
|
||||
addopts = --durations=20 -rA -s -vv
|
||||
testpaths = validate.py
|
||||
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
|
||||
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
|
||||
def test_hsa_api_trace(json_data):
|
||||
data = json_data["rocprofiler-sdk-tool"]
|
||||
|
||||
def get_operation_name(kind_id, op_id):
|
||||
return data["strings"]["buffer_records"][kind_id]["operations"][op_id]
|
||||
|
||||
def get_kind_name(kind_id):
|
||||
return data["strings"]["buffer_records"][kind_id]["kind"]
|
||||
|
||||
valid_domain_names = ("HSA_CORE_API",)
|
||||
|
||||
hsa_api_data = data["buffer_records"]["hsa_api"]
|
||||
|
||||
functions = []
|
||||
for api in hsa_api_data:
|
||||
kind = get_kind_name(api["kind"])
|
||||
assert kind in valid_domain_names
|
||||
assert api["end_timestamp"] >= api["start_timestamp"]
|
||||
functions.append(get_operation_name(api["kind"], api["operation"]))
|
||||
|
||||
functions = list(set(functions))
|
||||
assert "hsa_amd_memory_async_copy_on_engine" not in functions
|
||||
assert "hsa_signal_destroy" in functions
|
||||
|
||||
|
||||
def test_hip_api_trace(json_data):
|
||||
data = json_data["rocprofiler-sdk-tool"]
|
||||
|
||||
def get_operation_name(kind_id, op_id):
|
||||
return data["strings"]["buffer_records"][kind_id]["operations"][op_id]
|
||||
|
||||
def get_kind_name(kind_id):
|
||||
return data["strings"]["buffer_records"][kind_id]["kind"]
|
||||
|
||||
valid_domain_names = ("HIP_RUNTIME_API",)
|
||||
|
||||
hip_api_data = data["buffer_records"]["hip_api"]
|
||||
|
||||
functions = []
|
||||
for api in hip_api_data:
|
||||
kind = get_kind_name(api["kind"])
|
||||
assert kind in valid_domain_names
|
||||
assert api["end_timestamp"] >= api["start_timestamp"]
|
||||
functions.append(get_operation_name(api["kind"], api["operation"]))
|
||||
|
||||
functions = list(set(functions))
|
||||
for itr in (
|
||||
"__hipPushCallConfiguration",
|
||||
"__hipPopCallConfiguration",
|
||||
"__hipRegisterFatBinary",
|
||||
"__hipRegisterFunction",
|
||||
):
|
||||
assert itr not in functions, f"{itr}"
|
||||
|
||||
for itr in (
|
||||
"hipMallocAsync",
|
||||
"hipMemcpyAsync",
|
||||
"hipMemsetAsync",
|
||||
"hipFreeAsync",
|
||||
"hipLaunchKernel",
|
||||
):
|
||||
assert itr in functions, f"{itr}"
|
||||
|
||||
|
||||
def test_kernel_trace(json_data):
|
||||
data = json_data["rocprofiler-sdk-tool"]
|
||||
|
||||
def get_kernel_name(kernel_id):
|
||||
return data["kernel_symbols"][kernel_id]["formatted_kernel_name"]
|
||||
|
||||
def get_kernel_rename(corr_id):
|
||||
for itr in data.strings.correlation_id.external:
|
||||
if itr.key == corr_id:
|
||||
return itr.value
|
||||
return None
|
||||
|
||||
def get_kind_name(kind_id):
|
||||
return data["strings"]["buffer_records"][kind_id]["kind"]
|
||||
|
||||
valid_kernel_names = ("run",)
|
||||
kernel_dispatch_data = data["buffer_records"]["kernel_dispatch"]
|
||||
for dispatch in kernel_dispatch_data:
|
||||
assert get_kind_name(dispatch["kind"]) == "KERNEL_DISPATCH"
|
||||
assert dispatch["correlation_id"]["internal"] > 0
|
||||
assert dispatch["correlation_id"]["external"] > 0
|
||||
|
||||
dispatch_info = dispatch["dispatch_info"]
|
||||
assert dispatch_info["agent_id"]["handle"] > 0
|
||||
assert dispatch_info["queue_id"]["handle"] > 0
|
||||
assert dispatch_info["kernel_id"] > 0
|
||||
assert dispatch["end_timestamp"] >= dispatch["start_timestamp"]
|
||||
|
||||
kernel_name = get_kernel_name(dispatch_info["kernel_id"])
|
||||
assert kernel_name not in valid_kernel_names
|
||||
|
||||
external_corr_id = dispatch["correlation_id"]["external"]
|
||||
assert external_corr_id > 0
|
||||
|
||||
kernel_rename = get_kernel_rename(external_corr_id)
|
||||
assert kernel_rename is not None, f"{dispatch}"
|
||||
assert kernel_rename in valid_kernel_names
|
||||
|
||||
|
||||
def test_memory_copy_json_trace(json_data):
|
||||
data = json_data["rocprofiler-sdk-tool"]
|
||||
|
||||
buffer_records = data["buffer_records"]
|
||||
agent_data = data["agents"]
|
||||
memory_copy_data = buffer_records["memory_copy"]
|
||||
|
||||
def get_kind_name(kind_id):
|
||||
return data["strings"]["buffer_records"][kind_id]["kind"]
|
||||
|
||||
def get_agent(node_id):
|
||||
for agent in agent_data:
|
||||
if agent["id"]["handle"] == node_id["handle"]:
|
||||
return agent
|
||||
return None
|
||||
|
||||
assert len(memory_copy_data) == 12
|
||||
|
||||
for row in memory_copy_data:
|
||||
src_agent = get_agent(row["src_agent_id"])
|
||||
dst_agent = get_agent(row["dst_agent_id"])
|
||||
assert get_kind_name(row["kind"]) == "MEMORY_COPY"
|
||||
assert src_agent is not None, f"{row}"
|
||||
assert dst_agent is not None, f"{row}"
|
||||
assert row["correlation_id"]["internal"] > 0
|
||||
assert row["end_timestamp"] >= row["start_timestamp"]
|
||||
|
||||
|
||||
def test_perfetto_data(pftrace_data, json_data):
|
||||
import rocprofiler_sdk.tests.rocprofv3 as rocprofv3
|
||||
|
||||
rocprofv3.test_perfetto_data(
|
||||
pftrace_data, json_data, ("hip", "hsa", "kernel", "memory_copy")
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
|
||||
sys.exit(exit_code)
|
||||
Αναφορά σε νέο ζήτημα
Block a user