Fix HIP Streams Duplication Error (#313)
* Fix stream duplication and fixed tests * Added comments to explain stream.cpp code, change stream nullptr check to occur in update table to prevent readding null stream, simplified hip-streams bin file code, add destroyStreams to hip-streams bin file code * Removed roctx from CMakeLists.txt * Updated documentation * Fix documentation * Removed update_table for HIP compiler table and updated stream.cpp to remove support for HIP compiler table * Added runtime initialization check for HIP * Changed tool name, working on fixing memory management * Added context for counter collection kernel rename combination * Changed name from map to set and changed description * Fix documentation description for group-by-queue * Merged memory copy and kernel operations onto a single track when on the same stream * Updated perfetto output to remove hardware information from track name to merge all memory copy and kernel operations on the same stream to the same track: * Most pr comments addressed * Added filter for counter collection and removed kernel buffer tracing hack * Added PR comment fixes --------- Co-authored-by: Madsen, Jonathan <Jonathan.Madsen@amd.com>
Este commit está contenido en:
@@ -162,8 +162,10 @@ The following table lists the commonly used ``rocprofv3`` command-line options c
|
||||
| Specifies Perfetto shared memory size hint in KB. Default: 64 KB.
|
||||
|
||||
* - Display
|
||||
- | ``-L`` [BOOL] \| ``--list-avail`` [BOOL] |br|
|
||||
- | Lists the PC sampling configurations and metrics available in the counter_defs.yaml file for counter collection. In earlier ``rocprof`` versions, this was known as ``--list-basic``, ``--list-derived``, and ``--list-counters``.
|
||||
- | ``-L`` [BOOL] \| ``--list-avail`` [BOOL] |br| |br|
|
||||
| ``--group-by-queue`` [BOOL]
|
||||
- | Lists the PC sampling configurations and metrics available in the counter_defs.yaml file for counter collection. In earlier ``rocprof`` versions, this was known as ``--list-basic``, ``--list-derived``, and ``--list-counters``. |br| |br|
|
||||
| For displaying the HSA Queues that kernels and memory copy operations are submitted to rather than the default grouping of HIP Streams for perfetto.
|
||||
|
||||
* - Other
|
||||
- | ``--preload`` PRELOAD |br| |br|
|
||||
|
||||
@@ -182,10 +182,9 @@ using rocdecode_buffered_output_t =
|
||||
using rocjpeg_buffered_output_t =
|
||||
buffered_output<rocprofiler_buffer_tracing_rocjpeg_api_record_t, domain_type::ROCJPEG>;
|
||||
using kernel_dispatch_buffered_output_with_stream_t =
|
||||
buffered_output<tool_buffer_tracing_kernel_dispatch_with_stream_record_t,
|
||||
domain_type::KERNEL_DISPATCH>;
|
||||
buffered_output<tool_buffer_tracing_kernel_dispatch_ext_record_t, domain_type::KERNEL_DISPATCH>;
|
||||
using memory_copy_buffered_output_with_stream_t =
|
||||
buffered_output<tool_buffer_tracing_memory_copy_with_stream_record_t, domain_type::MEMORY_COPY>;
|
||||
buffered_output<tool_buffer_tracing_memory_copy_ext_record_t, domain_type::MEMORY_COPY>;
|
||||
using pc_sampling_stochastic_buffered_output_t =
|
||||
buffered_output<rocprofiler::tool::rocprofiler_tool_pc_sampling_stochastic_record_t,
|
||||
domain_type::PC_SAMPLING_STOCHASTIC>;
|
||||
|
||||
@@ -249,10 +249,10 @@ generate_csv(const output_config& cfg,
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_ext_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
@@ -399,10 +399,10 @@ generate_csv(const output_config& cfg,
|
||||
}
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_ext_record_t>& data,
|
||||
const stats_entry_t& stats)
|
||||
{
|
||||
if(data.empty()) return;
|
||||
|
||||
|
||||
@@ -40,10 +40,10 @@ generate_csv(const output_config& cfg,
|
||||
std::vector<agent_info>& data);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_ext_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
@@ -58,10 +58,10 @@ generate_csv(const output_config& cfg,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
generate_csv(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_ext_record_t>& data,
|
||||
const stats_entry_t& stats);
|
||||
|
||||
void
|
||||
generate_csv(const output_config& cfg,
|
||||
|
||||
@@ -186,11 +186,11 @@ void
|
||||
write_json(json_output& json_ar,
|
||||
const output_config& /*cfg*/,
|
||||
const metadata& /*tool_metadata*/,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>&& hip_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
|
||||
generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t> kernel_dispatch_gen,
|
||||
generator<tool_buffer_tracing_memory_copy_with_stream_record_t> memory_copy_gen,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>&& hip_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
|
||||
generator<tool_buffer_tracing_kernel_dispatch_ext_record_t> kernel_dispatch_gen,
|
||||
generator<tool_buffer_tracing_memory_copy_ext_record_t> memory_copy_gen,
|
||||
generator<tool_counter_record_t> counter_collection_gen,
|
||||
generator<rocprofiler_buffer_tracing_marker_api_record_t> marker_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
|
||||
|
||||
@@ -81,14 +81,14 @@ void
|
||||
write_json(json_output&, const output_config& cfg, const metadata& tool_metadata, uint64_t pid);
|
||||
|
||||
void
|
||||
write_json(json_output& json_ar,
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>&& hip_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
|
||||
generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t> kernel_dispatch_gen,
|
||||
generator<tool_buffer_tracing_memory_copy_with_stream_record_t> memory_copy_gen,
|
||||
write_json(json_output& json_ar,
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const domain_stats_vec_t& domain_stats,
|
||||
generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>&& hip_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_hsa_api_record_t> hsa_api_gen,
|
||||
generator<tool_buffer_tracing_kernel_dispatch_ext_record_t> kernel_dispatch_gen,
|
||||
generator<tool_buffer_tracing_memory_copy_ext_record_t> memory_copy_gen,
|
||||
generator<tool_counter_record_t> counter_collection_gen,
|
||||
generator<rocprofiler_buffer_tracing_marker_api_record_t> marker_api_gen,
|
||||
generator<rocprofiler_buffer_tracing_scratch_memory_record_t> scratch_memory_gen,
|
||||
|
||||
@@ -356,15 +356,15 @@ create_attribute_list()
|
||||
|
||||
void
|
||||
write_otf2(
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
uint64_t pid,
|
||||
const std::vector<agent_info>& agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_ext_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>* kernel_dispatch_data,
|
||||
std::deque<tool_buffer_tracing_memory_copy_with_stream_record_t>* memory_copy_data,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
uint64_t pid,
|
||||
const std::vector<agent_info>& agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_ext_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<tool_buffer_tracing_kernel_dispatch_ext_record_t>* kernel_dispatch_data,
|
||||
std::deque<tool_buffer_tracing_memory_copy_ext_record_t>* memory_copy_data,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* /*scratch_memory_data*/,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data,
|
||||
|
||||
@@ -36,19 +36,19 @@ namespace tool
|
||||
{
|
||||
void
|
||||
write_otf2(
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
uint64_t pid,
|
||||
const std::vector<agent_info>& agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_ext_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>* kernel_dispatch_data,
|
||||
std::deque<tool_buffer_tracing_memory_copy_with_stream_record_t>* memory_copy_data,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* scratch_memory_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rocdecode_api_ext_record_t>* rocdecode_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rocjpeg_api_record_t>* rocjpeg_api_data);
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
uint64_t pid,
|
||||
const std::vector<agent_info>& agent_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hip_api_ext_record_t>* hip_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_hsa_api_record_t>* hsa_api_data,
|
||||
std::deque<tool_buffer_tracing_kernel_dispatch_ext_record_t>* kernel_dispatch_data,
|
||||
std::deque<tool_buffer_tracing_memory_copy_ext_record_t>* memory_copy_data,
|
||||
std::deque<rocprofiler_buffer_tracing_marker_api_record_t>* marker_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>* scratch_memory_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>* rccl_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>* memory_allocation_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rocdecode_api_ext_record_t>* rocdecode_api_data,
|
||||
std::deque<rocprofiler_buffer_tracing_rocjpeg_api_record_t>* rocjpeg_api_data);
|
||||
} // namespace tool
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -65,15 +65,15 @@ get_hash_id(Tp&& _val)
|
||||
|
||||
void
|
||||
write_perfetto(
|
||||
const output_config& ocfg,
|
||||
const metadata& tool_metadata,
|
||||
std::vector<agent_info> agent_data,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>& hip_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& kernel_dispatch_gen,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& memory_copy_gen,
|
||||
const generator<tool_counter_record_t>& counter_collection_gen,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
||||
const output_config& ocfg,
|
||||
const metadata& tool_metadata,
|
||||
std::vector<agent_info> agent_data,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>& hip_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_ext_record_t>& kernel_dispatch_gen,
|
||||
const generator<tool_buffer_tracing_memory_copy_ext_record_t>& memory_copy_gen,
|
||||
const generator<tool_counter_record_t>& counter_collection_gen,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& /*scratch_memory_gen*/,
|
||||
const generator<rocprofiler_buffer_tracing_rccl_api_record_t>& rccl_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_memory_allocation_record_t>& memory_allocation_gen,
|
||||
@@ -133,10 +133,12 @@ write_perfetto(
|
||||
|
||||
tracing_session->Setup(cfg);
|
||||
tracing_session->StartBlocking();
|
||||
|
||||
auto tids = std::set<rocprofiler_thread_id_t>{};
|
||||
auto demangled = std::unordered_map<std::string_view, std::string>{};
|
||||
auto agent_thread_ids = std::unordered_map<rocprofiler_agent_id_t, std::set<uint64_t>>{};
|
||||
const auto is_hip_initialized =
|
||||
tool_metadata.is_runtime_initialized(ROCPROFILER_RUNTIME_INITIALIZATION_HIP);
|
||||
const auto group_by_queue = ocfg.group_by_queue || !is_hip_initialized;
|
||||
auto tids = std::set<rocprofiler_thread_id_t>{};
|
||||
auto demangled = std::unordered_map<std::string_view, std::string>{};
|
||||
auto agent_thread_ids = std::unordered_map<rocprofiler_agent_id_t, std::set<uint64_t>>{};
|
||||
auto agent_thread_ids_alloc = std::unordered_map<rocprofiler_agent_id_t, std::set<uint64_t>>{};
|
||||
auto agent_queue_ids =
|
||||
std::unordered_map<rocprofiler_agent_id_t, std::unordered_set<rocprofiler_queue_id_t>>{};
|
||||
@@ -154,12 +156,7 @@ write_perfetto(
|
||||
auto agent_queue_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::unordered_map<rocprofiler_queue_id_t, ::perfetto::Track>>{};
|
||||
auto agent_stream_compute_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::unordered_map<rocprofiler_stream_id_t, ::perfetto::Track>>{};
|
||||
auto agent_stream_copy_tracks =
|
||||
std::unordered_map<rocprofiler_agent_id_t,
|
||||
std::unordered_map<rocprofiler_stream_id_t, ::perfetto::Track>>{};
|
||||
auto stream_tracks = std::unordered_map<rocprofiler_stream_id_t, ::perfetto::Track>{};
|
||||
|
||||
auto _get_agent = [&agent_data](rocprofiler_agent_id_t _id) -> const rocprofiler_agent_t* {
|
||||
for(const auto& itr : agent_data)
|
||||
@@ -194,7 +191,7 @@ write_perfetto(
|
||||
{
|
||||
tids.emplace(itr.thread_id);
|
||||
agent_stream_ids[itr.dst_agent_id].emplace(itr.stream_id);
|
||||
if(ocfg.group_by_queue)
|
||||
if(group_by_queue)
|
||||
{
|
||||
agent_thread_ids[itr.dst_agent_id].emplace(itr.thread_id);
|
||||
}
|
||||
@@ -212,7 +209,7 @@ write_perfetto(
|
||||
{
|
||||
tids.emplace(itr.thread_id);
|
||||
agent_stream_ids[itr.dispatch_info.agent_id].emplace(itr.stream_id);
|
||||
if(ocfg.group_by_queue)
|
||||
if(group_by_queue)
|
||||
{
|
||||
agent_queue_ids[itr.dispatch_info.agent_id].emplace(itr.dispatch_info.queue_id);
|
||||
}
|
||||
@@ -297,20 +294,11 @@ write_perfetto(
|
||||
{
|
||||
for(auto sitr : aitr.second)
|
||||
{
|
||||
const auto* _agent = _get_agent(aitr.first);
|
||||
const auto stream_id = sitr.handle;
|
||||
const auto stream_id = sitr.handle;
|
||||
|
||||
{
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "COMPUTE AGENT [" << _agent->logical_node_id << "] STREAM [" << stream_id
|
||||
<< "] ";
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << "(CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << "(GPU)";
|
||||
else
|
||||
_namess << "(UNK)";
|
||||
_namess << fmt::format("STREAM [\" {} \"] ", stream_id);
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
@@ -318,28 +306,7 @@ write_perfetto(
|
||||
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
agent_stream_compute_tracks[aitr.first].emplace(sitr, _track);
|
||||
}
|
||||
|
||||
{
|
||||
auto _namess = std::stringstream{};
|
||||
_namess << "COPY to AGENT [" << _agent->logical_node_id << "] STREAM [" << stream_id
|
||||
<< "] ";
|
||||
|
||||
if(_agent->type == ROCPROFILER_AGENT_TYPE_CPU)
|
||||
_namess << "(CPU)";
|
||||
else if(_agent->type == ROCPROFILER_AGENT_TYPE_GPU)
|
||||
_namess << "(GPU)";
|
||||
else
|
||||
_namess << "(UNK)";
|
||||
|
||||
auto _track = ::perfetto::Track{get_hash_id(_namess.str())};
|
||||
auto _desc = _track.Serialize();
|
||||
_desc.set_name(_namess.str());
|
||||
|
||||
perfetto::TrackEvent::SetTrackDescriptor(_track, _desc);
|
||||
|
||||
agent_stream_copy_tracks[aitr.first].emplace(sitr, _track);
|
||||
stream_tracks.emplace(sitr, _track);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -570,13 +537,13 @@ write_perfetto(
|
||||
auto name = buffer_names.at(itr.kind, itr.operation);
|
||||
|
||||
::perfetto::Track* _track = nullptr;
|
||||
if(ocfg.group_by_queue)
|
||||
if(group_by_queue)
|
||||
{
|
||||
_track = &agent_thread_tracks.at(itr.dst_agent_id).at(itr.thread_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
_track = &agent_stream_copy_tracks.at(itr.dst_agent_id).at(itr.stream_id);
|
||||
_track = &stream_tracks.at(itr.stream_id);
|
||||
}
|
||||
|
||||
TRACE_EVENT_BEGIN(
|
||||
@@ -640,7 +607,7 @@ write_perfetto(
|
||||
rocprofiler_agent_id_t,
|
||||
std::unordered_map<
|
||||
rocprofiler_queue_id_t,
|
||||
std::vector<tool_buffer_tracing_kernel_dispatch_with_stream_record_t*>>>{};
|
||||
std::vector<tool_buffer_tracing_kernel_dispatch_ext_record_t*>>>{};
|
||||
for(auto& itr : generator)
|
||||
{
|
||||
const auto& info = itr.dispatch_info;
|
||||
@@ -671,14 +638,13 @@ write_perfetto(
|
||||
auto name = std::string_view{sym->kernel_name};
|
||||
|
||||
::perfetto::Track* _track = nullptr;
|
||||
if(ocfg.group_by_queue)
|
||||
if(group_by_queue)
|
||||
{
|
||||
_track = &agent_queue_tracks.at(info.agent_id).at(info.queue_id);
|
||||
}
|
||||
else
|
||||
{
|
||||
_track =
|
||||
&agent_stream_compute_tracks.at(info.agent_id).at((*it)->stream_id);
|
||||
_track = &stream_tracks.at((*it)->stream_id);
|
||||
}
|
||||
|
||||
// Temporary fix until timestamp issues are resolved: Set timestamps to be
|
||||
@@ -729,6 +695,11 @@ write_perfetto(
|
||||
.get_agent_index(agents_map.at(info.agent_id).id,
|
||||
ocfg.agent_index_value)
|
||||
.as_string("-"),
|
||||
"agent_type",
|
||||
tool_metadata
|
||||
.get_agent_index(agents_map.at(info.agent_id).id,
|
||||
ocfg.agent_index_value)
|
||||
.type,
|
||||
"corr_id",
|
||||
current.correlation_id.internal,
|
||||
"queue",
|
||||
|
||||
@@ -37,13 +37,13 @@ namespace tool
|
||||
{
|
||||
void
|
||||
write_perfetto(
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
std::vector<agent_info> agent_data,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>& hip_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& kernel_dispatch_gen,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& memory_copy_gen,
|
||||
const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
std::vector<agent_info> agent_data,
|
||||
const generator<rocprofiler_buffer_tracing_hip_api_ext_record_t>& hip_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& hsa_api_gen,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_ext_record_t>& kernel_dispatch_gen,
|
||||
const generator<tool_buffer_tracing_memory_copy_ext_record_t>& memory_copy_gen,
|
||||
const generator<tool_counter_record_t>& counter_collection_gen,
|
||||
const generator<rocprofiler_buffer_tracing_marker_api_record_t>& marker_api_gen,
|
||||
const generator<rocprofiler_buffer_tracing_scratch_memory_record_t>& scratch_memory_gen,
|
||||
|
||||
@@ -63,8 +63,8 @@ get_stats(const stats_map_t& data_v)
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data)
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_ext_record_t>& data)
|
||||
{
|
||||
auto kernel_stats = stats_map_t{};
|
||||
for(auto ditr : data)
|
||||
@@ -119,8 +119,8 @@ generate_stats(const output_config& /*cfg*/,
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& /*cfg*/,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data)
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_ext_record_t>& data)
|
||||
{
|
||||
auto memory_copy_stats = stats_map_t{};
|
||||
for(auto ditr : data)
|
||||
|
||||
@@ -32,9 +32,9 @@ namespace rocprofiler
|
||||
namespace tool
|
||||
{
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_with_stream_record_t>& data);
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_kernel_dispatch_ext_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
@@ -47,9 +47,9 @@ generate_stats(const output_config& cfg
|
||||
const generator<rocprofiler_buffer_tracing_hsa_api_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_with_stream_record_t>& data);
|
||||
generate_stats(const output_config& cfg,
|
||||
const metadata& tool_metadata,
|
||||
const generator<tool_buffer_tracing_memory_copy_ext_record_t>& data);
|
||||
|
||||
stats_entry_t
|
||||
generate_stats(const output_config& cfg,
|
||||
|
||||
@@ -475,6 +475,26 @@ metadata::add_external_correlation_id(uint64_t val)
|
||||
[](auto& _data, uint64_t _val) { return _data.emplace(_val).second; }, val);
|
||||
}
|
||||
|
||||
bool
|
||||
metadata::add_runtime_initialization(rocprofiler_runtime_initialization_operation_t runtime_op)
|
||||
{
|
||||
return runtime_initialization_set.wlock(
|
||||
[](auto& _data, rocprofiler_runtime_initialization_operation_t _runtime_op) {
|
||||
return _data.emplace(_runtime_op).second;
|
||||
},
|
||||
runtime_op);
|
||||
}
|
||||
|
||||
bool
|
||||
metadata::is_runtime_initialized(rocprofiler_runtime_initialization_operation_t runtime_op) const
|
||||
{
|
||||
return runtime_initialization_set.rlock(
|
||||
[](const auto& _data, rocprofiler_runtime_initialization_operation_t _runtime_op) {
|
||||
return _data.count(_runtime_op) > 0;
|
||||
},
|
||||
runtime_op);
|
||||
}
|
||||
|
||||
std::string_view
|
||||
metadata::get_marker_message(uint64_t corr_id) const
|
||||
{
|
||||
|
||||
@@ -90,6 +90,8 @@ using synced_map = common::Synchronized<Tp, true>;
|
||||
template <typename Tp>
|
||||
using synced_obj = common::Synchronized<Tp, true>;
|
||||
using pc_sampling_stats_t = rocprofiler_tool_pc_sampling_stats;
|
||||
using runtime_initialization_set_t =
|
||||
std::unordered_set<rocprofiler_runtime_initialization_operation_t>;
|
||||
|
||||
enum class agent_indexing
|
||||
{
|
||||
@@ -126,17 +128,18 @@ struct metadata
|
||||
agent_counter_info_map_t agent_counter_info = {};
|
||||
agent_pc_sample_config_info_map_t agent_pc_sample_config_info = {};
|
||||
|
||||
sdk::buffer_name_info buffer_names = {};
|
||||
sdk::callback_name_info callback_names = {};
|
||||
synced_map<code_object_data_map_t> code_objects = {};
|
||||
synced_map<kernel_symbol_data_map_t> kernel_symbols = {};
|
||||
synced_map<marker_message_map_t> marker_messages = {};
|
||||
synced_map<string_entry_map_t> string_entries = {};
|
||||
synced_map<external_corr_id_set_t> external_corr_ids = {};
|
||||
synced_map<host_function_info_map_t> host_functions = {};
|
||||
synced_map<code_object_load_info_vec_t> code_object_load = {};
|
||||
att_filenames_map_t att_filenames = {};
|
||||
synced_obj<pc_sampling_stats_t> pc_sampling_stats = {};
|
||||
sdk::buffer_name_info buffer_names = {};
|
||||
sdk::callback_name_info callback_names = {};
|
||||
synced_map<code_object_data_map_t> code_objects = {};
|
||||
synced_map<kernel_symbol_data_map_t> kernel_symbols = {};
|
||||
synced_map<marker_message_map_t> marker_messages = {};
|
||||
synced_map<string_entry_map_t> string_entries = {};
|
||||
synced_map<external_corr_id_set_t> external_corr_ids = {};
|
||||
synced_map<host_function_info_map_t> host_functions = {};
|
||||
synced_map<code_object_load_info_vec_t> code_object_load = {};
|
||||
att_filenames_map_t att_filenames = {};
|
||||
synced_obj<pc_sampling_stats_t> pc_sampling_stats = {};
|
||||
synced_obj<runtime_initialization_set_t> runtime_initialization_set = {};
|
||||
|
||||
metadata() = default;
|
||||
metadata(inprocess);
|
||||
@@ -182,6 +185,7 @@ struct metadata
|
||||
bool add_host_function(host_function_info&& func);
|
||||
bool add_string_entry(size_t key, std::string_view str);
|
||||
bool add_external_correlation_id(uint64_t);
|
||||
bool add_runtime_initialization(rocprofiler_runtime_initialization_operation_t);
|
||||
|
||||
std::string_view get_marker_message(uint64_t corr_id) const;
|
||||
std::string_view get_kernel_name(uint64_t kernel_id, uint64_t rename_id) const;
|
||||
@@ -193,6 +197,7 @@ struct metadata
|
||||
rocprofiler_tracing_operation_t op) const;
|
||||
agent_index get_agent_index(rocprofiler_agent_id_t agent, agent_indexing index) const;
|
||||
const std::string* get_string_entry(size_t key) const;
|
||||
bool is_runtime_initialized(rocprofiler_runtime_initialization_operation_t) const;
|
||||
|
||||
private:
|
||||
bool inprocess_init = false;
|
||||
|
||||
@@ -35,56 +35,55 @@ namespace rocprofiler
|
||||
{
|
||||
namespace tool
|
||||
{
|
||||
struct tool_buffer_tracing_kernel_dispatch_with_stream_record_t
|
||||
struct tool_buffer_tracing_kernel_dispatch_ext_record_t
|
||||
: rocprofiler_buffer_tracing_kernel_dispatch_record_t
|
||||
{
|
||||
using base_type = rocprofiler_buffer_tracing_kernel_dispatch_record_t;
|
||||
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t(
|
||||
const base_type& _base,
|
||||
const rocprofiler_stream_id_t& _stream_id,
|
||||
const uint64_t& _kernel_rename_val)
|
||||
tool_buffer_tracing_kernel_dispatch_ext_record_t(const base_type& _base,
|
||||
const rocprofiler_stream_id_t& _stream_id,
|
||||
const uint64_t& _kernel_rename_val)
|
||||
: base_type{_base}
|
||||
, stream_id{_stream_id}
|
||||
, kernel_rename_val{_kernel_rename_val}
|
||||
{}
|
||||
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t();
|
||||
~tool_buffer_tracing_kernel_dispatch_with_stream_record_t() = default;
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t(
|
||||
const tool_buffer_tracing_kernel_dispatch_with_stream_record_t&) = default;
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t(
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t&&) noexcept = default;
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t& operator =(
|
||||
const tool_buffer_tracing_kernel_dispatch_with_stream_record_t&) = default;
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t& operator =(
|
||||
tool_buffer_tracing_kernel_dispatch_with_stream_record_t&&) noexcept = default;
|
||||
tool_buffer_tracing_kernel_dispatch_ext_record_t();
|
||||
~tool_buffer_tracing_kernel_dispatch_ext_record_t() = default;
|
||||
tool_buffer_tracing_kernel_dispatch_ext_record_t(
|
||||
const tool_buffer_tracing_kernel_dispatch_ext_record_t&) = default;
|
||||
tool_buffer_tracing_kernel_dispatch_ext_record_t(
|
||||
tool_buffer_tracing_kernel_dispatch_ext_record_t&&) noexcept = default;
|
||||
tool_buffer_tracing_kernel_dispatch_ext_record_t& operator =(
|
||||
const tool_buffer_tracing_kernel_dispatch_ext_record_t&) = default;
|
||||
tool_buffer_tracing_kernel_dispatch_ext_record_t& operator =(
|
||||
tool_buffer_tracing_kernel_dispatch_ext_record_t&&) noexcept = default;
|
||||
|
||||
rocprofiler_stream_id_t stream_id = {};
|
||||
uint64_t kernel_rename_val = {};
|
||||
};
|
||||
|
||||
struct tool_buffer_tracing_memory_copy_with_stream_record_t
|
||||
struct tool_buffer_tracing_memory_copy_ext_record_t
|
||||
: rocprofiler_buffer_tracing_memory_copy_record_t
|
||||
{
|
||||
using base_type = rocprofiler_buffer_tracing_memory_copy_record_t;
|
||||
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t(const base_type& _base,
|
||||
const rocprofiler_stream_id_t& _stream_id)
|
||||
tool_buffer_tracing_memory_copy_ext_record_t(const base_type& _base,
|
||||
const rocprofiler_stream_id_t& _stream_id)
|
||||
: base_type{_base}
|
||||
, stream_id{_stream_id}
|
||||
{}
|
||||
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t();
|
||||
~tool_buffer_tracing_memory_copy_with_stream_record_t() = default;
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t(
|
||||
const tool_buffer_tracing_memory_copy_with_stream_record_t&) = default;
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t(
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t&&) noexcept = default;
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t& operator =(
|
||||
const tool_buffer_tracing_memory_copy_with_stream_record_t&) = default;
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t& operator =(
|
||||
tool_buffer_tracing_memory_copy_with_stream_record_t&&) noexcept = default;
|
||||
tool_buffer_tracing_memory_copy_ext_record_t();
|
||||
~tool_buffer_tracing_memory_copy_ext_record_t() = default;
|
||||
tool_buffer_tracing_memory_copy_ext_record_t(
|
||||
const tool_buffer_tracing_memory_copy_ext_record_t&) = default;
|
||||
tool_buffer_tracing_memory_copy_ext_record_t(
|
||||
tool_buffer_tracing_memory_copy_ext_record_t&&) noexcept = default;
|
||||
tool_buffer_tracing_memory_copy_ext_record_t& operator =(
|
||||
const tool_buffer_tracing_memory_copy_ext_record_t&) = default;
|
||||
tool_buffer_tracing_memory_copy_ext_record_t& operator =(
|
||||
tool_buffer_tracing_memory_copy_ext_record_t&&) noexcept = default;
|
||||
|
||||
rocprofiler_stream_id_t stream_id = {};
|
||||
};
|
||||
@@ -97,8 +96,8 @@ namespace cereal
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar,
|
||||
const ::rocprofiler::tool::tool_buffer_tracing_kernel_dispatch_with_stream_record_t& data)
|
||||
save(ArchiveT& ar,
|
||||
const ::rocprofiler::tool::tool_buffer_tracing_kernel_dispatch_ext_record_t& data)
|
||||
{
|
||||
cereal::save(ar, static_cast<const rocprofiler_buffer_tracing_kernel_dispatch_record_t&>(data));
|
||||
SAVE_DATA_FIELD(stream_id);
|
||||
@@ -107,8 +106,7 @@ save(ArchiveT&
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar,
|
||||
const ::rocprofiler::tool::tool_buffer_tracing_memory_copy_with_stream_record_t& data)
|
||||
save(ArchiveT& ar, const ::rocprofiler::tool::tool_buffer_tracing_memory_copy_ext_record_t& data)
|
||||
{
|
||||
cereal::save(ar, static_cast<const rocprofiler_buffer_tracing_memory_copy_record_t&>(data));
|
||||
SAVE_DATA_FIELD(stream_id);
|
||||
|
||||
@@ -60,7 +60,8 @@ pop_stream_id()
|
||||
rocprofiler_stream_id_t
|
||||
get_stream_id()
|
||||
{
|
||||
return CHECK_NOTNULL(get_stream_stack())->back();
|
||||
return (CHECK_NOTNULL(get_stream_stack())->empty()) ? rocprofiler_stream_id_t{.handle = 0}
|
||||
: get_stream_stack()->back();
|
||||
}
|
||||
|
||||
bool
|
||||
|
||||
@@ -249,6 +249,7 @@ using kernel_rename_stack_t = std::stack<uint64_t>;
|
||||
auto* tool_metadata = as_pointer<tool::metadata>(tool::metadata::inprocess{});
|
||||
auto target_kernels = common::Synchronized<targeted_kernels_map_t>{};
|
||||
std::mutex att_shader_data;
|
||||
auto counter_collection_ctx = rocprofiler_context_id_t{0};
|
||||
|
||||
thread_local auto thread_dispatch_rename = as_pointer<kernel_rename_stack_t>();
|
||||
thread_local auto thread_dispatch_rename_dtor = common::scope_destructor{[]() {
|
||||
@@ -262,25 +263,6 @@ struct kernel_rename_and_stream_display_pair
|
||||
uint64_t kernel_rename_val{0};
|
||||
rocprofiler_stream_id_t stream_id{.handle = 0};
|
||||
};
|
||||
auto kernel_rename_and_stream_display_pair_dtors =
|
||||
new std::vector<kernel_rename_and_stream_display_pair*>{};
|
||||
|
||||
auto
|
||||
get_kernel_rename_and_stream_display_pair_lock()
|
||||
{
|
||||
static auto _mutex = std::mutex{};
|
||||
return std::unique_lock<std::mutex>{_mutex};
|
||||
}
|
||||
|
||||
void
|
||||
add_kernel_rename_and_stream_display_pairs(kernel_rename_and_stream_display_pair* ptr)
|
||||
{
|
||||
auto lock = get_kernel_rename_and_stream_display_pair_lock();
|
||||
if(ptr != nullptr && kernel_rename_and_stream_display_pair_dtors != nullptr)
|
||||
{
|
||||
kernel_rename_and_stream_display_pair_dtors->emplace_back(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
add_kernel_target(uint64_t _kern_id, const std::unordered_set<size_t>& range)
|
||||
@@ -455,10 +437,19 @@ set_kernel_rename_and_stream_display_correlation_id(
|
||||
tool::get_config().kernel_rename && thread_dispatch_rename != nullptr &&
|
||||
!thread_dispatch_rename->empty() &&
|
||||
kind == ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH;
|
||||
const bool hip_stream_display_enabled =
|
||||
!tool::get_config().group_by_queue &&
|
||||
kernel_rename_and_stream_display_pair_dtors != nullptr &&
|
||||
!rocprofiler::tool::stream::stream_stack_empty();
|
||||
const bool hip_stream_display_enabled = !tool::get_config().group_by_queue;
|
||||
if(kernel_rename_service_enabled && ctx_id == counter_collection_ctx)
|
||||
{
|
||||
auto val = thread_dispatch_rename->top();
|
||||
if(tool_metadata) tool_metadata->add_external_correlation_id(val);
|
||||
external_corr_id->value = val;
|
||||
return 0;
|
||||
}
|
||||
// End early if counter collection context and no kernel rename service is enabled
|
||||
if(ctx_id == counter_collection_ctx)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
kernel_rename_and_stream_display_pair* kernel_rename_and_stream_display_vals = nullptr;
|
||||
if(kernel_rename_service_enabled || hip_stream_display_enabled)
|
||||
@@ -475,12 +466,11 @@ set_kernel_rename_and_stream_display_correlation_id(
|
||||
// Get stream ID from stream HIP display service
|
||||
if(hip_stream_display_enabled && kernel_rename_and_stream_display_vals != nullptr)
|
||||
{
|
||||
auto stream_id = rocprofiler::tool::stream::get_stream_id();
|
||||
auto stream_id = tool::stream::get_stream_id();
|
||||
kernel_rename_and_stream_display_vals->stream_id = stream_id;
|
||||
}
|
||||
// Set the external correlation id service to point to struct
|
||||
external_corr_id->ptr = kernel_rename_and_stream_display_vals;
|
||||
add_kernel_rename_and_stream_display_pairs(kernel_rename_and_stream_display_vals);
|
||||
|
||||
common::consume_args(thr_id, ctx_id, kind, op, internal_corr_id, user_data);
|
||||
|
||||
@@ -615,6 +605,24 @@ hip_stream_display_callback(rocprofiler_callback_tracing_record_t record,
|
||||
common::consume_args(user_data, data);
|
||||
}
|
||||
|
||||
// Stores which runtimes have been initialized in metadata
|
||||
void
|
||||
runtime_initialization_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* user_data,
|
||||
void* data)
|
||||
{
|
||||
if(record.kind != ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION) return;
|
||||
ROCP_CI_LOG_IF(WARNING, tool_metadata == nullptr)
|
||||
<< fmt::format("tool cannot record runtime initialization for {}",
|
||||
tool_metadata->get_operation_name(record.kind, record.operation));
|
||||
if(tool_metadata)
|
||||
{
|
||||
tool_metadata->add_runtime_initialization(
|
||||
static_cast<rocprofiler_runtime_initialization_operation_t>(record.operation));
|
||||
}
|
||||
common::consume_args(user_data, data);
|
||||
}
|
||||
|
||||
void
|
||||
callback_tracing_callback(rocprofiler_callback_tracing_record_t record,
|
||||
rocprofiler_user_data_t* user_data,
|
||||
@@ -942,8 +950,10 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
|
||||
record->correlation_id.external.ptr);
|
||||
stream_id = kernel_stream_pair_ptr->stream_id;
|
||||
kernel_rename_val = kernel_stream_pair_ptr->kernel_rename_val;
|
||||
delete kernel_stream_pair_ptr;
|
||||
record->correlation_id.external.value = kernel_rename_val;
|
||||
}
|
||||
rocprofiler::tool::tool_buffer_tracing_kernel_dispatch_with_stream_record_t
|
||||
rocprofiler::tool::tool_buffer_tracing_kernel_dispatch_ext_record_t
|
||||
record_with_stream{*record, stream_id, kernel_rename_val};
|
||||
tool::write_ring_buffer(record_with_stream, domain_type::KERNEL_DISPATCH);
|
||||
}
|
||||
@@ -971,9 +981,11 @@ buffered_tracing_callback(rocprofiler_context_id_t /*context*/,
|
||||
static_cast<kernel_rename_and_stream_display_pair*>(
|
||||
record->correlation_id.external.ptr);
|
||||
stream_id = kernel_stream_pair_ptr->stream_id;
|
||||
delete kernel_stream_pair_ptr;
|
||||
record->correlation_id.external.ptr = nullptr;
|
||||
}
|
||||
rocprofiler::tool::tool_buffer_tracing_memory_copy_with_stream_record_t
|
||||
record_with_stream{*record, stream_id};
|
||||
rocprofiler::tool::tool_buffer_tracing_memory_copy_ext_record_t record_with_stream{
|
||||
*record, stream_id};
|
||||
tool::write_ring_buffer(record_with_stream, domain_type::MEMORY_COPY);
|
||||
}
|
||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_MEMORY_ALLOCATION)
|
||||
@@ -1366,16 +1378,10 @@ counter_record_callback(rocprofiler_dispatch_counting_service_data_t dispatch_da
|
||||
|
||||
auto counter_record = tool::tool_counter_record_t{};
|
||||
|
||||
counter_record.dispatch_data = dispatch_data;
|
||||
counter_record.thread_id = user_data.value;
|
||||
if(dispatch_data.correlation_id.external.ptr != nullptr)
|
||||
{
|
||||
// Extract the kernel id
|
||||
auto* kernel_stream_pair_ptr = static_cast<kernel_rename_and_stream_display_pair*>(
|
||||
dispatch_data.correlation_id.external.ptr);
|
||||
counter_record.kernel_rename_val = kernel_stream_pair_ptr->kernel_rename_val;
|
||||
}
|
||||
|
||||
counter_record.dispatch_data = dispatch_data;
|
||||
counter_record.thread_id = user_data.value;
|
||||
counter_record.kernel_rename_val = dispatch_data.correlation_id.external.value;
|
||||
counter_record.dispatch_data.correlation_id.external.value = counter_record.kernel_rename_val;
|
||||
auto serialized_records = std::vector<tool::tool_counter_value_t>{};
|
||||
serialized_records.reserve(record_count);
|
||||
|
||||
@@ -1805,10 +1811,16 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
|
||||
if(tool::get_config().counter_collection)
|
||||
{
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&counter_collection_ctx),
|
||||
"failed to create context");
|
||||
ROCPROFILER_CALL(
|
||||
rocprofiler_configure_callback_dispatch_counting_service(
|
||||
get_client_ctx(), dispatch_callback, nullptr, counter_record_callback, nullptr),
|
||||
rocprofiler_configure_callback_dispatch_counting_service(counter_collection_ctx,
|
||||
dispatch_callback,
|
||||
nullptr,
|
||||
counter_record_callback,
|
||||
nullptr),
|
||||
"Could not setup counting service");
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(counter_collection_ctx), "start context failed");
|
||||
}
|
||||
|
||||
if(tool::get_config().rocdecode_api_trace)
|
||||
@@ -1873,6 +1885,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
}
|
||||
if(!tool::get_config().group_by_queue)
|
||||
{
|
||||
// Track stream ID information via callback service
|
||||
auto hip_stream_display_ctx = rocprofiler_context_id_t{0};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&hip_stream_display_ctx),
|
||||
@@ -1887,6 +1900,21 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
nullptr),
|
||||
"stream tracing configure failed");
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(hip_stream_display_ctx), "start context failed");
|
||||
|
||||
// Track if HIP runtime has been initialized via runtime_intialization service
|
||||
auto runtime_initialization_ctx = rocprofiler_context_id_t{0};
|
||||
ROCPROFILER_CALL(rocprofiler_create_context(&runtime_initialization_ctx),
|
||||
"failed to create context");
|
||||
ROCPROFILER_CALL(rocprofiler_configure_callback_tracing_service(
|
||||
runtime_initialization_ctx,
|
||||
ROCPROFILER_CALLBACK_TRACING_RUNTIME_INITIALIZATION,
|
||||
nullptr,
|
||||
0,
|
||||
runtime_initialization_callback,
|
||||
nullptr),
|
||||
"stream tracing configure failed");
|
||||
ROCPROFILER_CALL(rocprofiler_start_context(runtime_initialization_ctx),
|
||||
"start context failed");
|
||||
}
|
||||
if(tool::get_config().kernel_rename || !tool::get_config().group_by_queue)
|
||||
{
|
||||
@@ -1902,6 +1930,21 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
set_kernel_rename_and_stream_display_correlation_id,
|
||||
nullptr),
|
||||
"Could not configure external correlation id request service");
|
||||
|
||||
if(tool::get_config().counter_collection)
|
||||
{
|
||||
auto counter_external_corr_id_request_kinds =
|
||||
std::array<rocprofiler_external_correlation_id_request_kind_t, 1>{
|
||||
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_KERNEL_DISPATCH};
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_external_correlation_id_request_service(
|
||||
counter_collection_ctx,
|
||||
counter_external_corr_id_request_kinds.data(),
|
||||
counter_external_corr_id_request_kinds.size(),
|
||||
set_kernel_rename_and_stream_display_correlation_id,
|
||||
nullptr),
|
||||
"Could not configure external correlation id request service");
|
||||
}
|
||||
}
|
||||
|
||||
if(tool::get_config().pc_sampling_host_trap)
|
||||
@@ -2229,17 +2272,6 @@ tool_fini(void* /*tool_data*/)
|
||||
|
||||
run_cleanup();
|
||||
|
||||
if(kernel_rename_and_stream_display_pair_dtors != nullptr)
|
||||
{
|
||||
for(auto& itr : *kernel_rename_and_stream_display_pair_dtors)
|
||||
{
|
||||
delete itr;
|
||||
itr = nullptr;
|
||||
}
|
||||
delete kernel_rename_and_stream_display_pair_dtors;
|
||||
kernel_rename_and_stream_display_pair_dtors = nullptr;
|
||||
}
|
||||
|
||||
if(destructors)
|
||||
{
|
||||
for(const auto& itr : *destructors)
|
||||
|
||||
@@ -34,7 +34,6 @@
|
||||
#include "lib/rocprofiler-sdk/hsa/queue_controller.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
#include "lib/rocprofiler-sdk/tracing/tracing.hpp"
|
||||
#include "rocprofiler-sdk/hip/compiler_api_id.h"
|
||||
|
||||
#include <rocprofiler-sdk/buffer.h>
|
||||
#include <rocprofiler-sdk/callback_tracing.h>
|
||||
@@ -367,28 +366,11 @@ enable_stream_stack()
|
||||
{
|
||||
if(itr->is_tracing_one_of(ROCPROFILER_CALLBACK_TRACING_MEMORY_COPY,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_RUNTIME_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,
|
||||
ROCPROFILER_CALLBACK_TRACING_HIP_STREAM,
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_STREAM,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
enable_compiler_stream_stack()
|
||||
{
|
||||
for(const auto& itr : context::get_registered_contexts())
|
||||
{
|
||||
if(itr->is_tracing_one_of(ROCPROFILER_CALLBACK_TRACING_HIP_COMPILER_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API,
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT))
|
||||
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT))
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -415,11 +397,6 @@ update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
ROCP_TRACE << "updating table entry for " << _info.name;
|
||||
|
||||
constexpr auto num_args = function_args_type::size();
|
||||
constexpr auto is_hip_pop_call_config_func =
|
||||
std::is_same<decltype(info_type::operation_idx),
|
||||
rocprofiler_hip_compiler_api_id_t>::value &&
|
||||
(static_cast<rocprofiler_hip_compiler_api_id_t>(info_type::operation_idx) ==
|
||||
ROCPROFILER_HIP_COMPILER_API_ID___hipPopCallConfiguration);
|
||||
|
||||
if constexpr(common::mpl::is_one_of<hipStream_t, function_args_type>::value)
|
||||
{
|
||||
@@ -433,12 +410,6 @@ update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
static_assert(stream_idx == (num_args - rstream_idx - 1),
|
||||
"function has more than one stream argument");
|
||||
|
||||
// don't wrap the compiler API functions unless HIP compiler API tracing is enabled
|
||||
if constexpr(TableIdx == ROCPROFILER_HIP_TABLE_ID_Compiler)
|
||||
{
|
||||
if(!enable_compiler_stream_stack()) return;
|
||||
}
|
||||
|
||||
// 1. get the sub-table containing the function pointer in original table
|
||||
// 2. get reference to function pointer in sub-table in original table
|
||||
// 3. update function pointer with wrapper
|
||||
@@ -460,8 +431,7 @@ update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
_func = create_read_functor<TableIdx, OpIdx>(_func);
|
||||
}
|
||||
}
|
||||
else if constexpr(common::mpl::is_one_of<hipStream_t*, function_args_type>::value &&
|
||||
!is_hip_pop_call_config_func)
|
||||
else if constexpr(common::mpl::is_one_of<hipStream_t*, function_args_type>::value)
|
||||
{
|
||||
constexpr auto stream_idx =
|
||||
common::mpl::index_of<hipStream_t*, function_args_type>::value;
|
||||
@@ -474,12 +444,6 @@ update_table(Tp* _orig, std::integral_constant<size_t, OpIdx>)
|
||||
static_assert(stream_idx == (num_args - rstream_idx - 1),
|
||||
"function has more than one stream argument");
|
||||
|
||||
// don't wrap the compiler API functions unless HIP compiler API tracing is enabled
|
||||
if constexpr(TableIdx == ROCPROFILER_HIP_TABLE_ID_Compiler)
|
||||
{
|
||||
if(!enable_compiler_stream_stack()) return;
|
||||
}
|
||||
|
||||
ROCP_INFO << _info.name << " has been designated as a stream create function";
|
||||
|
||||
// 1. get the sub-table containing the function pointer in original table
|
||||
@@ -541,7 +505,6 @@ using hip_op_args_cb_t = rocprofiler_callback_tracing_operation_args_cb_t;
|
||||
template void update_table<TABLE_TYPE>(TABLE_TYPE * _tbl);
|
||||
|
||||
INSTANTIATE_HIP_TABLE_FUNC(hip_runtime_api_table_t, ROCPROFILER_HIP_TABLE_ID_Runtime)
|
||||
INSTANTIATE_HIP_TABLE_FUNC(hip_compiler_api_table_t, ROCPROFILER_HIP_TABLE_ID_Compiler)
|
||||
} // namespace stream
|
||||
} // namespace hip
|
||||
} // namespace rocprofiler
|
||||
|
||||
@@ -36,8 +36,7 @@ namespace hip
|
||||
{
|
||||
namespace stream
|
||||
{
|
||||
using hip_compiler_api_table_t = HipCompilerDispatchTable;
|
||||
using hip_runtime_api_table_t = HipDispatchTable;
|
||||
using hip_runtime_api_table_t = HipDispatchTable;
|
||||
|
||||
rocprofiler_stream_id_t
|
||||
get_stream_id();
|
||||
|
||||
@@ -253,6 +253,14 @@ WriteInterceptor(const void* packets,
|
||||
tracing::populate_contexts(ROCPROFILER_CALLBACK_TRACING_KERNEL_DISPATCH,
|
||||
ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH,
|
||||
tracing_data_v);
|
||||
// these are for the services (dispatch counter collection, pc sampling, ATT) which use
|
||||
// the queue/queue_controller callback mechanism
|
||||
const auto queue_callback_context_filter = [](const context::context* ctx) {
|
||||
return (ctx->counter_collection || ctx->pc_sampler || ctx->dispatch_thread_trace);
|
||||
};
|
||||
|
||||
for(const auto* itr : context::get_active_contexts(queue_callback_context_filter))
|
||||
tracing_data_v.external_correlation_ids.emplace(itr, tracing::empty_user_data);
|
||||
|
||||
const auto* packets_arr = static_cast<const rocprofiler_packet*>(packets);
|
||||
auto transformed_packets = std::vector<rocprofiler_packet>{};
|
||||
|
||||
@@ -858,9 +858,6 @@ rocprofiler_set_api_table(const char* name,
|
||||
// install rocprofiler API wrappers
|
||||
rocprofiler::hip::update_table(hip_compiler_api_table);
|
||||
|
||||
// install HIP stream deduction wrappers
|
||||
rocprofiler::hip::stream::update_table(hip_compiler_api_table);
|
||||
|
||||
// allow tools to install API wrappers
|
||||
rocprofiler::intercept_table::notify_intercept_table_registration(
|
||||
ROCPROFILER_HIP_COMPILER_TABLE,
|
||||
|
||||
@@ -38,3 +38,4 @@ if(rocJPEG_FOUND AND rocJPEG_VERSION VERSION_GREATER 0.6.0)
|
||||
add_subdirectory(rocjpeg)
|
||||
endif()
|
||||
add_subdirectory(hsa-code-object)
|
||||
add_subdirectory(hip-streams)
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
if(NOT CMAKE_HIP_COMPILER)
|
||||
find_program(
|
||||
amdclangpp_EXECUTABLE
|
||||
NAMES amdclang++
|
||||
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATH_SUFFIXES bin llvm/bin NO_CACHE)
|
||||
mark_as_advanced(amdclangpp_EXECUTABLE)
|
||||
|
||||
if(amdclangpp_EXECUTABLE)
|
||||
set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
project(rocprofiler-sdk-tests-bin-hip-streams LANGUAGES CXX HIP)
|
||||
|
||||
foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
|
||||
if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "")
|
||||
set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_HIP_STANDARD 17)
|
||||
set(CMAKE_HIP_EXTENSIONS OFF)
|
||||
set(CMAKE_HIP_STANDARD_REQUIRED ON)
|
||||
|
||||
set_source_files_properties(compute_comm_overlap.cpp PROPERTIES LANGUAGE HIP)
|
||||
add_executable(hip-streams)
|
||||
target_sources(hip-streams PRIVATE compute_comm_overlap.cpp)
|
||||
target_link_libraries(hip-streams PRIVATE rocprofiler-sdk::tests-build-flags)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
target_link_libraries(hip-streams PRIVATE Threads::Threads)
|
||||
@@ -0,0 +1,138 @@
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define BLOCKDIM 64
|
||||
|
||||
/* Macro for checking GPU API return values */
|
||||
#define HIP_ASSERT(call) \
|
||||
do \
|
||||
{ \
|
||||
hipError_t gpuErr = call; \
|
||||
if(hipSuccess != gpuErr) \
|
||||
{ \
|
||||
printf( \
|
||||
"GPU API Error - %s:%d: '%s'\n", __FILE__, __LINE__, hipGetErrorString(gpuErr)); \
|
||||
exit(1); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
// HIP kernel. Each thread takes care of one element of input
|
||||
__global__ void
|
||||
cube(double* input, double* output, size_t offset, size_t elements_per_stream)
|
||||
{
|
||||
size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
size_t gstride = blockDim.x * gridDim.x;
|
||||
|
||||
// Span all elements assigned to this stream
|
||||
for(size_t id = tid + offset; id < offset + elements_per_stream; id += gstride)
|
||||
for(size_t i = 0; i < 1000; ++i)
|
||||
output[id] = input[id] * input[id] * input[id];
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
// number of streams
|
||||
const int num_streams = 8;
|
||||
|
||||
// Number of threads in each thread block
|
||||
const int blockSize = 512;
|
||||
|
||||
// Size of vectors
|
||||
int n = 100000000;
|
||||
int elements_per_stream = n / num_streams;
|
||||
int bytes_per_stream = elements_per_stream * sizeof(double);
|
||||
|
||||
// Host input vectors
|
||||
double* h_input1{nullptr};
|
||||
// Host output vector
|
||||
double* h_output1{nullptr};
|
||||
|
||||
// Device input vectors
|
||||
double* d_input1{nullptr};
|
||||
// Device output vector
|
||||
double* d_output1{nullptr};
|
||||
|
||||
// Creating events for timers
|
||||
hipEvent_t start{}, stop{};
|
||||
HIP_ASSERT(hipEventCreate(&start));
|
||||
HIP_ASSERT(hipEventCreate(&stop));
|
||||
|
||||
// Creating streams
|
||||
hipStream_t streams[num_streams];
|
||||
for(int i = 0; i < num_streams; ++i)
|
||||
{
|
||||
HIP_ASSERT(hipStreamCreate(&streams[i]));
|
||||
}
|
||||
|
||||
// Size, in bytes, of each vector
|
||||
size_t bytes = n * sizeof(double);
|
||||
|
||||
// Allocate page locked memory for these vectors on host
|
||||
HIP_ASSERT(hipHostMalloc(&h_input1, bytes));
|
||||
HIP_ASSERT(hipHostMalloc(&h_output1, bytes));
|
||||
|
||||
// Allocate memory for each vector on GPU
|
||||
HIP_ASSERT(hipMalloc(&d_input1, bytes));
|
||||
HIP_ASSERT(hipMalloc(&d_output1, bytes));
|
||||
|
||||
// Initialize vectors on host
|
||||
for(int i = 0; i < n; i++)
|
||||
{
|
||||
h_input1[i] = sin(i);
|
||||
}
|
||||
|
||||
// Number of thread blocks in grid
|
||||
const int gridSizePerStream = 104; //(int)ceil((float)elements_per_stream/blockSize);
|
||||
|
||||
HIP_ASSERT(hipEventRecord(start));
|
||||
// split H2D copies and kernel calls into separate loops
|
||||
for(int i = 0; i < num_streams; i++)
|
||||
{
|
||||
int offset = i * elements_per_stream;
|
||||
HIP_ASSERT(hipMemcpyAsync(&d_input1[offset],
|
||||
&h_input1[offset],
|
||||
bytes_per_stream,
|
||||
hipMemcpyHostToDevice,
|
||||
streams[i]));
|
||||
}
|
||||
for(int i = 0; i < num_streams; i++)
|
||||
{
|
||||
int offset = i * elements_per_stream;
|
||||
cube<<<gridSizePerStream, blockSize, 0, streams[i]>>>(
|
||||
d_input1, d_output1, offset, elements_per_stream);
|
||||
}
|
||||
for(int i = 0; i < num_streams; i++)
|
||||
{
|
||||
int offset = i * elements_per_stream;
|
||||
HIP_ASSERT(hipMemcpyAsync(&h_output1[offset],
|
||||
&d_output1[offset],
|
||||
bytes_per_stream,
|
||||
hipMemcpyDeviceToHost,
|
||||
streams[i]));
|
||||
}
|
||||
|
||||
HIP_ASSERT(hipEventRecord(stop));
|
||||
HIP_ASSERT(hipEventSynchronize(stop));
|
||||
|
||||
float milliseconds = 0;
|
||||
HIP_ASSERT(hipEventElapsedTime(&milliseconds, start, stop));
|
||||
|
||||
// Release device memory
|
||||
HIP_ASSERT(hipFree(d_input1));
|
||||
HIP_ASSERT(hipFree(d_output1));
|
||||
|
||||
// Release host memory
|
||||
HIP_ASSERT(hipHostFree(h_input1));
|
||||
HIP_ASSERT(hipHostFree(h_output1));
|
||||
|
||||
// Destroy streams
|
||||
for(int i = 0; i < num_streams; ++i)
|
||||
{
|
||||
HIP_ASSERT(hipStreamDestroy(streams[i]));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -29,7 +29,6 @@ project(
|
||||
VERSION 0.0.0)
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
find_package(rocDecode)
|
||||
|
||||
rocprofiler_configure_pytest_files(CONFIG pytest.ini COPY validate.py conftest.py)
|
||||
|
||||
@@ -41,9 +40,9 @@ set(hip-stream-display-env "${PRELOAD_ENV}")
|
||||
add_test(
|
||||
NAME rocprofv3-test-hip-stream-display-execute
|
||||
COMMAND
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --kernel-rename -d
|
||||
$<TARGET_FILE:rocprofiler-sdk::rocprofv3> --kernel-rename -s -d
|
||||
${CMAKE_CURRENT_BINARY_DIR}/%tag%-trace -o out --output-format json pftrace
|
||||
--log-level env -- $<TARGET_FILE:transpose>)
|
||||
--log-level env -- $<TARGET_FILE:hip-streams>)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hip-stream-display-execute
|
||||
@@ -56,14 +55,14 @@ set_tests_properties(
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"threw an exception"
|
||||
DISABLED
|
||||
$<NOT:$<TARGET_EXISTS:transpose>>)
|
||||
$<NOT:$<TARGET_EXISTS:hip-streams>>)
|
||||
|
||||
add_test(
|
||||
NAME rocprofv3-test-hip-stream-display-validate
|
||||
COMMAND
|
||||
${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --json-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-stream-display/out_results.json --pftrace-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-stream-display/out_results.pftrace)
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_results.json --pftrace-input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/hip-streams-trace/out_results.pftrace)
|
||||
|
||||
set_tests_properties(
|
||||
rocprofv3-test-hip-stream-display-validate
|
||||
@@ -76,4 +75,4 @@ set_tests_properties(
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"AssertionError"
|
||||
DISABLED
|
||||
$<NOT:$<TARGET_EXISTS:transpose>>)
|
||||
$<NOT:$<TARGET_EXISTS:hip-streams>>)
|
||||
|
||||
@@ -54,15 +54,17 @@ def test_stream_trace(json_data):
|
||||
buffer_records = data["buffer_records"]
|
||||
|
||||
kernel_dispatch_data = buffer_records["kernel_dispatch"]
|
||||
memory_copies_data = buffer_records["memory_copies"]
|
||||
memory_copies_data = buffer_records["memory_copy"]
|
||||
assert len(kernel_dispatch_data) > 0
|
||||
assert len(memory_copies_data) > 0
|
||||
|
||||
# Expect stream ids to be set to 1 or 2 for transpose executable
|
||||
expected_stream_ids = set((1, 2))
|
||||
# Expect stream ids to be set between 1 and 8 inclusive for transpose executable
|
||||
expected_stream_ids = set([i for i in range(1, 9)])
|
||||
|
||||
# check buffering data
|
||||
for titr in (kernel_dispatch_data, memory_copies_data):
|
||||
for node in rocdecode_data:
|
||||
stream_id_set = set()
|
||||
for node in titr:
|
||||
assert "size" in node
|
||||
assert "kind" in node
|
||||
assert "operation" in node
|
||||
@@ -70,7 +72,7 @@ def test_stream_trace(json_data):
|
||||
assert "end_timestamp" in node
|
||||
assert "start_timestamp" in node
|
||||
assert "thread_id" in node
|
||||
assert "_stream_id" in node
|
||||
assert "stream_id" in node
|
||||
|
||||
assert node.size > 0
|
||||
assert node.thread_id > 0
|
||||
@@ -78,13 +80,15 @@ def test_stream_trace(json_data):
|
||||
assert node.end_timestamp > 0
|
||||
assert node.start_timestamp < node.end_timestamp
|
||||
|
||||
assert node._stream_id.handle in expected_stream_ids
|
||||
stream_id = node.stream_id.handle
|
||||
stream_id_set.add(stream_id)
|
||||
assert stream_id_set == expected_stream_ids
|
||||
|
||||
|
||||
def test_perfetto_data(pftrace_data, json_data):
|
||||
import rocprofiler_sdk.tests.rocprofv3 as rocprofv3
|
||||
|
||||
assert pftrace_data != None
|
||||
assert pftrace_data.empty == False
|
||||
rocprofv3.test_perfetto_data(
|
||||
pftrace_data,
|
||||
json_data,
|
||||
|
||||
Referencia en una nueva incidencia
Block a user