Fixing Perfetto Plugin & Updating Perfetto SDK
Change-Id: Idad07448a70b0b17acee899c0265147455a9d87c
[ROCm/rocprofiler commit: 5c8cb39b08]
このコミットが含まれているのは:
@@ -18,6 +18,7 @@
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#include "perfetto.h"
|
||||
#include "rocprofiler.h"
|
||||
|
||||
#include <cassert>
|
||||
@@ -60,6 +61,7 @@ PERFETTO_DEFINE_CATEGORIES(
|
||||
perfetto::Category("External_API").SetDescription("ACTIVITY_DOMAIN_EXT_API"),
|
||||
perfetto::Category("HIP_OPS").SetDescription("ACTIVITY_DOMAIN_HIP_OPS"),
|
||||
perfetto::Category("HSA_OPS").SetDescription("ACTIVITY_DOMAIN_HSA_OPS"),
|
||||
perfetto::Category("MEM_COPIES").SetDescription("MEMORY_COPY_ASYNCHRONOUS_ACTIVITY"),
|
||||
perfetto::Category("KERNELS").SetDescription("KERNEL_DISPATCHES"),
|
||||
perfetto::Category("COUNTERS").SetDescription("PERFORMANCE_COUNTERS"));
|
||||
|
||||
@@ -97,8 +99,7 @@ class perfetto_plugin_t {
|
||||
const char* temp_file_name = getenv("OUT_FILE_NAME");
|
||||
output_file_name = temp_file_name ? std::string(temp_file_name) + "_" : "";
|
||||
|
||||
if (output_dir == nullptr)
|
||||
output_dir = "./";
|
||||
if (output_dir == nullptr) output_dir = "./";
|
||||
|
||||
output_prefix_ = output_dir;
|
||||
if (!fs::is_directory(fs::status(output_prefix_))) {
|
||||
@@ -107,9 +108,11 @@ class perfetto_plugin_t {
|
||||
return;
|
||||
}
|
||||
|
||||
machine_id_ = gethostid();
|
||||
gethostname(hostname_, sizeof(hostname_));
|
||||
|
||||
perfetto::TracingInitArgs args;
|
||||
args.backends |= perfetto::kInProcessBackend;
|
||||
|
||||
perfetto::Tracing::Initialize(args);
|
||||
perfetto::TrackEvent::Register();
|
||||
|
||||
@@ -122,13 +125,14 @@ class perfetto_plugin_t {
|
||||
track_event_cfg.add_enabled_categories("External_API");
|
||||
track_event_cfg.add_enabled_categories("HIP_OPS");
|
||||
track_event_cfg.add_enabled_categories("HSA_OPS");
|
||||
track_event_cfg.add_enabled_categories("MEM_COPIES");
|
||||
track_event_cfg.add_enabled_categories("KERNELS");
|
||||
track_event_cfg.add_enabled_categories("COUNTERS");
|
||||
|
||||
perfetto::TraceConfig trace_cfg;
|
||||
|
||||
auto buffer_cfg = trace_cfg.add_buffers();
|
||||
uint32_t max_buffer_size = 10 * 1024 * 1024; // Default max buffer size is 10 GB
|
||||
uint32_t max_buffer_size = 1024 * 1024; // Default max buffer size is 1 GB
|
||||
const char* max_buffer_size_str = getenv("rocprofiler_PERFETTO_MAX_BUFFER_SIZE_KIB");
|
||||
if (max_buffer_size_str && std::atol(max_buffer_size_str) > 0)
|
||||
max_buffer_size = std::atol(max_buffer_size_str);
|
||||
@@ -148,26 +152,18 @@ class perfetto_plugin_t {
|
||||
tracing_session_->Setup(trace_cfg, file_descriptor_);
|
||||
tracing_session_->StartBlocking();
|
||||
|
||||
machine_id = gethostid();
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(thread_tracks_lock_);
|
||||
process_name =
|
||||
perfetto::ProcessTrack::Current().Serialize().mutable_process()->process_name();
|
||||
auto process_track_desc = perfetto::ProcessTrack::Current().Serialize();
|
||||
uint64_t track_id =
|
||||
track_counter_.fetch_add((1 + machine_id) * GetPid(), std::memory_order_acquire);
|
||||
for (uint64_t tid : track_ids_used_) {
|
||||
while (track_id == tid) {
|
||||
track_id =
|
||||
track_counter_.fetch_add((1 + machine_id) * GetPid(), std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
process_track_desc.mutable_process()->set_process_name(get_thread_track_str());
|
||||
perfetto::TrackEvent::SetTrackDescriptor(perfetto::ProcessTrack::Current(),
|
||||
process_track_desc);
|
||||
perfetto::ProcessTrack::Current().Serialize().set_uuid(track_id);
|
||||
thread_tracks_.emplace(GetPid(), perfetto::ProcessTrack::Current());
|
||||
}
|
||||
// Give a custom name for the traced process.
|
||||
perfetto::ProcessTrack process_track = perfetto::ProcessTrack::Current();
|
||||
perfetto::protos::gen::TrackDescriptor desc = process_track.Serialize();
|
||||
desc.mutable_process()->set_process_name("Node: " + std::string(hostname_) + " Rank " +
|
||||
std::to_string(MPI_rank));
|
||||
perfetto::TrackEvent::SetTrackDescriptor(process_track, desc);
|
||||
|
||||
mem_copies_track_.emplace(perfetto::Track::ThreadScoped("Memory Copies Operations"));
|
||||
perfetto::protos::gen::TrackDescriptor mem_copies_track_desc = mem_copies_track_->Serialize();
|
||||
mem_copies_track_desc.mutable_process()->set_process_name("Memory Copies Operations");
|
||||
mem_copies_track_desc.mutable_process()->set_pid(GetPid() * QUEUE_CONSTANT);
|
||||
perfetto::TrackEvent::SetTrackDescriptor(mem_copies_track_.value(), mem_copies_track_desc);
|
||||
|
||||
is_valid_ = true;
|
||||
}
|
||||
@@ -180,14 +176,13 @@ class perfetto_plugin_t {
|
||||
}
|
||||
|
||||
std::string replace_MPI_macros(std::string output_file_name) {
|
||||
std::vector<const char*> MPI_BUILTINS = {
|
||||
"MPI_RANK", "OMPI_COMM_WORLD_RANK", "MV2_COMM_WORLD_RANK"
|
||||
};
|
||||
std::vector<const char*> MPI_BUILTINS = {"MPI_RANK", "OMPI_COMM_WORLD_RANK",
|
||||
"MV2_COMM_WORLD_RANK"};
|
||||
bIsMPI = false;
|
||||
|
||||
for (const char* envvar : MPI_BUILTINS) {
|
||||
const char* rank_env_var = getenv(envvar);
|
||||
if (rank_env_var == nullptr) continue; // MPI var does not exist
|
||||
if (rank_env_var == nullptr) continue; // MPI var does not exist
|
||||
|
||||
MPI_rank = atoi(rank_env_var);
|
||||
bIsMPI = true;
|
||||
@@ -195,26 +190,13 @@ class perfetto_plugin_t {
|
||||
}
|
||||
|
||||
size_t key_find = output_file_name.rfind("%rank");
|
||||
if (key_find != std::string::npos) { // Contains a %?rank string
|
||||
output_file_name = output_file_name.substr(0, key_find) + std::to_string(MPI_rank)
|
||||
+ output_file_name.substr(key_find + std::string("%rank").size());
|
||||
if (key_find != std::string::npos) { // Contains a %?rank string
|
||||
output_file_name = output_file_name.substr(0, key_find) + std::to_string(MPI_rank) +
|
||||
output_file_name.substr(key_find + std::string("%rank").size());
|
||||
}
|
||||
return output_file_name;
|
||||
}
|
||||
|
||||
std::string get_thread_track_str() {
|
||||
if (!bIsMPI)
|
||||
rocprofiler::string_printf("Node: %s Process ID: %lu Thread ID:", hostname_, GetPid());
|
||||
std::stringstream thread_track_str;
|
||||
thread_track_str << "Rank: " << MPI_rank << " (" << hostname_
|
||||
<< ") Process ID:" << GetPid() << " Thread ID:";
|
||||
return thread_track_str.str();
|
||||
}
|
||||
|
||||
std::string get_device_track_str() {
|
||||
return rocprofiler::string_printf("Node: %s Device:", hostname_);
|
||||
}
|
||||
|
||||
const char* GetDomainName(rocprofiler_tracer_activity_domain_t domain) {
|
||||
switch (domain) {
|
||||
case ACTIVITY_DOMAIN_ROCTX:
|
||||
@@ -252,18 +234,20 @@ class perfetto_plugin_t {
|
||||
std::unordered_map<int, perfetto::Track>::iterator device_track_it;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(device_tracks_lock_);
|
||||
device_track_it = device_tracks_.find(device_id);
|
||||
uint64_t device_track_id = (device_id + 2) * (machine_id_ + 2);
|
||||
device_track_it = device_tracks_.find(device_track_id);
|
||||
if (device_track_it == device_tracks_.end()) {
|
||||
/* Create a new perfetto::Track (Sub-Track) */
|
||||
device_track_it =
|
||||
device_tracks_
|
||||
.emplace(device_id, perfetto::ProcessTrack::Global(((device_id + 1) * machine_id)))
|
||||
.first;
|
||||
device_tracks_.emplace(device_track_id, perfetto::Track::Global(device_track_id)).first;
|
||||
auto gpu_desc = device_track_it->second.Serialize();
|
||||
gpu_desc.mutable_process()->set_pid(device_id);
|
||||
gpu_desc.mutable_process()->set_process_name(get_device_track_str());
|
||||
gpu_desc.mutable_process()->set_chrome_process_type(
|
||||
perfetto::protos::gen::ProcessDescriptor::PROCESS_GPU);
|
||||
gpu_desc.mutable_process()->set_process_name("Node: " + std::string(hostname_) +
|
||||
" Device: ");
|
||||
perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
|
||||
track_ids_used_.emplace_back(device_id + 1 + machine_id);
|
||||
track_ids_used_.emplace_back(device_track_id);
|
||||
}
|
||||
}
|
||||
auto& gpu_track = device_track_it->second;
|
||||
@@ -272,25 +256,19 @@ class perfetto_plugin_t {
|
||||
auto queue_track_it = queue_tracks_.find(gpu_queue_id.first);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(queue_tracks_lock_);
|
||||
queue_track_it = queue_tracks_.find(gpu_queue_id.first);
|
||||
uint64_t queue_track_id = (gpu_queue_id.first + 2) * (device_id + 2) * (machine_id_ + 2);
|
||||
queue_track_it = queue_tracks_.find(queue_track_id);
|
||||
if (queue_track_it == queue_tracks_.end()) {
|
||||
/* Create a new perfetto::Track */
|
||||
queue_track_it = queue_tracks_
|
||||
.emplace(gpu_queue_id.first,
|
||||
perfetto::Track((profiler_record.queue_id.handle + 1 +
|
||||
profiler_record.gpu_id.handle) *
|
||||
QUEUE_CONSTANT * machine_id * GetPid(),
|
||||
gpu_track))
|
||||
.first;
|
||||
queue_track_it =
|
||||
queue_tracks_.emplace(queue_track_id, perfetto::Track(queue_track_id, gpu_track)).first;
|
||||
|
||||
auto queue_desc = queue_track_it->second.Serialize();
|
||||
std::string queue_str =
|
||||
rocprofiler::string_printf("Process ID: %lu Queue %ld", GetPid(), gpu_queue_id.second);
|
||||
std::string queue_str = rocprofiler::string_printf("Queue %ld", gpu_queue_id.second);
|
||||
queue_desc.set_name(queue_str);
|
||||
perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
|
||||
}
|
||||
track_ids_used_.emplace_back(profiler_record.queue_id.handle + machine_id + 1 +
|
||||
profiler_record.gpu_id.handle);
|
||||
track_ids_used_.emplace_back(queue_track_id);
|
||||
}
|
||||
auto& queue_track = queue_track_it->second;
|
||||
|
||||
@@ -319,8 +297,7 @@ class perfetto_plugin_t {
|
||||
TRACE_EVENT_END("KERNELS", queue_track, profiler_record.timestamps.end.value);
|
||||
|
||||
auto get_counter_track_fn = [&](std::string counter_name) {
|
||||
std ::string counter_track_id =
|
||||
std::to_string(machine_id) + std::to_string(GetPid()) + counter_name;
|
||||
std ::string counter_track_id = hostname_ + std::to_string(GetPid()) + counter_name;
|
||||
std::pair<int, std::string> gpu_counter_track_id = std::make_pair(device_id, counter_name);
|
||||
std::unordered_map<std::string, perfetto::CounterTrack>::iterator counters_track_it;
|
||||
{
|
||||
@@ -335,8 +312,7 @@ class perfetto_plugin_t {
|
||||
.first;
|
||||
|
||||
auto counter_track_desc = counters_track_it->second.Serialize();
|
||||
std::string counter_track_str = "Process ID " + std::to_string(GetPid()) + " - Counter " +
|
||||
gpu_counter_track_id.second;
|
||||
std::string counter_track_str = "Counter " + gpu_counter_track_id.second;
|
||||
counter_track_desc.set_name(counter_track_str);
|
||||
perfetto::TrackEvent::SetTrackDescriptor(counters_track_it->second, counter_track_desc);
|
||||
}
|
||||
@@ -386,42 +362,39 @@ class perfetto_plugin_t {
|
||||
if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS ||
|
||||
tracer_record.domain == ACTIVITY_DOMAIN_HSA_OPS) {
|
||||
int device_id = tracer_record.agent_id.handle;
|
||||
if (tracer_record.domain == ACTIVITY_DOMAIN_HIP_OPS && device_id > 0) device_id--;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(device_tracks_lock_);
|
||||
device_track_it = device_tracks_.find(device_id);
|
||||
if (device_track_it == device_tracks_.end()) {
|
||||
/* Create a new perfetto::Track (Sub-Track) */
|
||||
device_track_it =
|
||||
device_tracks_
|
||||
.emplace(device_id,
|
||||
perfetto::ProcessTrack::Global(((device_id + 1) * machine_id)))
|
||||
.first;
|
||||
auto gpu_desc = device_track_it->second.Serialize();
|
||||
gpu_desc.mutable_process()->set_pid(device_id);
|
||||
gpu_desc.mutable_process()->set_process_name(get_device_track_str());
|
||||
perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
|
||||
track_ids_used_.emplace_back(1 + machine_id + device_id);
|
||||
}
|
||||
|
||||
std::lock_guard<std::mutex> lock(device_tracks_lock_);
|
||||
uint64_t device_track_id = (device_id + 2) * (machine_id_ + 2);
|
||||
device_track_it = device_tracks_.find(device_track_id);
|
||||
if (device_track_it == device_tracks_.end()) {
|
||||
/* Create a new perfetto::Track (Sub-Track) */
|
||||
device_track_it =
|
||||
device_tracks_.emplace(device_track_id, perfetto::ProcessTrack::Global(device_track_id))
|
||||
.first;
|
||||
auto gpu_desc = device_track_it->second.Serialize();
|
||||
gpu_desc.mutable_process()->set_pid(device_id);
|
||||
gpu_desc.mutable_process()->set_chrome_process_type(
|
||||
perfetto::protos::gen::ProcessDescriptor::PROCESS_GPU);
|
||||
gpu_desc.mutable_process()->set_process_name("Node: " + std::string(hostname_) +
|
||||
" Device: ");
|
||||
perfetto::TrackEvent::SetTrackDescriptor(device_track_it->second, gpu_desc);
|
||||
track_ids_used_.emplace_back(device_track_id);
|
||||
}
|
||||
} else {
|
||||
std::lock_guard<std::mutex> lock(thread_tracks_lock_);
|
||||
thread_track_it = thread_tracks_.find(thread_id);
|
||||
uint64_t thread_track_id = (thread_id + 2) * GetPid() * (machine_id_ + 2);
|
||||
thread_track_it = thread_tracks_.find(thread_track_id);
|
||||
if (thread_track_it == thread_tracks_.end()) {
|
||||
uint64_t track_id =
|
||||
track_counter_.fetch_add((1 + machine_id) * GetPid(), std::memory_order_acquire);
|
||||
for (uint64_t tid : track_ids_used_) {
|
||||
while (track_id == tid) {
|
||||
track_id =
|
||||
track_counter_.fetch_add((1 + machine_id) * GetPid(), std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
thread_track_it =
|
||||
thread_tracks_.emplace(thread_id, perfetto::ProcessTrack::Global(track_id)).first;
|
||||
thread_tracks_
|
||||
.emplace(thread_track_id,
|
||||
perfetto::Track(thread_track_id, perfetto::ProcessTrack::Current()))
|
||||
.first;
|
||||
auto thread_track_desc = thread_track_it->second.Serialize();
|
||||
thread_track_desc.mutable_process()->set_pid(thread_id);
|
||||
thread_track_desc.mutable_process()->set_process_name(get_thread_track_str());
|
||||
thread_track_desc.mutable_process()->set_process_name("Thread: ");
|
||||
perfetto::TrackEvent::SetTrackDescriptor(thread_track_it->second, thread_track_desc);
|
||||
track_ids_used_.emplace_back(thread_track_id);
|
||||
}
|
||||
}
|
||||
auto& thread_track = thread_track_it->second;
|
||||
@@ -435,10 +408,10 @@ class perfetto_plugin_t {
|
||||
if (roctx_track_it == roctx_tracks_.end()) {
|
||||
/* Create a new perfetto::Track */
|
||||
uint64_t track_id =
|
||||
track_counter_.fetch_add((1 + machine_id) * GetPid(), std::memory_order_acquire);
|
||||
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
|
||||
for (uint64_t tid : track_ids_used_) {
|
||||
while (track_id == tid) {
|
||||
track_id = track_counter_.fetch_add((1 + machine_id) * GetPid(),
|
||||
track_id = track_counter_.fetch_add((1 + machine_id_) * GetPid(),
|
||||
std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
@@ -475,10 +448,10 @@ class perfetto_plugin_t {
|
||||
if (hsa_track_it == hsa_tracks_.end()) {
|
||||
/* Create a new perfetto::Track */
|
||||
uint64_t track_id =
|
||||
track_counter_.fetch_add((1 + machine_id) * GetPid(), std::memory_order_acquire);
|
||||
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
|
||||
for (uint64_t tid : track_ids_used_) {
|
||||
while (track_id == tid) {
|
||||
track_id = track_counter_.fetch_add((1 + machine_id) * GetPid(),
|
||||
track_id = track_counter_.fetch_add((1 + machine_id_) * GetPid(),
|
||||
std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
@@ -526,10 +499,10 @@ class perfetto_plugin_t {
|
||||
if (hip_track_it == hip_tracks_.end()) {
|
||||
/* Create a new perfetto::Track */
|
||||
uint64_t track_id =
|
||||
track_counter_.fetch_add((1 + machine_id) * GetPid(), std::memory_order_acquire);
|
||||
track_counter_.fetch_add((1 + machine_id_) * GetPid(), std::memory_order_acquire);
|
||||
for (uint64_t tid : track_ids_used_) {
|
||||
while (track_id == tid) {
|
||||
track_id = track_counter_.fetch_add((1 + machine_id) * GetPid(),
|
||||
track_id = track_counter_.fetch_add((1 + machine_id_) * GetPid(),
|
||||
std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
@@ -591,36 +564,38 @@ class perfetto_plugin_t {
|
||||
case ACTIVITY_DOMAIN_HIP_OPS: {
|
||||
// TODO(aelwazir): Stream ID is removed from the API processing.
|
||||
// Waiting for better implementation to get the stream id.
|
||||
uint64_t stream_id = 0;
|
||||
std::unordered_map<int, perfetto::Track>::iterator stream_track_it;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(stream_tracks_lock_);
|
||||
stream_track_it = stream_tracks_.find(stream_id);
|
||||
if (stream_track_it == stream_tracks_.end()) {
|
||||
/* Create a new perfetto::Track */
|
||||
uint64_t track_id = ((1 + stream_id + tracer_record.agent_id.handle) * machine_id *
|
||||
STREAM_CONSTANT * GetPid());
|
||||
stream_track_it =
|
||||
stream_tracks_.emplace(stream_id, perfetto::Track(track_id, gpu_track)).first;
|
||||
// uint64_t stream_id = 0;
|
||||
// std::unordered_map<int, perfetto::Track>::iterator stream_track_it;
|
||||
// {
|
||||
// std::lock_guard<std::mutex> lock(stream_tracks_lock_);
|
||||
// uint64_t stream_track_id = (stream_id + STREAM_CONSTANT) *
|
||||
// (tracer_record.agent_id.handle + 2) * (machine_id_ + 2);
|
||||
// stream_track_it = stream_tracks_.find(stream_track_id);
|
||||
// if (stream_track_it == stream_tracks_.end()) {
|
||||
// /* Create a new perfetto::Track */
|
||||
// stream_track_it =
|
||||
// stream_tracks_.emplace(stream_track_id, perfetto::Track(stream_track_id,
|
||||
// gpu_track))
|
||||
// .first;
|
||||
|
||||
auto stream_desc = stream_track_it->second.Serialize();
|
||||
std::string stream_str =
|
||||
rocprofiler::string_printf("Process ID: %lu Stream %d", GetPid(), stream_id);
|
||||
stream_desc.set_name(stream_str);
|
||||
perfetto::TrackEvent::SetTrackDescriptor(stream_track_it->second, stream_desc);
|
||||
track_ids_used_.emplace_back(1 + machine_id + tracer_record.agent_id.handle);
|
||||
}
|
||||
}
|
||||
auto& stream_track = stream_track_it->second;
|
||||
// auto stream_desc = stream_track_it->second.Serialize();
|
||||
// std::string stream_str = rocprofiler::string_printf("Stream %d", stream_id);
|
||||
// stream_desc.set_name(stream_str);
|
||||
// perfetto::TrackEvent::SetTrackDescriptor(stream_track_it->second, stream_desc);
|
||||
// track_ids_used_.emplace_back(stream_track_id);
|
||||
// }
|
||||
// }
|
||||
// auto& stream_track = stream_track_it->second;
|
||||
rocprofiler_timestamp_t timestamp;
|
||||
rocprofiler_get_timestamp(&timestamp);
|
||||
std::string::size_type pos = std::string::npos;
|
||||
if (tracer_record.api_data_handle.handle && tracer_record.api_data_handle.size > 1) {
|
||||
kernel_name = rocprofiler::cxx_demangle(
|
||||
strdup(reinterpret_cast<const char*>(tracer_record.api_data_handle.handle)));
|
||||
TRACE_EVENT_BEGIN(
|
||||
"HIP_OPS",
|
||||
perfetto::StaticString(strdup(rocprofiler::truncate_name(kernel_name).c_str())),
|
||||
stream_track, tracer_record.timestamps.begin.value, "Agent ID",
|
||||
gpu_track, tracer_record.timestamps.begin.value, "Agent ID",
|
||||
tracer_record.agent_id.handle, "Process ID", GetPid(), "Kernel Name", kernel_name,
|
||||
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
|
||||
} else {
|
||||
@@ -636,49 +611,65 @@ class perfetto_plugin_t {
|
||||
} else {
|
||||
activity_name = const_cast<char*>(std::string("N/A").c_str());
|
||||
}
|
||||
if (tracer_record.phase == ROCPROFILER_PHASE_NONE)
|
||||
TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(activity_name), stream_track,
|
||||
tracer_record.timestamps.begin.value, "Agent ID",
|
||||
tracer_record.agent_id.handle, "Process ID", GetPid(),
|
||||
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
|
||||
else if (tracer_record.phase == ROCPROFILER_PHASE_ENTER)
|
||||
TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(activity_name), stream_track,
|
||||
timestamp.value, "Agent ID", tracer_record.agent_id.handle,
|
||||
"Process ID", GetPid(),
|
||||
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
|
||||
pos = std::string(activity_name).find("Copy");
|
||||
if (tracer_record.phase == ROCPROFILER_PHASE_NONE) {
|
||||
if (std::string::npos == pos)
|
||||
TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(activity_name), gpu_track,
|
||||
tracer_record.timestamps.begin.value, "Process ID", GetPid(),
|
||||
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
|
||||
else
|
||||
TRACE_EVENT_BEGIN("MEM_COPIES", perfetto::StaticString(activity_name),
|
||||
mem_copies_track_.value(), tracer_record.timestamps.begin.value,
|
||||
"Process ID", GetPid(),
|
||||
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
|
||||
} else if (tracer_record.phase == ROCPROFILER_PHASE_ENTER) {
|
||||
if (std::string::npos == pos)
|
||||
TRACE_EVENT_BEGIN("HIP_OPS", perfetto::StaticString(activity_name), gpu_track,
|
||||
timestamp.value, "Process ID", GetPid(),
|
||||
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
|
||||
else
|
||||
TRACE_EVENT_BEGIN("MEM_COPIES", perfetto::StaticString(activity_name),
|
||||
mem_copies_track_.value(), timestamp.value, "Process ID", GetPid(),
|
||||
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
|
||||
}
|
||||
}
|
||||
if (tracer_record.phase == ROCPROFILER_PHASE_NONE) {
|
||||
if (std::string::npos == pos)
|
||||
TRACE_EVENT_END("HIP_OPS", gpu_track, tracer_record.timestamps.end.value);
|
||||
else
|
||||
TRACE_EVENT_END("MEM_COPIES", mem_copies_track_.value(),
|
||||
tracer_record.timestamps.end.value);
|
||||
} else if (tracer_record.phase == ROCPROFILER_PHASE_EXIT) {
|
||||
if (std::string::npos == pos)
|
||||
TRACE_EVENT_END("HIP_OPS", gpu_track, timestamp.value);
|
||||
else
|
||||
TRACE_EVENT_END("MEM_COPIES", mem_copies_track_.value(), timestamp.value);
|
||||
}
|
||||
if (tracer_record.phase == ROCPROFILER_PHASE_NONE)
|
||||
TRACE_EVENT_END("HIP_OPS", stream_track, tracer_record.timestamps.end.value);
|
||||
else if (tracer_record.phase == ROCPROFILER_PHASE_EXIT)
|
||||
TRACE_EVENT_END("HIP_OPS", stream_track, timestamp.value);
|
||||
break;
|
||||
}
|
||||
case ACTIVITY_DOMAIN_HSA_OPS: {
|
||||
std::pair<int, uint64_t> gpu_queue_id =
|
||||
std::make_pair(tracer_record.agent_id.handle, tracer_record.queue_id.handle);
|
||||
std::unordered_map<int, perfetto::Track>::iterator queue_track_it;
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(queue_tracks_lock_);
|
||||
queue_track_it = queue_tracks_.find(gpu_queue_id.first);
|
||||
if (queue_track_it == queue_tracks_.end()) {
|
||||
/* Create a new perfetto::Track */
|
||||
uint64_t track_id =
|
||||
((1 + tracer_record.queue_id.handle + tracer_record.agent_id.handle) * machine_id *
|
||||
QUEUE_CONSTANT * GetPid());
|
||||
queue_track_it =
|
||||
queue_tracks_.emplace(gpu_queue_id.first, perfetto::Track(track_id, gpu_track))
|
||||
.first;
|
||||
// std::pair<int, uint64_t> gpu_queue_id =
|
||||
// std::make_pair(tracer_record.agent_id.handle, tracer_record.queue_id.handle);
|
||||
// std::unordered_map<int, perfetto::Track>::iterator queue_track_it;
|
||||
// {
|
||||
// std::lock_guard<std::mutex> lock(queue_tracks_lock_);
|
||||
// uint64_t queue_track_id = (tracer_record.queue_id.handle + 2) *
|
||||
// (tracer_record.agent_id.handle + 2) * (machine_id_ + 2);
|
||||
// queue_track_it = queue_tracks_.find(queue_track_id);
|
||||
// if (queue_track_it == queue_tracks_.end()) {
|
||||
// /* Create a new perfetto::Track */
|
||||
// queue_track_it =
|
||||
// queue_tracks_.emplace(queue_track_id, perfetto::Track(queue_track_id, gpu_track))
|
||||
// .first;
|
||||
|
||||
auto queue_desc = queue_track_it->second.Serialize();
|
||||
std::string queue_str = rocprofiler::string_printf("Process ID: %lu Queue %ld", GetPid(),
|
||||
gpu_queue_id.second);
|
||||
queue_desc.set_name(queue_str);
|
||||
perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
|
||||
}
|
||||
track_ids_used_.emplace_back(tracer_record.queue_id.handle + machine_id + 1 +
|
||||
tracer_record.agent_id.handle);
|
||||
}
|
||||
auto& queue_track = queue_track_it->second;
|
||||
// auto queue_desc = queue_track_it->second.Serialize();
|
||||
// std::string queue_str = rocprofiler::string_printf("Queue %ld", gpu_queue_id.second);
|
||||
// queue_desc.set_name(queue_str);
|
||||
// perfetto::TrackEvent::SetTrackDescriptor(queue_track_it->second, queue_desc);
|
||||
// }
|
||||
// track_ids_used_.emplace_back(queue_track_id);
|
||||
// }
|
||||
// auto& queue_track = queue_track_it->second;
|
||||
size_t activity_name_size = 0;
|
||||
CHECK_ROCPROFILER(rocprofiler_query_hsa_tracer_api_data_info_size(
|
||||
session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
|
||||
@@ -689,12 +680,12 @@ class perfetto_plugin_t {
|
||||
session_id, ROCPROFILER_HSA_ACTIVITY_NAME, tracer_record.api_data_handle,
|
||||
tracer_record.operation_id, &activity_name));
|
||||
}
|
||||
TRACE_EVENT_BEGIN("HSA_OPS", perfetto::StaticString(activity_name), queue_track,
|
||||
tracer_record.timestamps.begin.value, "Agent ID",
|
||||
tracer_record.agent_id.handle, "Queue ID", tracer_record.queue_id.handle,
|
||||
TRACE_EVENT_BEGIN("MEM_COPIES", perfetto::StaticString(activity_name),
|
||||
mem_copies_track_.value(), tracer_record.timestamps.begin.value,
|
||||
"Process ID", GetPid(),
|
||||
perfetto::Flow::ProcessScoped(tracer_record.correlation_id.value));
|
||||
TRACE_EVENT_END("HSA_OPS", queue_track, tracer_record.timestamps.end.value);
|
||||
TRACE_EVENT_END("MEM_COPIES", mem_copies_track_.value(),
|
||||
tracer_record.timestamps.end.value);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
@@ -743,6 +734,7 @@ class perfetto_plugin_t {
|
||||
bool bIsMPI = false;
|
||||
int MPI_rank = 0;
|
||||
size_t roctx_track_entries_{0};
|
||||
std::optional<perfetto::Track> mem_copies_track_;
|
||||
|
||||
// Correlate stream id(s) with correlation id(s) to identify the stream id of every HIP activity
|
||||
std::unordered_map<uint64_t, uint64_t> stream_ids_;
|
||||
@@ -766,7 +758,7 @@ class perfetto_plugin_t {
|
||||
stream_tracks_lock_, counter_tracks_lock_;
|
||||
|
||||
char hostname_[1024];
|
||||
uint64_t machine_id;
|
||||
uint64_t machine_id_;
|
||||
|
||||
std::ofstream stream_;
|
||||
};
|
||||
|
||||
ファイル差分が大きすぎるため省略します
差分を読み込み
ファイル差分が大きすぎるため省略します
差分を読み込み
@@ -42,6 +42,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "core/hardware/hsa_info.h"
|
||||
#include "core/hsa/hsa_common.h"
|
||||
#include "src/core/session/tracer/src/correlation_id.h"
|
||||
#include "src/core/session/tracer/src/exception.h"
|
||||
#include "src/core/session/tracer/src/roctracer.h"
|
||||
@@ -145,6 +146,7 @@ class Tracker {
|
||||
void (*handler)(const entry_t*);
|
||||
union {
|
||||
struct {
|
||||
hsa_agent_t dst_agent;
|
||||
} copy;
|
||||
struct {
|
||||
const char* name;
|
||||
@@ -168,7 +170,7 @@ class Tracker {
|
||||
|
||||
// Creating a proxy signal
|
||||
status = rocprofiler::hsa_support::GetCoreApiTable().hsa_signal_create_fn(1, 0, NULL,
|
||||
&(entry->signal));
|
||||
&(entry->signal));
|
||||
if (status != HSA_STATUS_SUCCESS) rocprofiler::fatal("hsa_signal_create failed");
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_signal_async_handler_fn(
|
||||
entry->signal, HSA_SIGNAL_CONDITION_LT, 1, Handler, entry);
|
||||
@@ -332,7 +334,8 @@ hsa_status_t MemoryPoolAllocateIntercept(hsa_amd_memory_pool_t pool, size_t size
|
||||
return HSA_STATUS_SUCCESS;
|
||||
|
||||
auto it = agent_info_map.find(agent.handle);
|
||||
if (it == agent_info_map.end()) rocprofiler::fatal("agent was not found in the agent_info map");
|
||||
if (it == agent_info_map.end())
|
||||
rocprofiler::fatal("agent was not found in the agent_info map");
|
||||
|
||||
hsa_evt_data_t data{};
|
||||
data.device.type = it->second.type;
|
||||
@@ -343,7 +346,8 @@ hsa_status_t MemoryPoolAllocateIntercept(hsa_amd_memory_pool_t pool, size_t size
|
||||
ReportActivity(ACTIVITY_DOMAIN_HSA_EVT, HSA_EVT_ID_DEVICE, &data);
|
||||
return HSA_STATUS_SUCCESS;
|
||||
};
|
||||
rocprofiler::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn(agent_callback, &callback_data);
|
||||
rocprofiler::hsa_support::GetCoreApiTable().hsa_iterate_agents_fn(agent_callback,
|
||||
&callback_data);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
@@ -374,7 +378,8 @@ hsa_status_t AgentsAllowAccessIntercept(uint32_t num_agents, const hsa_agent_t*
|
||||
while (num_agents--) {
|
||||
hsa_agent_t agent = *agents++;
|
||||
auto it = agent_info_map.find(agent.handle);
|
||||
if (it == agent_info_map.end()) rocprofiler::fatal("agent was not found in the agent_info map");
|
||||
if (it == agent_info_map.end())
|
||||
rocprofiler::fatal("agent was not found in the agent_info map");
|
||||
|
||||
hsa_evt_data_t data{};
|
||||
data.device.type = it->second.type;
|
||||
@@ -508,7 +513,11 @@ void MemoryASyncCopyHandler(const Tracker::entry_t* entry) {
|
||||
record.op = HSA_OP_ID_COPY;
|
||||
record.begin_ns = entry->begin;
|
||||
record.end_ns = entry->end;
|
||||
record.device_id = 0;
|
||||
record.device_id = (entry->agent.handle > 0)
|
||||
? rocprofiler::hsa_support::GetAgentInfo(entry->agent.handle).getIndex()
|
||||
: (entry->copy.dst_agent.handle > 0)
|
||||
? rocprofiler::hsa_support::GetAgentInfo(entry->copy.dst_agent.handle).getIndex()
|
||||
: 0;
|
||||
record.correlation_id = entry->correlation_id;
|
||||
ReportActivity(ACTIVITY_DOMAIN_HSA_OPS, HSA_OP_ID_COPY, &record);
|
||||
}
|
||||
@@ -533,6 +542,8 @@ hsa_status_t MemoryASyncCopyIntercept(void* dst, hsa_agent_t dst_agent, const vo
|
||||
Tracker::entry_t* entry = new Tracker::entry_t();
|
||||
entry->handler = MemoryASyncCopyHandler;
|
||||
entry->correlation_id = CorrelationId();
|
||||
entry->agent = src_agent;
|
||||
entry->copy.dst_agent = dst_agent;
|
||||
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_async_copy_fn(
|
||||
@@ -566,6 +577,7 @@ hsa_status_t MemoryASyncCopyRectIntercept(const hsa_pitched_ptr_t* dst,
|
||||
Tracker::entry_t* entry = new Tracker::entry_t();
|
||||
entry->handler = MemoryASyncCopyHandler;
|
||||
entry->correlation_id = CorrelationId();
|
||||
entry->agent = copy_agent;
|
||||
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
|
||||
status = rocprofiler::hsa_support::GetAmdExtTable().hsa_amd_memory_async_copy_rect_fn(
|
||||
@@ -596,6 +608,8 @@ hsa_status_t MemoryASyncCopyOnEngineIntercept(
|
||||
Tracker::entry_t* entry = new Tracker::entry_t();
|
||||
entry->handler = MemoryASyncCopyHandler;
|
||||
entry->correlation_id = CorrelationId();
|
||||
entry->agent = src_agent;
|
||||
entry->copy.dst_agent = dst_agent;
|
||||
Tracker::Enable(Tracker::COPY_ENTRY_TYPE, hsa_agent_t{}, completion_signal, entry);
|
||||
|
||||
status = saved_amd_ext_api.hsa_amd_memory_async_copy_on_engine_fn(
|
||||
@@ -846,6 +860,7 @@ void Initialize(HsaApiTable* table) {
|
||||
cpu_agent = agent;
|
||||
rocprofiler::queue::InitializePools(cpu_agent, &agent_info);
|
||||
uint32_t cpu_numa_node_id;
|
||||
// Change into KFD GPU ID
|
||||
if (GetCoreApiTable().hsa_agent_get_info_fn(
|
||||
agent, HSA_AGENT_INFO_NODE, &cpu_numa_node_id) != HSA_STATUS_SUCCESS)
|
||||
rocprofiler::fatal("hsa_agent_get_info(HSA_AGENT_INFO_NODE) failed");
|
||||
|
||||
新しいイシューから参照
ユーザーをブロックする