Reimplement rocpd conversion to otf2 in Python (#1051)
This commit is contained in:
committed by
GitHub
parent
9baa65a8b7
commit
d18ab20f7d
@@ -25,7 +25,6 @@
|
||||
#include "lib/python/rocpd/source/common.hpp"
|
||||
#include "lib/python/rocpd/source/functions.hpp"
|
||||
#include "lib/python/rocpd/source/interop.hpp"
|
||||
#include "lib/python/rocpd/source/otf2.hpp"
|
||||
#include "lib/python/rocpd/source/perfetto.hpp"
|
||||
#include "lib/python/rocpd/source/serialization/sql.hpp"
|
||||
#include "lib/python/rocpd/source/sql_generator.hpp"
|
||||
@@ -508,147 +507,6 @@ PYBIND11_MODULE(libpyrocpd, pyrocpd)
|
||||
},
|
||||
"Write pftrace output file from rocpd SQLite3 database");
|
||||
|
||||
pyrocpd.def(
|
||||
"write_otf2",
|
||||
[](rocpd::RocpdImportData& data, const tool::output_config& output_cfg) {
|
||||
auto _create_agent_index =
|
||||
[&output_cfg](const rocpd::types::agent& _agent) -> tool::agent_index {
|
||||
auto ret_index = tool::create_agent_index(
|
||||
output_cfg.agent_index_value,
|
||||
_agent.node_id, // absolute index
|
||||
static_cast<uint32_t>(_agent.logical_node_id), // relative index
|
||||
static_cast<uint32_t>(_agent.logical_node_type_id), // type-relative index
|
||||
std::string_view(_agent.type));
|
||||
return ret_index;
|
||||
};
|
||||
|
||||
constexpr auto kernels_order_by =
|
||||
"agent_abs_index ASC, stream_id ASC, queue_id ASC, start ASC, end DESC";
|
||||
|
||||
// to initialise the OTF@ session properly we need to know:
|
||||
// (1) the process with the earliest start time
|
||||
// (2) find the process with the longest duration
|
||||
uint64_t min_start_time = std::numeric_limits<uint64_t>::max();
|
||||
uint64_t max_fini_time = std::numeric_limits<uint64_t>::min();
|
||||
for(auto obj : {data.connection})
|
||||
{
|
||||
auto* conn = rocpd::interop::get_connection(std::move(obj));
|
||||
|
||||
// min start
|
||||
sqlite3_stmt* _stmt_min_start_max_fini = nullptr;
|
||||
uint64_t _min_start_time = std::numeric_limits<uint64_t>::max();
|
||||
uint64_t _max_fini_time = std::numeric_limits<uint64_t>::min();
|
||||
|
||||
sqlite3_prepare_v2(conn,
|
||||
"SELECT MIN(start), MAX(fini) FROM processes;",
|
||||
-1,
|
||||
&_stmt_min_start_max_fini,
|
||||
nullptr);
|
||||
if(sqlite3_step(_stmt_min_start_max_fini) == SQLITE_ROW)
|
||||
{
|
||||
_min_start_time =
|
||||
static_cast<uint64_t>(sqlite3_column_int64(_stmt_min_start_max_fini, 0));
|
||||
_max_fini_time =
|
||||
static_cast<uint64_t>(sqlite3_column_int64(_stmt_min_start_max_fini, 1));
|
||||
}
|
||||
|
||||
sqlite3_finalize(_stmt_min_start_max_fini);
|
||||
min_start_time = std::min(min_start_time, _min_start_time);
|
||||
max_fini_time = std::max(max_fini_time, _max_fini_time);
|
||||
}
|
||||
|
||||
auto otf2_session =
|
||||
rocpd::output::OTF2Session(output_cfg, min_start_time, max_fini_time);
|
||||
|
||||
auto sqlgen_otf2 = common::simple_timer{
|
||||
fmt::format("OTF2 generation from {} SQL database(s)", data.size())};
|
||||
|
||||
uint16_t _process_counter = 0;
|
||||
for(auto obj : {data.connection})
|
||||
{
|
||||
auto* conn = rocpd::interop::get_connection(std::move(obj));
|
||||
auto nodes = rocpd::read<rocpd::types::node>(conn);
|
||||
for(const auto& nitr : nodes)
|
||||
{
|
||||
auto agents = rocpd::read<rocpd::types::agent>(
|
||||
conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id));
|
||||
auto processes = rocpd::read<rocpd::types::process>(
|
||||
conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id));
|
||||
|
||||
// absolute_index |-> (agent, agent_index)
|
||||
auto agents_map = std::unordered_map<uint64_t, rocpd::output::extended_agent>{};
|
||||
|
||||
for(const auto& itr : agents)
|
||||
{
|
||||
const rocprofiler::tool::agent_index new_index = _create_agent_index(itr);
|
||||
const std::string labeled_name = fmt::format("{}", itr.name);
|
||||
agents_map.emplace(
|
||||
itr.absolute_index,
|
||||
rocpd::output::extended_agent{itr, new_index, labeled_name});
|
||||
}
|
||||
|
||||
for(const auto& pitr : processes)
|
||||
{
|
||||
ROCP_FATAL_IF(pitr.nid != nitr.id || pitr.guid != nitr.guid)
|
||||
<< fmt::format("Found process with a mismatched nid/guid. process: "
|
||||
"{}/{} vs. node: {}/{}",
|
||||
pitr.nid,
|
||||
pitr.guid,
|
||||
nitr.id,
|
||||
nitr.guid);
|
||||
|
||||
auto select_guid_nid_pid =
|
||||
[&nitr, &pitr](std::string_view tbl,
|
||||
std::string_view where_extra_condition = "") {
|
||||
return fmt::format("SELECT * FROM {} WHERE guid = '{}' AND "
|
||||
"nid = {} AND pid = {} {}",
|
||||
tbl,
|
||||
pitr.guid,
|
||||
nitr.id,
|
||||
pitr.pid,
|
||||
where_extra_condition);
|
||||
};
|
||||
|
||||
constexpr auto region_order_by = "start ASC, end DESC";
|
||||
|
||||
auto _sqlgen_otf2 = common::simple_timer{fmt::format(
|
||||
"OTF2 generation from SQL for process {} (total)", pitr.pid)};
|
||||
|
||||
auto kernels = rocpd::sql_generator<rocpd::types::kernel_dispatch>{
|
||||
conn, select_guid_nid_pid("kernels"), kernels_order_by};
|
||||
|
||||
auto memory_allocations =
|
||||
rocpd::sql_generator<rocpd::types::memory_allocation>{
|
||||
conn, select_guid_nid_pid("memory_allocations"), region_order_by};
|
||||
|
||||
auto memory_copies = rocpd::sql_generator<rocpd::types::memory_copies>{
|
||||
conn, select_guid_nid_pid("memory_copies"), region_order_by};
|
||||
|
||||
auto regions = rocpd::sql_generator<rocpd::types::region>{
|
||||
conn, select_guid_nid_pid("regions"), region_order_by};
|
||||
|
||||
auto threads = rocpd::sql_generator<rocpd::types::thread>{
|
||||
conn, select_guid_nid_pid("threads")};
|
||||
|
||||
ROCP_TRACE << "Starting OTF2 generation from SQL for process " << pitr.pid;
|
||||
auto _sqlgen_perfw = common::simple_timer{fmt::format(
|
||||
"OTF2 generation from SQL for process {} (write)", pitr.pid)};
|
||||
rocpd::output::write_otf2(otf2_session,
|
||||
pitr,
|
||||
_process_counter,
|
||||
agents_map,
|
||||
threads,
|
||||
regions,
|
||||
kernels,
|
||||
memory_copies,
|
||||
memory_allocations);
|
||||
_process_counter++;
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"Write OTF2 output file from rocpd SQLite3 database");
|
||||
|
||||
// NOLINTEND(performance-unnecessary-value-param)
|
||||
|
||||
// reads in all the agent info from database
|
||||
|
||||
@@ -23,13 +23,440 @@
|
||||
# THE SOFTWARE.
|
||||
###############################################################################
|
||||
|
||||
import os
|
||||
import otf2
|
||||
from otf2.enums import LocationType, LocationGroupType, RegionRole, Paradigm
|
||||
import shutil
|
||||
import time
|
||||
from collections import defaultdict
|
||||
|
||||
from .importer import RocpdImportData
|
||||
from . import output_config
|
||||
from . import libpyrocpd
|
||||
|
||||
|
||||
def get_perfetto_category_name(category):
|
||||
"""Map category names to perfetto category names"""
|
||||
category_map = {
|
||||
"NONE": "none",
|
||||
"HSA_CORE_API": "hsa_api",
|
||||
"HSA_AMD_EXT_API": "hsa_api",
|
||||
"HSA_IMAGE_EXT_API": "hsa_api",
|
||||
"HSA_FINALIZE_EXT_API": "hsa_api",
|
||||
"HIP_RUNTIME_API": "hip_api",
|
||||
"HIP_COMPILER_API": "hip_api",
|
||||
"MARKER_CORE_API": "marker_api",
|
||||
"MARKER_CORE_RANGE_API": "marker_api",
|
||||
"MARKER_CONTROL_API": "marker_api",
|
||||
"MARKER_NAME_API": "marker_api",
|
||||
"MEMORY_COPY": "memory_copy",
|
||||
"MEMORY_ALLOCATION": "memory_allocation",
|
||||
"KERNEL_DISPATCH": "kernel_dispatch",
|
||||
"SCRATCH_MEMORY": "scratch_memory",
|
||||
"CORRELATION_ID_RETIREMENT": "none",
|
||||
"RCCL_API": "rccl_api",
|
||||
"OMPT": "openmp",
|
||||
"RUNTIME_INITIALIZATION": "none",
|
||||
"ROCDECODE_API": "rocdecode_api",
|
||||
"ROCJPEG_API": "rocjpeg_api",
|
||||
"HIP_STREAM": "hip_api",
|
||||
"HIP_RUNTIME_API_EXT": "hip_api",
|
||||
"HIP_COMPILER_API_EXT": "hip_api",
|
||||
"ROCDECODE_API_EXT": "rocdecode_api",
|
||||
"KFD_EVENT_PAGE_MIGRATE": "kfd_events",
|
||||
"KFD_EVENT_PAGE_FAULT": "kfd_events",
|
||||
"KFD_EVENT_QUEUE": "kfd_events",
|
||||
"KFD_EVENT_UNMAP_FROM_GPU": "kfd_events",
|
||||
"KFD_EVENT_DROPPED_EVENTS": "kfd_events",
|
||||
"KFD_PAGE_MIGRATE": "kfd_events",
|
||||
"KFD_PAGE_FAULT": "kfd_events",
|
||||
"KFD_QUEUE": "kfd_events",
|
||||
}
|
||||
return category_map.get(category, "none")
|
||||
|
||||
|
||||
def allocation_level_type_name(level, type):
|
||||
if level == "REAL":
|
||||
name = "MEMORY"
|
||||
elif level == "VIRTUAL":
|
||||
name = "MEMORY_VMEM"
|
||||
elif level == "SCRATCH":
|
||||
name = "SCRATCH_MEMORY"
|
||||
else:
|
||||
return "UNKNOWN_LEVEL"
|
||||
|
||||
if type == "ALLOC":
|
||||
return name + "_ALLOCATE"
|
||||
elif type == "FREE":
|
||||
return name + "_FREE"
|
||||
else:
|
||||
return level + "_MEMORY_NONE"
|
||||
|
||||
return name
|
||||
|
||||
|
||||
def write_otf2(importData, config):
|
||||
return libpyrocpd.write_otf2(importData, config)
|
||||
|
||||
timer_resolution = 1_000_000_000
|
||||
trace_dir = getattr(config, "output_path", "./otf_traces")
|
||||
trace_file = f"{getattr(config, 'output_file', 'traces')}_results"
|
||||
if not os.path.exists(trace_dir):
|
||||
os.makedirs(trace_dir)
|
||||
|
||||
if os.path.exists(trace_dir):
|
||||
trace_subdir = os.path.join(trace_dir, trace_file)
|
||||
if os.path.exists(trace_subdir):
|
||||
shutil.rmtree(trace_subdir)
|
||||
|
||||
otf2_file = os.path.join(trace_dir, f"{trace_file}.otf2")
|
||||
def_file = os.path.join(trace_dir, f"{trace_file}.def")
|
||||
|
||||
print(f"Writing: {otf2_file}")
|
||||
print(f"Writing: {def_file}")
|
||||
print(f"Writing: {trace_subdir} directory")
|
||||
|
||||
if os.path.exists(otf2_file):
|
||||
os.remove(otf2_file)
|
||||
if os.path.exists(def_file):
|
||||
os.remove(def_file)
|
||||
with otf2.writer.open(
|
||||
trace_dir, trace_file, timer_resolution=timer_resolution
|
||||
) as archive:
|
||||
conn = getattr(importData, "connection", None)
|
||||
if conn is not None:
|
||||
try:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("SELECT MIN(start), MAX(fini) FROM processes;")
|
||||
min_start, max_finish = cursor.fetchone()
|
||||
with otf2.writer.DefinitionWriter(archive) as global_def_writer:
|
||||
global_offset = min_start
|
||||
duration = max_finish - min_start
|
||||
realtime_timestamp = int(round(time.time() * timer_resolution))
|
||||
global_def_writer.write_clock_properties(
|
||||
timer_resolution, global_offset, duration, realtime_timestamp
|
||||
)
|
||||
perfetto_category = archive.definitions.attribute(
|
||||
name="category", description="tracing category"
|
||||
)
|
||||
memory_copy_attributes = {perfetto_category: "memory_copy"}
|
||||
memory_allocation_attributes = {
|
||||
perfetto_category: "memory_allocation"
|
||||
}
|
||||
kernel_attributes = {perfetto_category: "kernel_dispatch"}
|
||||
kernel_rename = getattr(config, "kernel_rename")
|
||||
agent_index_value = getattr(config, "agent_index_value")
|
||||
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("SELECT DISTINCT guid, id FROM rocpd_info_node")
|
||||
for row in cursor:
|
||||
guid, nid = row
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT pid, hostname, command FROM processes WHERE guid = ? AND nid = ?",
|
||||
(guid, nid),
|
||||
)
|
||||
for row in cursor:
|
||||
pid, hostname, command = row
|
||||
tree_node = archive.definitions.system_tree_node(
|
||||
name=command, class_name=hostname, parent=None
|
||||
)
|
||||
cpu_location_group = archive.definitions.location_group(
|
||||
name=command,
|
||||
location_group_type=LocationGroupType.PROCESS,
|
||||
system_tree_parent=tree_node,
|
||||
)
|
||||
api_calls = defaultdict(list)
|
||||
memory_copies = defaultdict(list)
|
||||
memory_allocations = defaultdict(list)
|
||||
memory_deallocations = defaultdict(list)
|
||||
memory_unknown = defaultdict(list)
|
||||
kernel_dispatches = defaultdict(list)
|
||||
agents = {}
|
||||
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"""SELECT tid, start, end, name, category FROM regions
|
||||
WHERE guid = ? AND nid = ? AND pid = ?""",
|
||||
(guid, nid, pid),
|
||||
)
|
||||
for row in cursor:
|
||||
tid, start, end, name, category = row
|
||||
api_calls[tid].append((start, end, name, category))
|
||||
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"""SELECT tid, dst_agent_abs_index, start, end, name
|
||||
FROM memory_copies WHERE guid = ? AND nid = ?
|
||||
AND pid = ? ORDER BY start ASC""",
|
||||
(guid, nid, pid),
|
||||
)
|
||||
for row in cursor:
|
||||
tid, agent, start, end, name = row
|
||||
memory_copies[(tid, agent)].append((start, end, name))
|
||||
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"""SELECT tid, agent_abs_index, start, end, level, type
|
||||
FROM memory_allocations WHERE guid = ? AND nid = ?
|
||||
AND pid = ? ORDER BY start ASC""",
|
||||
(guid, nid, pid),
|
||||
)
|
||||
for row in cursor:
|
||||
tid, agent, start, end, level, type = row
|
||||
name = allocation_level_type_name(level, type)
|
||||
if type == "ALLOC":
|
||||
memory_allocations[(tid, agent)].append(
|
||||
(start, end, name)
|
||||
)
|
||||
elif type == "FREE":
|
||||
memory_deallocations[tid].append((start, end, name))
|
||||
else:
|
||||
memory_unknown[tid].append((start, end, name))
|
||||
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"""SELECT tid, agent_abs_index, queue_id,
|
||||
start, end, name, region
|
||||
FROM kernels WHERE guid = ? AND nid = ?
|
||||
AND pid = ? ORDER BY start ASC""",
|
||||
(guid, nid, pid),
|
||||
)
|
||||
for row in cursor:
|
||||
tid, agent, queue, start, end, name, region = row
|
||||
if kernel_rename and region:
|
||||
kernel_dispatches[(tid, agent, queue)].append(
|
||||
(start, end, region)
|
||||
)
|
||||
else:
|
||||
kernel_dispatches[(tid, agent, queue)].append(
|
||||
(start, end, name)
|
||||
)
|
||||
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
"SELECT id, name, type FROM rocpd_info_agent WHERE guid = ? AND nid = ?",
|
||||
(guid, nid),
|
||||
)
|
||||
for row in cursor:
|
||||
id, name, type = row
|
||||
agents[id] = (
|
||||
(
|
||||
f"{type} Agent-{id}"
|
||||
if agent_index_value
|
||||
else f"{type} {type}-{id}"
|
||||
),
|
||||
archive.definitions.location_group(
|
||||
name=name,
|
||||
location_group_type=LocationGroupType.ACCELERATOR,
|
||||
system_tree_parent=tree_node,
|
||||
),
|
||||
)
|
||||
|
||||
# Write API Call Events
|
||||
for tid, data in api_calls.items():
|
||||
cpu_location = archive.definitions.location(
|
||||
name=f"Thread {tid}",
|
||||
type=LocationType.CPU_THREAD,
|
||||
group=cpu_location_group,
|
||||
)
|
||||
event_writer = otf2.writer.EventWriter(
|
||||
archive, cpu_location
|
||||
)
|
||||
events = []
|
||||
for start, end, name, category in data:
|
||||
attributes = {
|
||||
perfetto_category: get_perfetto_category_name(
|
||||
category
|
||||
)
|
||||
}
|
||||
|
||||
region = archive.definitions.region(
|
||||
name=name,
|
||||
region_role=RegionRole.FUNCTION,
|
||||
paradigm=Paradigm.HIP,
|
||||
)
|
||||
events.append((start, "start", region, attributes))
|
||||
events.append((end, "end", region, None))
|
||||
events.sort(key=lambda x: x[0])
|
||||
for timestamp, event_type, region, attributes in events:
|
||||
if event_type == "start":
|
||||
event_writer.enter(
|
||||
timestamp, region, attributes=attributes
|
||||
)
|
||||
else: # if event_type == "end":
|
||||
event_writer.leave(timestamp, region)
|
||||
|
||||
# Write Memory Copy Events
|
||||
for (tid, agent_id), data in memory_copies.items():
|
||||
agent_name, agent_location_group = agents.get(
|
||||
agent_id, (f"Unknown Agent {agent_id}", None)
|
||||
)
|
||||
memory_copy_location = archive.definitions.location(
|
||||
name=f"Thread {tid}, Copy to {agent_name}",
|
||||
type=LocationType.ACCELERATOR_STREAM,
|
||||
group=agent_location_group,
|
||||
)
|
||||
memory_copy_writer = otf2.writer.EventWriter(
|
||||
archive, memory_copy_location
|
||||
)
|
||||
memory_copy_events = []
|
||||
for start, end, name in data:
|
||||
region = archive.definitions.region(
|
||||
name=name,
|
||||
region_role=RegionRole.DATA_TRANSFER,
|
||||
paradigm=Paradigm.HIP,
|
||||
)
|
||||
memory_copy_events.append((start, "enter", region))
|
||||
memory_copy_events.append((end, "leave", region))
|
||||
memory_copy_events.sort(key=lambda x: x[0])
|
||||
for timestamp, event_type, region in memory_copy_events:
|
||||
if event_type == "enter":
|
||||
memory_copy_writer.enter(
|
||||
timestamp,
|
||||
region,
|
||||
attributes=memory_copy_attributes,
|
||||
)
|
||||
else: # if event_type == "leave":
|
||||
memory_copy_writer.leave(timestamp, region)
|
||||
|
||||
# Write Memory Allocation Events
|
||||
for (tid, agent_id), data in memory_allocations.items():
|
||||
agent_name, agent_location_group = agents.get(
|
||||
agent_id, (f"Unknown Agent {agent_id}", None)
|
||||
)
|
||||
memory_allocation_location = archive.definitions.location(
|
||||
name=f"Thread {tid}, Memory Allocate at {agent_name}",
|
||||
type=LocationType.ACCELERATOR_STREAM,
|
||||
group=agent_location_group,
|
||||
)
|
||||
memory_allocation_writer = otf2.writer.EventWriter(
|
||||
archive, memory_allocation_location
|
||||
)
|
||||
memory_allocation_events = []
|
||||
for start, end, name in data:
|
||||
region = archive.definitions.region(
|
||||
name=name,
|
||||
region_role=RegionRole.ALLOCATE,
|
||||
paradigm=Paradigm.HIP,
|
||||
)
|
||||
memory_allocation_events.append(
|
||||
(start, "enter", region)
|
||||
)
|
||||
memory_allocation_events.append(
|
||||
(end, "leave", region)
|
||||
)
|
||||
memory_allocation_events.sort(key=lambda x: x[0])
|
||||
for (
|
||||
timestamp,
|
||||
event_type,
|
||||
region,
|
||||
) in memory_allocation_events:
|
||||
if event_type == "enter":
|
||||
memory_allocation_writer.enter(
|
||||
timestamp,
|
||||
region,
|
||||
attributes=memory_allocation_attributes,
|
||||
)
|
||||
else: # if event_type == "leave":
|
||||
memory_allocation_writer.leave(timestamp, region)
|
||||
|
||||
# Write Memory Deallocation Events
|
||||
for tid, data in memory_deallocations.items():
|
||||
memory_free_location = archive.definitions.location(
|
||||
name=f"Thread {tid}, Memory Deallocate (Free)",
|
||||
type=LocationType.ACCELERATOR_STREAM,
|
||||
group=cpu_location_group,
|
||||
)
|
||||
memory_free_writer = otf2.writer.EventWriter(
|
||||
archive, memory_free_location
|
||||
)
|
||||
memory_free_events = []
|
||||
for start, end, name in data:
|
||||
region = archive.definitions.region(
|
||||
name=name,
|
||||
region_role=RegionRole.DEALLOCATE,
|
||||
paradigm=Paradigm.HIP,
|
||||
)
|
||||
memory_free_events.append((start, "enter", region))
|
||||
memory_free_events.append((end, "leave", region))
|
||||
memory_free_events.sort(key=lambda x: x[0])
|
||||
for timestamp, event_type, region in memory_free_events:
|
||||
if event_type == "enter":
|
||||
memory_free_writer.enter(
|
||||
timestamp,
|
||||
region,
|
||||
attributes=memory_allocation_attributes,
|
||||
)
|
||||
else: # if event_type == "leave":
|
||||
memory_free_writer.leave(timestamp, region)
|
||||
|
||||
# Write Unknown Memory Events
|
||||
for tid, data in memory_unknown.items():
|
||||
memory_unknown_location = archive.definitions.location(
|
||||
name=f"Thread {tid}, Memory Operation UNK",
|
||||
type=LocationType.ACCELERATOR_STREAM,
|
||||
group=cpu_location_group,
|
||||
)
|
||||
memory_unknown_writer = otf2.writer.EventWriter(
|
||||
archive, memory_unknown_location
|
||||
)
|
||||
memory_unknown_events = []
|
||||
for start, end, name in data:
|
||||
region = archive.definitions.region(
|
||||
name=name,
|
||||
)
|
||||
memory_unknown_events.append((start, "enter", region))
|
||||
memory_unknown_events.append((end, "leave", region))
|
||||
memory_unknown_events.sort(key=lambda x: x[0])
|
||||
for (
|
||||
timestamp,
|
||||
event_type,
|
||||
region,
|
||||
) in memory_unknown_events:
|
||||
if event_type == "enter":
|
||||
memory_unknown_writer.enter(
|
||||
timestamp,
|
||||
region,
|
||||
attributes=memory_allocation_attributes,
|
||||
)
|
||||
else: # if event_type == "leave":
|
||||
memory_unknown_writer.leave(timestamp, region)
|
||||
|
||||
# Write Kernel Dispatch Events
|
||||
for (tid, agent_id, queue), data in kernel_dispatches.items():
|
||||
agent_name, agent_location_group = agents.get(
|
||||
agent_id, (f"Unknown Agent {agent_id}", None)
|
||||
)
|
||||
kernel_location = archive.definitions.location(
|
||||
name=f"Thread {tid}, Compute on {agent_name}, Queue {queue}",
|
||||
type=LocationType.ACCELERATOR_STREAM,
|
||||
group=agent_location_group,
|
||||
)
|
||||
kernel_writer = otf2.writer.EventWriter(
|
||||
archive, kernel_location
|
||||
)
|
||||
kernel_events = []
|
||||
for start, end, name in data:
|
||||
region = archive.definitions.region(
|
||||
name=name,
|
||||
region_role=RegionRole.FUNCTION,
|
||||
paradigm=Paradigm.HIP,
|
||||
)
|
||||
kernel_events.append((start, "enter", region))
|
||||
kernel_events.append((end, "leave", region))
|
||||
kernel_events.sort(key=lambda x: x[0])
|
||||
for timestamp, event_type, region in kernel_events:
|
||||
if event_type == "enter":
|
||||
kernel_writer.enter(
|
||||
timestamp,
|
||||
region,
|
||||
attributes=kernel_attributes,
|
||||
)
|
||||
else: # if event_type == "leave":
|
||||
kernel_writer.leave(timestamp, region)
|
||||
except Exception as e:
|
||||
print("Could not query sqlite database:", e)
|
||||
else:
|
||||
print("No sqlite connection found in importData.")
|
||||
|
||||
|
||||
def execute(input, config=None, **kwargs):
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
# libpyrocpd python binding sources
|
||||
#
|
||||
|
||||
set(libpyrocpd_source_headers common.hpp functions.hpp interop.hpp perfetto.hpp otf2.hpp
|
||||
set(libpyrocpd_source_headers common.hpp functions.hpp interop.hpp perfetto.hpp
|
||||
sql_generator.hpp pysqlite_Connection.h types.hpp)
|
||||
|
||||
set(libpyrocpd_source_sources functions.cpp interop.cpp otf2.cpp perfetto.cpp types.cpp)
|
||||
set(libpyrocpd_source_sources functions.cpp interop.cpp perfetto.cpp types.cpp)
|
||||
|
||||
foreach(_PYTHON_VERSION ${ROCPROFILER_PYTHON_VERSIONS})
|
||||
rocprofiler_rocpd_python_bindings_target_sources(
|
||||
|
||||
@@ -1,905 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "lib/python/rocpd/source/otf2.hpp"
|
||||
|
||||
#include "lib/common/defines.hpp"
|
||||
#include "lib/common/filesystem.hpp"
|
||||
#include "lib/common/hasher.hpp"
|
||||
#include "lib/common/mpl.hpp"
|
||||
#include "lib/common/units.hpp"
|
||||
#include "lib/common/utility.hpp"
|
||||
#include "lib/output/generator.hpp"
|
||||
#include "lib/output/metadata.hpp"
|
||||
#include "lib/output/node_info.hpp"
|
||||
#include "lib/output/output_config.hpp"
|
||||
#include "lib/output/output_stream.hpp"
|
||||
#include "lib/output/sql/common.hpp"
|
||||
#include "lib/output/stream_info.hpp"
|
||||
#include "lib/output/timestamps.hpp"
|
||||
#include "lib/rocprofiler-sdk-tool/config.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/marker/api_id.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
#include <rocprofiler-sdk/cxx/hash.hpp>
|
||||
#include <rocprofiler-sdk/cxx/operators.hpp>
|
||||
#include <rocprofiler-sdk/cxx/perfetto.hpp>
|
||||
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include <otf2/OTF2_AttributeList.h>
|
||||
#include <otf2/OTF2_AttributeValue.h>
|
||||
#include <otf2/OTF2_Definitions.h>
|
||||
#include <otf2/OTF2_GeneralDefinitions.h>
|
||||
#include <otf2/OTF2_Pthread_Locks.h>
|
||||
#include <otf2/otf2.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <ctime>
|
||||
#include <future>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#define OTF2_CHECK(result) \
|
||||
{ \
|
||||
OTF2_ErrorCode ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) = result; \
|
||||
if(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) != OTF2_SUCCESS) \
|
||||
{ \
|
||||
auto _err_name = OTF2_Error_GetName(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__)); \
|
||||
auto _err_msg = \
|
||||
OTF2_Error_GetDescription(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__)); \
|
||||
ROCP_FATAL << #result << " failed with error code " << _err_name \
|
||||
<< " (code=" << ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) \
|
||||
<< ") :: " << _err_msg; \
|
||||
} \
|
||||
}
|
||||
|
||||
namespace rocpd
|
||||
{
|
||||
namespace output
|
||||
{
|
||||
namespace
|
||||
{
|
||||
template <typename Tp, size_t N>
|
||||
struct array_hash
|
||||
{
|
||||
size_t operator()(const std::array<Tp, N>& _data) const
|
||||
{
|
||||
constexpr size_t seed = 0x9e3779b9;
|
||||
size_t _val = 0;
|
||||
for(const auto& itr : _data)
|
||||
_val ^= std::hash<Tp>{}(itr) + seed + (_val << 6) + (_val >> 2);
|
||||
return _val;
|
||||
}
|
||||
|
||||
template <typename... Up>
|
||||
size_t operator()(Up... _data) const
|
||||
{
|
||||
static_assert(sizeof...(Up) == N, "Insufficient data");
|
||||
return operator()(std::array<Tp, N>{std::forward<Up>(_data)...});
|
||||
}
|
||||
};
|
||||
|
||||
struct region_info
|
||||
{
|
||||
std::string name = {};
|
||||
OTF2_RegionRole_enum region_role = OTF2_REGION_ROLE_FUNCTION;
|
||||
OTF2_Paradigm_enum paradigm = OTF2_PARADIGM_HIP;
|
||||
};
|
||||
|
||||
OTF2_FlushType
|
||||
pre_flush(void* userData,
|
||||
OTF2_FileType fileType,
|
||||
OTF2_LocationRef location,
|
||||
void* callerData,
|
||||
bool fini);
|
||||
|
||||
OTF2_TimeStamp
|
||||
post_flush(void* userData, OTF2_FileType fileType, OTF2_LocationRef location);
|
||||
|
||||
template <typename... Args>
|
||||
void
|
||||
consume_variables(Args&&...)
|
||||
{}
|
||||
|
||||
using event_writer_t = OTF2_EvtWriter;
|
||||
using archive_t = OTF2_Archive;
|
||||
using attribute_list_t = OTF2_AttributeList;
|
||||
using hash_value_t = size_t;
|
||||
using hash_map_t = std::unordered_map<hash_value_t, region_info>;
|
||||
|
||||
auto main_tid = rocprofiler::common::get_tid();
|
||||
archive_t* archive = nullptr;
|
||||
auto flush_callbacks = OTF2_FlushCallbacks{pre_flush, post_flush};
|
||||
OTF2_GlobalDefWriter* global_def_writer = nullptr; // shared between data bases (processes)
|
||||
|
||||
enum rocprofiler_location_type_t
|
||||
{
|
||||
ROCPROFILER_AGENT_NO_TYPE = 0,
|
||||
ROCPROFILER_AGENT_MEMORY_COPY_TYPE,
|
||||
ROCPROFILER_AGENT_DISPATCH_TYPE,
|
||||
ROCPROFILER_AGENT_MEMORY_ALLOC_TYPE,
|
||||
ROCPROFILER_AGENT_MEMORY_DEALLOC_TYPE
|
||||
};
|
||||
|
||||
struct location_base
|
||||
{
|
||||
uint64_t pid = 0;
|
||||
uint64_t tid = 0;
|
||||
uint64_t agent_handle = 0;
|
||||
uint64_t queue_id = 0;
|
||||
rocprofiler_location_type_t type = ROCPROFILER_AGENT_NO_TYPE;
|
||||
|
||||
location_base(pid_t _pid,
|
||||
pid_t _tid,
|
||||
uint64_t _agent_handle = 0,
|
||||
rocprofiler_location_type_t _type = ROCPROFILER_AGENT_NO_TYPE,
|
||||
uint64_t _queue_id = 0)
|
||||
: pid{static_cast<uint64_t>(_pid)}
|
||||
, tid{static_cast<uint64_t>(_tid)}
|
||||
, agent_handle{_agent_handle}
|
||||
, queue_id{_queue_id}
|
||||
, type{_type}
|
||||
{}
|
||||
|
||||
auto hash() const
|
||||
{
|
||||
return array_hash<uint64_t, 5>{}(pid, tid, agent_handle + 1, queue_id + 1, type);
|
||||
}
|
||||
};
|
||||
|
||||
bool
|
||||
operator<(const location_base& lhs, const location_base& rhs)
|
||||
{
|
||||
return std::tie(lhs.pid, lhs.tid, lhs.agent_handle, lhs.queue_id, lhs.type) <
|
||||
std::tie(rhs.pid, rhs.tid, rhs.agent_handle, rhs.queue_id, rhs.type);
|
||||
}
|
||||
|
||||
struct location_data : location_base
|
||||
{
|
||||
location_data(pid_t _pid,
|
||||
pid_t _tid,
|
||||
uint64_t _agent_handle = 0,
|
||||
rocprofiler_location_type_t _type = ROCPROFILER_AGENT_NO_TYPE,
|
||||
uint64_t _queue_id = 0)
|
||||
: location_base{_pid, _tid, _agent_handle, _type, _queue_id}
|
||||
, index{++index_counter}
|
||||
, event_writer{OTF2_Archive_GetEvtWriter(CHECK_NOTNULL(archive), index)}
|
||||
{
|
||||
CHECK_NOTNULL(event_writer);
|
||||
}
|
||||
|
||||
using location_base::hash;
|
||||
|
||||
static uint64_t index_counter;
|
||||
|
||||
uint64_t index = 0;
|
||||
event_writer_t* event_writer = nullptr;
|
||||
|
||||
bool operator==(const location_base& rhs) const { return (hash() == rhs.hash()); }
|
||||
};
|
||||
|
||||
uint64_t location_data::index_counter = 0;
|
||||
|
||||
OTF2_TimeStamp
|
||||
get_time()
|
||||
{
|
||||
auto _ts = rocprofiler_timestamp_t{};
|
||||
rocprofiler_get_timestamp(&_ts);
|
||||
return static_cast<OTF2_TimeStamp>(_ts);
|
||||
}
|
||||
|
||||
auto&
|
||||
get_locations()
|
||||
{
|
||||
static auto _v = std::vector<std::unique_ptr<location_data>>{};
|
||||
return _v;
|
||||
}
|
||||
|
||||
const location_data*
|
||||
get_location(const location_base& _location, bool _init = false)
|
||||
{
|
||||
for(auto& itr : get_locations())
|
||||
if(*itr == _location) return itr.get();
|
||||
|
||||
if(_init)
|
||||
return get_locations()
|
||||
.emplace_back(std::make_unique<location_data>(_location.pid,
|
||||
_location.tid,
|
||||
_location.agent_handle,
|
||||
_location.type,
|
||||
_location.queue_id))
|
||||
.get();
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
event_writer_t*
|
||||
get_event_writer(const location_base& _location, bool _init = false)
|
||||
{
|
||||
const auto* _loc = get_location(_location, _init);
|
||||
return (_loc) ? _loc->event_writer : nullptr;
|
||||
}
|
||||
|
||||
OTF2_FlushType
|
||||
pre_flush(void* userData,
|
||||
OTF2_FileType fileType,
|
||||
OTF2_LocationRef location,
|
||||
void* callerData,
|
||||
bool fini)
|
||||
{
|
||||
consume_variables(userData, fileType, location, callerData, fini);
|
||||
return OTF2_FLUSH;
|
||||
}
|
||||
|
||||
OTF2_TimeStamp
|
||||
post_flush(void* userData, OTF2_FileType fileType, OTF2_LocationRef location)
|
||||
{
|
||||
consume_variables(userData, fileType, location);
|
||||
return get_time();
|
||||
}
|
||||
|
||||
template <typename Tp>
|
||||
size_t
|
||||
get_hash_id(Tp&& _val)
|
||||
{
|
||||
using value_type = rocprofiler::common::mpl::unqualified_type_t<Tp>;
|
||||
|
||||
if constexpr(!std::is_pointer<Tp>::value)
|
||||
return std::hash<value_type>{}(std::forward<Tp>(_val));
|
||||
else if constexpr(std::is_same<value_type, const char*>::value ||
|
||||
std::is_same<value_type, char*>::value)
|
||||
return get_hash_id(std::string_view{_val});
|
||||
else
|
||||
return get_hash_id(*_val);
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
auto
|
||||
add_event(std::string_view name,
|
||||
const location_base& _location,
|
||||
rocprofiler_callback_phase_t _phase,
|
||||
OTF2_TimeStamp _ts,
|
||||
attribute_list_t* _attributes = nullptr)
|
||||
{
|
||||
auto* evt_writer = get_event_writer(_location, true);
|
||||
auto _hash = get_hash_id(name);
|
||||
|
||||
if(_phase == ROCPROFILER_CALLBACK_PHASE_ENTER)
|
||||
OTF2_CHECK(OTF2_EvtWriter_Enter(evt_writer, _attributes, _ts, _hash))
|
||||
else if(_phase == ROCPROFILER_CALLBACK_PHASE_EXIT)
|
||||
OTF2_CHECK(OTF2_EvtWriter_Leave(evt_writer, _attributes, _ts, _hash))
|
||||
else
|
||||
ROCP_FATAL << "otf2::add_event phase is not enter or exit";
|
||||
}
|
||||
|
||||
void
|
||||
setup(const rocprofiler::tool::output_config& cfg, uint64_t min_start, uint64_t max_fini)
|
||||
{
|
||||
namespace fs = rocprofiler::common::filesystem;
|
||||
|
||||
auto _filename = rocprofiler::tool::get_output_filename(cfg, "results", std::string_view{});
|
||||
auto _filepath = fs::path{_filename};
|
||||
auto _name = _filepath.filename().string();
|
||||
auto _path = _filepath.parent_path().string();
|
||||
|
||||
if(fs::exists(_filepath)) fs::remove_all(_filepath);
|
||||
|
||||
constexpr uint64_t evt_chunk_size = 2 * rocprofiler::common::units::MB;
|
||||
constexpr uint64_t def_chunk_size = 8 * rocprofiler::common::units::MB;
|
||||
|
||||
archive = OTF2_Archive_Open(_path.c_str(),
|
||||
_name.c_str(),
|
||||
OTF2_FILEMODE_WRITE,
|
||||
evt_chunk_size, // event chunk size
|
||||
def_chunk_size, // def chunk size
|
||||
OTF2_SUBSTRATE_POSIX,
|
||||
OTF2_COMPRESSION_NONE);
|
||||
|
||||
OTF2_CHECK(OTF2_Archive_SetFlushCallbacks(archive, &flush_callbacks, nullptr));
|
||||
OTF2_CHECK(OTF2_Archive_SetSerialCollectiveCallbacks(archive));
|
||||
OTF2_CHECK(OTF2_Pthread_Archive_SetLockingCallbacks(archive, nullptr));
|
||||
OTF2_CHECK(OTF2_Archive_OpenEvtFiles(archive));
|
||||
|
||||
ROCP_ERROR << "Opened result file: " << _filename << ".otf2";
|
||||
|
||||
auto _timer_resolution =
|
||||
rocprofiler::common::get_clock_period_ns_impl(rocprofiler::common::default_clock_id) *
|
||||
std::nano::den;
|
||||
auto _global_offset = min_start;
|
||||
auto _max_trace_length = (max_fini - min_start);
|
||||
|
||||
global_def_writer = OTF2_Archive_GetGlobalDefWriter(archive);
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteClockProperties(
|
||||
global_def_writer,
|
||||
_timer_resolution,
|
||||
_global_offset,
|
||||
_max_trace_length,
|
||||
std::chrono::system_clock::now().time_since_epoch().count()));
|
||||
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteString(global_def_writer, 0, ""));
|
||||
|
||||
auto add_write_string = [](size_t _hash, std::string_view _name_strv) {
|
||||
static auto _existing = std::unordered_set<size_t>{};
|
||||
if(_hash > 0 && _existing.count(_hash) == 0)
|
||||
{
|
||||
OTF2_CHECK(
|
||||
OTF2_GlobalDefWriter_WriteString(global_def_writer, _hash, _name_strv.data()));
|
||||
_existing.emplace(_hash);
|
||||
}
|
||||
};
|
||||
|
||||
auto add_write_string_val = [&add_write_string](std::string_view _name_v) {
|
||||
auto _hash_v = get_hash_id(_name_v);
|
||||
add_write_string(_hash_v, _name_v);
|
||||
return _hash_v;
|
||||
};
|
||||
|
||||
//(must be shared between processes)
|
||||
auto _attr_name = std::string_view{"category"};
|
||||
auto _attr_desc = std::string_view{"tracing category"};
|
||||
|
||||
auto _attr_name_hash = add_write_string_val(_attr_name);
|
||||
auto _attr_desc_hash = add_write_string_val(_attr_desc);
|
||||
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteAttribute(
|
||||
global_def_writer, 0, _attr_name_hash, _attr_desc_hash, OTF2_TYPE_STRING));
|
||||
}
|
||||
|
||||
void
|
||||
shutdown()
|
||||
{
|
||||
OTF2_CHECK(OTF2_Archive_Close(archive));
|
||||
}
|
||||
|
||||
struct event_info
|
||||
{
|
||||
explicit event_info(location_base&& _loc)
|
||||
: m_location{output::get_location(std::forward<location_base>(_loc), true)}
|
||||
{}
|
||||
|
||||
auto id() const { return m_location->index; }
|
||||
auto hash() const { return m_location->hash(); }
|
||||
const location_base* get_location() const { return m_location; }
|
||||
|
||||
std::string name = {};
|
||||
uint64_t event_count = 0;
|
||||
|
||||
private:
|
||||
const location_data* m_location = nullptr;
|
||||
};
|
||||
|
||||
attribute_list_t*
|
||||
create_attribute_list_for_name(const char* name)
|
||||
{
|
||||
auto* _val = OTF2_AttributeList_New();
|
||||
auto _hash = get_hash_id(name);
|
||||
auto _attr_value = OTF2_AttributeValue{};
|
||||
_attr_value.stringRef = _hash;
|
||||
OTF2_AttributeList_AddAttribute(_val, 0, OTF2_TYPE_STRING, _attr_value);
|
||||
return _val;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
OTF2Session::OTF2Session(const tool::output_config& output_cfg,
|
||||
uint64_t min_start,
|
||||
uint64_t max_fini)
|
||||
: config{output_cfg}
|
||||
{
|
||||
setup(output_cfg, min_start, max_fini);
|
||||
}
|
||||
|
||||
OTF2Session::~OTF2Session() { shutdown(); }
|
||||
|
||||
void
|
||||
write_otf2(const OTF2Session& otf2_session,
|
||||
const types::process& process,
|
||||
const uint16_t tree_node_id,
|
||||
const std::unordered_map<uint64_t, extended_agent>& agent_data,
|
||||
const tool::generator<types::thread>& thread_gen,
|
||||
const tool::generator<types::region>& api_gen,
|
||||
const tool::generator<types::kernel_dispatch>& kernel_dispatch_gen,
|
||||
const tool::generator<types::memory_copies>& memory_copy_gen,
|
||||
const tool::generator<types::memory_allocation>& memory_allocation_gen)
|
||||
{
|
||||
const uint64_t _no_agent_handle = 0;
|
||||
// std::numeric_limits<uint64_t>::max() - 1;
|
||||
const auto& ocfg = otf2_session.config;
|
||||
|
||||
auto _app_ts = rocprofiler::tool::timestamps_t{process.start, process.fini};
|
||||
|
||||
auto thread_event_info = std::map<pid_t, event_info>{};
|
||||
auto agent_memcpy_info =
|
||||
std::map<pid_t, std::map<uint64_t, event_info>>{}; // tid -> agent_handle ->evt
|
||||
auto agent_memalloc_info =
|
||||
std::map<pid_t, std::map<uint64_t, event_info>>{}; // // tid -> agent_handle ->evt
|
||||
auto agent_dispatch_info =
|
||||
std::map<pid_t,
|
||||
std::map<uint64_t, std::map<uint64_t, event_info>>>{}; // tid -> agent_handle
|
||||
// -> quieue_id -> evt
|
||||
auto _get_alloc_level_type_name = [](const std::string& level,
|
||||
const std::string& type) -> std::string {
|
||||
static const std::unordered_map<std::string, std::unordered_map<std::string, std::string>>
|
||||
name_map = {
|
||||
{"REAL",
|
||||
{{"ALLOC", "MEMORY_ALLOCATION_ALLOCATE"}, {"FREE", "MEMORY_ALLOCATION_FREE"}}},
|
||||
{"VIRTUAL",
|
||||
{{"ALLOC", "MEMORY_ALLOCATION_VMEM_ALLOCATE"},
|
||||
{"FREE", "MEMORY_ALLOCATION_VMEM_FREE"}}},
|
||||
{"SCRATCH", {{"ALLOC", "SCRATCH_MEMORY_ALLOC"}, {"FREE", "SCRATCH_MEMORY_FREE"}}}};
|
||||
|
||||
if((name_map.count(level) != 0u) && (name_map.at(level).count(type) != 0u))
|
||||
return name_map.at(level).at(type);
|
||||
|
||||
return level == "SCRATCH" || level == "REAL" || level == "VIRTUAL" ? level + "_MEMORY_NONE"
|
||||
: "UNKNOWN_LEVEL";
|
||||
};
|
||||
|
||||
for(auto ditr : thread_gen)
|
||||
for(const auto& itr : thread_gen.get(ditr))
|
||||
{
|
||||
auto _evt_info = event_info{location_base{process.pid, itr.tid}};
|
||||
_evt_info.name = fmt::format("Thread {}", itr.tid);
|
||||
thread_event_info.emplace(itr.tid, _evt_info);
|
||||
}
|
||||
|
||||
auto _hash_data = hash_map_t{};
|
||||
|
||||
struct evt_data
|
||||
{
|
||||
rocprofiler_callback_phase_t phase = ROCPROFILER_CALLBACK_PHASE_NONE;
|
||||
std::string name = {};
|
||||
const location_base* location = nullptr;
|
||||
uint64_t timestamp = 0;
|
||||
OTF2_AttributeList* attributes = nullptr;
|
||||
};
|
||||
|
||||
auto _data = std::deque<evt_data>{};
|
||||
auto _attr_str = std::unordered_map<size_t, std::string_view>{};
|
||||
|
||||
// copypatse from perfetto. TODO: Move to a common place?
|
||||
auto get_category_string = [](std::string_view _category) {
|
||||
static auto buffer_names = rocprofiler::sdk::get_buffer_tracing_names();
|
||||
auto _category_idx = ROCPROFILER_BUFFER_TRACING_NONE;
|
||||
for(const auto& citr : buffer_names)
|
||||
{
|
||||
if(_category == citr.name) _category_idx = citr.value;
|
||||
}
|
||||
return rocprofiler::sdk::get_perfetto_category(_category_idx);
|
||||
};
|
||||
|
||||
auto get_category_attribute = [&get_category_string,
|
||||
&_attr_str](const std::string& category) -> OTF2_AttributeList* {
|
||||
const auto* _perfetto_category = get_category_string(category);
|
||||
_attr_str.emplace(get_hash_id(_perfetto_category), _perfetto_category);
|
||||
return create_attribute_list_for_name(_perfetto_category);
|
||||
};
|
||||
|
||||
for(auto ditr : api_gen)
|
||||
for(const auto& itr : api_gen.get(ditr))
|
||||
{
|
||||
std::string _name = itr.name;
|
||||
_hash_data.emplace(get_hash_id(_name),
|
||||
region_info{_name, OTF2_REGION_ROLE_FUNCTION, OTF2_PARADIGM_HIP});
|
||||
|
||||
auto& _evt_info = thread_event_info.at(itr.tid);
|
||||
_evt_info.event_count += 1;
|
||||
|
||||
auto* attributes = get_category_attribute(itr.category);
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER,
|
||||
_name,
|
||||
_evt_info.get_location(),
|
||||
itr.start,
|
||||
attributes});
|
||||
|
||||
if(!attributes)
|
||||
{
|
||||
ROCP_FATAL << "Undefined attributes for api call " << itr.name;
|
||||
}
|
||||
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT,
|
||||
_name,
|
||||
_evt_info.get_location(),
|
||||
itr.end,
|
||||
nullptr});
|
||||
}
|
||||
|
||||
for(auto ditr : memory_copy_gen)
|
||||
for(const auto& itr : memory_copy_gen.get(ditr))
|
||||
{
|
||||
std::string _name = itr.name;
|
||||
_hash_data.emplace(
|
||||
get_hash_id(_name),
|
||||
region_info{_name, OTF2_REGION_ROLE_DATA_TRANSFER, OTF2_PARADIGM_HIP});
|
||||
|
||||
const auto& _extended_agent = agent_data.at(itr.dst_agent_abs_index);
|
||||
auto _agent_handle = _extended_agent.types_agent.id.handle;
|
||||
auto _evt_info = event_info{location_base{
|
||||
process.pid, itr.tid, _agent_handle, ROCPROFILER_AGENT_MEMORY_COPY_TYPE}};
|
||||
|
||||
auto agent_index_info = _extended_agent.agent_index;
|
||||
_evt_info.name = fmt::format("Thread {}, Copy to {} {}",
|
||||
itr.tid,
|
||||
agent_index_info.type,
|
||||
agent_index_info.as_string("-"));
|
||||
|
||||
_evt_info.event_count += 1;
|
||||
|
||||
agent_memcpy_info[itr.tid].emplace(_agent_handle, _evt_info);
|
||||
|
||||
const auto* _perfetto_name =
|
||||
rocprofiler::sdk::perfetto_category<rocprofiler::sdk::category::memory_copy>::name;
|
||||
|
||||
_attr_str.emplace(get_hash_id(_perfetto_name), _perfetto_name);
|
||||
auto* _attrs = create_attribute_list_for_name(_perfetto_name);
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER,
|
||||
_name,
|
||||
_evt_info.get_location(),
|
||||
itr.start,
|
||||
_attrs});
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT,
|
||||
_name,
|
||||
_evt_info.get_location(),
|
||||
itr.end,
|
||||
nullptr});
|
||||
};
|
||||
|
||||
for(auto ditr : memory_allocation_gen)
|
||||
for(const auto& itr : memory_allocation_gen.get(ditr))
|
||||
{
|
||||
std::string _alloc_operation = _get_alloc_level_type_name(itr.level, itr.type);
|
||||
|
||||
const auto* _perfetto_name = rocprofiler::sdk::perfetto_category<
|
||||
rocprofiler::sdk::category::memory_allocation>::name;
|
||||
|
||||
_attr_str.emplace(get_hash_id(_perfetto_name), _perfetto_name);
|
||||
auto* _attrs = create_attribute_list_for_name(_perfetto_name);
|
||||
|
||||
if(itr.type == "ALLOC")
|
||||
{
|
||||
_hash_data.emplace(
|
||||
get_hash_id(_alloc_operation),
|
||||
region_info{_alloc_operation, OTF2_REGION_ROLE_ALLOCATE, OTF2_PARADIGM_HIP});
|
||||
|
||||
const auto& _extended_agent = agent_data.at(itr.agent_abs_index);
|
||||
auto _handle = _extended_agent.types_agent.id.handle;
|
||||
|
||||
auto _evt_info = event_info{location_base{
|
||||
process.pid, itr.tid, _handle, ROCPROFILER_AGENT_MEMORY_ALLOC_TYPE}};
|
||||
|
||||
auto agent_index_info = _extended_agent.agent_index;
|
||||
_evt_info.name = fmt::format("Thread {}, Memory Allocate at {} {}",
|
||||
itr.tid,
|
||||
agent_index_info.type,
|
||||
agent_index_info.as_string("-"));
|
||||
|
||||
agent_memalloc_info[itr.tid].emplace(_handle, _evt_info);
|
||||
_evt_info.event_count += 1;
|
||||
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER,
|
||||
_alloc_operation,
|
||||
_evt_info.get_location(),
|
||||
itr.start,
|
||||
_attrs});
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT,
|
||||
_alloc_operation,
|
||||
_evt_info.get_location(),
|
||||
itr.end,
|
||||
nullptr});
|
||||
}
|
||||
else if(itr.type == "FREE") //
|
||||
{
|
||||
_hash_data.emplace(
|
||||
get_hash_id(_alloc_operation),
|
||||
region_info{_alloc_operation, OTF2_REGION_ROLE_DEALLOCATE, OTF2_PARADIGM_HIP});
|
||||
|
||||
auto _evt_info = event_info{location_base{
|
||||
process.pid, itr.tid, _no_agent_handle, ROCPROFILER_AGENT_MEMORY_DEALLOC_TYPE}};
|
||||
_evt_info.name = fmt::format("Thread {}, Memory Deallocate (Free)", itr.tid);
|
||||
|
||||
agent_memalloc_info[itr.tid].emplace(_no_agent_handle, _evt_info);
|
||||
|
||||
_evt_info.event_count += 1;
|
||||
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER,
|
||||
_alloc_operation,
|
||||
_evt_info.get_location(),
|
||||
itr.start,
|
||||
_attrs});
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT,
|
||||
_alloc_operation,
|
||||
_evt_info.get_location(),
|
||||
itr.end,
|
||||
nullptr});
|
||||
}
|
||||
else
|
||||
{
|
||||
auto _evt_info = event_info{location_base{process.pid, itr.tid}};
|
||||
_evt_info.name = fmt::format("Thread {}, Memory Operation UNK", itr.tid);
|
||||
_evt_info.event_count += 1;
|
||||
agent_memalloc_info[itr.tid].emplace(_no_agent_handle, _evt_info);
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER,
|
||||
_alloc_operation,
|
||||
_evt_info.get_location(),
|
||||
itr.start,
|
||||
_attrs});
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT,
|
||||
_alloc_operation,
|
||||
_evt_info.get_location(),
|
||||
itr.end,
|
||||
nullptr});
|
||||
}
|
||||
}
|
||||
|
||||
auto _queue_ids = std::map<uint64_t, uint64_t>{};
|
||||
for(auto ditr : kernel_dispatch_gen)
|
||||
for(const auto& itr : kernel_dispatch_gen.get(ditr))
|
||||
{
|
||||
auto _name = fmt::format(
|
||||
"{}", (ocfg.kernel_rename && !itr.region.empty()) ? itr.region : itr.name);
|
||||
_hash_data.emplace(get_hash_id(_name),
|
||||
region_info{_name, OTF2_REGION_ROLE_FUNCTION, OTF2_PARADIGM_HIP});
|
||||
|
||||
const auto* _perfetto_name = rocprofiler::sdk::perfetto_category<
|
||||
rocprofiler::sdk::category::kernel_dispatch>::name;
|
||||
|
||||
_attr_str.emplace(get_hash_id(_perfetto_name), _perfetto_name);
|
||||
auto* _attrs = create_attribute_list_for_name(_perfetto_name);
|
||||
|
||||
const auto& _extended_agent = agent_data.at(itr.agent_abs_index);
|
||||
auto _handle = _extended_agent.types_agent.id.handle;
|
||||
auto agent_index_info = _extended_agent.agent_index;
|
||||
|
||||
auto _evt_info = event_info{location_base{
|
||||
process.pid, itr.tid, _handle, ROCPROFILER_AGENT_DISPATCH_TYPE, itr.queue_id}};
|
||||
|
||||
if(_queue_ids.count(itr.queue_id) == 0)
|
||||
{
|
||||
_queue_ids.emplace(itr.queue_id, _queue_ids.size());
|
||||
}
|
||||
|
||||
_evt_info.name = fmt::format("Thread {}, Compute on {} {}, Queue {}",
|
||||
|
||||
itr.tid,
|
||||
agent_index_info.type,
|
||||
agent_index_info.as_string("-"),
|
||||
_queue_ids.at(itr.queue_id));
|
||||
|
||||
_evt_info.event_count += 1;
|
||||
agent_dispatch_info[itr.tid][_handle].emplace(itr.queue_id, _evt_info);
|
||||
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER,
|
||||
_name,
|
||||
_evt_info.get_location(),
|
||||
itr.start,
|
||||
_attrs});
|
||||
_data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT,
|
||||
_name,
|
||||
_evt_info.get_location(),
|
||||
itr.end,
|
||||
nullptr});
|
||||
}
|
||||
|
||||
std::sort(_data.begin(), _data.end(), [](const evt_data& lhs, const evt_data& rhs) {
|
||||
if(lhs.timestamp != rhs.timestamp) return (lhs.timestamp < rhs.timestamp);
|
||||
if(lhs.phase != rhs.phase) return (lhs.phase > rhs.phase);
|
||||
return (*lhs.location < *rhs.location);
|
||||
});
|
||||
|
||||
for(const auto& itr : _data)
|
||||
{
|
||||
add_event(itr.name, *itr.location, itr.phase, itr.timestamp, itr.attributes);
|
||||
ROCP_ERROR_IF(itr.timestamp < _app_ts.app_start_time)
|
||||
<< "event found with timestamp < app start time by "
|
||||
<< (_app_ts.app_start_time - itr.timestamp) << " nsec :: " << itr.name;
|
||||
ROCP_ERROR_IF(itr.timestamp > _app_ts.app_end_time)
|
||||
<< "event found with timestamp > app end time by "
|
||||
<< (itr.timestamp - _app_ts.app_end_time) << " nsec :: " << itr.name;
|
||||
}
|
||||
|
||||
for(const auto& itr : _data)
|
||||
{
|
||||
if(itr.attributes) OTF2_AttributeList_Delete(itr.attributes);
|
||||
}
|
||||
|
||||
OTF2_CHECK(OTF2_Archive_CloseEvtFiles(archive));
|
||||
|
||||
OTF2_CHECK(OTF2_Archive_OpenDefFiles(archive));
|
||||
for(auto& itr : get_locations())
|
||||
{
|
||||
OTF2_DefWriter* def_writer = OTF2_Archive_GetDefWriter(archive, itr->index);
|
||||
OTF2_Archive_CloseDefWriter(archive, def_writer);
|
||||
}
|
||||
OTF2_CHECK(OTF2_Archive_CloseDefFiles(archive));
|
||||
|
||||
for(const auto& itr : _hash_data)
|
||||
{
|
||||
if(itr.first != 0)
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteString(
|
||||
global_def_writer, itr.first, itr.second.name.c_str()));
|
||||
}
|
||||
|
||||
for(const auto& itr : _hash_data)
|
||||
{
|
||||
if(itr.first != 0)
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteRegion(global_def_writer,
|
||||
itr.first,
|
||||
itr.first,
|
||||
0,
|
||||
0,
|
||||
itr.second.region_role,
|
||||
itr.second.paradigm,
|
||||
OTF2_REGION_FLAG_NONE,
|
||||
0,
|
||||
0,
|
||||
0));
|
||||
}
|
||||
|
||||
auto add_write_string = [](size_t _hash, std::string_view _name) {
|
||||
static auto _existing = std::unordered_set<size_t>{};
|
||||
if(_hash > 0 && _existing.count(_hash) == 0)
|
||||
{
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteString(global_def_writer, _hash, _name.data()));
|
||||
_existing.emplace(_hash);
|
||||
}
|
||||
};
|
||||
|
||||
for(const auto& itr : _attr_str)
|
||||
add_write_string(itr.first, itr.second);
|
||||
|
||||
std::istringstream command_line(process.command);
|
||||
std::string _exe_name;
|
||||
command_line >> _exe_name; // Extracts characters until whitespace
|
||||
_exe_name = fmt::format("{}", _exe_name);
|
||||
auto _exe_hash = get_hash_id(_exe_name);
|
||||
add_write_string(_exe_hash, _exe_name);
|
||||
|
||||
auto _node_name = std::string{"node"};
|
||||
{
|
||||
if(!process.hostname.empty())
|
||||
{
|
||||
if(process.hostname.length() >= PATH_MAX)
|
||||
{
|
||||
_node_name = process.hostname.substr(0, PATH_MAX - 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
_node_name = process.hostname;
|
||||
}
|
||||
}
|
||||
}
|
||||
_node_name = fmt::format("{}", _node_name);
|
||||
auto _node_hash = get_hash_id(_node_name);
|
||||
add_write_string(_node_hash, _node_name);
|
||||
|
||||
// debug
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteSystemTreeNode(
|
||||
global_def_writer, tree_node_id, _exe_hash, _node_hash, OTF2_UNDEFINED_SYSTEM_TREE_NODE));
|
||||
|
||||
// Process
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocationGroup(global_def_writer,
|
||||
tree_node_id,
|
||||
_exe_hash,
|
||||
OTF2_LOCATION_GROUP_TYPE_PROCESS,
|
||||
tree_node_id,
|
||||
OTF2_UNDEFINED_LOCATION_GROUP));
|
||||
|
||||
// Accelerators (must be shared between the processes)
|
||||
for(const auto& [abs_idx, extended_agent] : agent_data)
|
||||
{
|
||||
auto _handle = extended_agent.types_agent.id.handle;
|
||||
const auto _name = std::string_view{extended_agent.labeled_name};
|
||||
auto _hash = get_hash_id(_name);
|
||||
|
||||
add_write_string(_hash, _name);
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocationGroup(global_def_writer,
|
||||
_handle,
|
||||
_hash,
|
||||
OTF2_LOCATION_GROUP_TYPE_ACCELERATOR,
|
||||
tree_node_id,
|
||||
OTF2_UNDEFINED_LOCATION_GROUP));
|
||||
}
|
||||
|
||||
// Thread Events
|
||||
for(auto& [tid, evt] : thread_event_info)
|
||||
{
|
||||
auto _hash = get_hash_id(evt.name);
|
||||
|
||||
add_write_string(_hash, evt.name);
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer,
|
||||
evt.id(), // id
|
||||
_hash,
|
||||
OTF2_LOCATION_TYPE_CPU_THREAD,
|
||||
2 * evt.event_count, // # events
|
||||
tree_node_id // location group
|
||||
));
|
||||
}
|
||||
|
||||
// Memcpy Events
|
||||
for(auto& [tid, itr] : agent_memcpy_info)
|
||||
{
|
||||
for(auto& [agent_handle, evt] : itr)
|
||||
{
|
||||
auto _hash = get_hash_id(evt.name);
|
||||
|
||||
add_write_string(_hash, evt.name);
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer,
|
||||
evt.id(), // id
|
||||
_hash,
|
||||
OTF2_LOCATION_TYPE_ACCELERATOR_STREAM,
|
||||
2 * evt.event_count, // # events
|
||||
agent_handle // location group
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Memalloc Events
|
||||
for(auto& [tid, itr] : agent_memalloc_info)
|
||||
{
|
||||
for(auto& [agent_handle, evt] : itr)
|
||||
{
|
||||
auto _hash = get_hash_id(evt.name);
|
||||
|
||||
add_write_string(_hash, evt.name);
|
||||
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer,
|
||||
evt.id(), // id
|
||||
_hash,
|
||||
OTF2_LOCATION_TYPE_ACCELERATOR_STREAM,
|
||||
2 * evt.event_count, // # events
|
||||
agent_handle // location group
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatch Events
|
||||
for(auto& [tid, itr] : agent_dispatch_info)
|
||||
{
|
||||
for(auto& [agent_handle, qitr] : itr)
|
||||
{
|
||||
for(auto& [queue_id, evt] : qitr)
|
||||
{
|
||||
auto _hash = get_hash_id(evt.name);
|
||||
|
||||
add_write_string(_hash, evt.name);
|
||||
|
||||
OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer,
|
||||
evt.id(), // id
|
||||
_hash,
|
||||
OTF2_LOCATION_TYPE_ACCELERATOR_STREAM,
|
||||
2 * evt.event_count, // # events
|
||||
agent_handle // location group
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace output
|
||||
} // namespace rocpd
|
||||
@@ -1,71 +0,0 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/python/rocpd/source/types.hpp"
|
||||
|
||||
#include "lib/common/defines.hpp"
|
||||
#include "lib/output/generator.hpp"
|
||||
#include "lib/output/metadata.hpp"
|
||||
#include "lib/output/node_info.hpp"
|
||||
#include "lib/output/output_config.hpp"
|
||||
#include "lib/output/sql/common.hpp"
|
||||
#include "lib/output/stream_info.hpp"
|
||||
#include "lib/rocprofiler-sdk-tool/config.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace rocpd
|
||||
{
|
||||
namespace output
|
||||
{
|
||||
namespace tool = rocprofiler::tool;
|
||||
|
||||
struct extended_agent
|
||||
{
|
||||
const rocpd::types::agent& types_agent;
|
||||
const tool::agent_index agent_index;
|
||||
const std::string labeled_name;
|
||||
};
|
||||
|
||||
struct OTF2Session
|
||||
{
|
||||
OTF2Session(const tool::output_config& output_cfg, uint64_t min_start, uint64_t max_fini);
|
||||
|
||||
~OTF2Session();
|
||||
|
||||
const tool::output_config& config;
|
||||
};
|
||||
|
||||
void
|
||||
write_otf2(const OTF2Session& session,
|
||||
const types::process& process,
|
||||
const uint16_t tree_node_id,
|
||||
const std::unordered_map<uint64_t, extended_agent>& agent_data,
|
||||
const tool::generator<types::thread>& thread_gen,
|
||||
const tool::generator<types::region>& api_gen,
|
||||
const tool::generator<types::kernel_dispatch>& kernel_dispatch_gen,
|
||||
const tool::generator<types::memory_copies>& memory_copy_gen,
|
||||
const tool::generator<types::memory_allocation>& memory_allocation_gen);
|
||||
} // namespace output
|
||||
} // namespace rocpd
|
||||
Reference in New Issue
Block a user