diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/libpyrocpd.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/libpyrocpd.cpp index 009afd694b..3bf5b4cb68 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/libpyrocpd.cpp +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/libpyrocpd.cpp @@ -25,7 +25,6 @@ #include "lib/python/rocpd/source/common.hpp" #include "lib/python/rocpd/source/functions.hpp" #include "lib/python/rocpd/source/interop.hpp" -#include "lib/python/rocpd/source/otf2.hpp" #include "lib/python/rocpd/source/perfetto.hpp" #include "lib/python/rocpd/source/serialization/sql.hpp" #include "lib/python/rocpd/source/sql_generator.hpp" @@ -508,147 +507,6 @@ PYBIND11_MODULE(libpyrocpd, pyrocpd) }, "Write pftrace output file from rocpd SQLite3 database"); - pyrocpd.def( - "write_otf2", - [](rocpd::RocpdImportData& data, const tool::output_config& output_cfg) { - auto _create_agent_index = - [&output_cfg](const rocpd::types::agent& _agent) -> tool::agent_index { - auto ret_index = tool::create_agent_index( - output_cfg.agent_index_value, - _agent.node_id, // absolute index - static_cast(_agent.logical_node_id), // relative index - static_cast(_agent.logical_node_type_id), // type-relative index - std::string_view(_agent.type)); - return ret_index; - }; - - constexpr auto kernels_order_by = - "agent_abs_index ASC, stream_id ASC, queue_id ASC, start ASC, end DESC"; - - // to initialise the OTF@ session properly we need to know: - // (1) the process with the earliest start time - // (2) find the process with the longest duration - uint64_t min_start_time = std::numeric_limits::max(); - uint64_t max_fini_time = std::numeric_limits::min(); - for(auto obj : {data.connection}) - { - auto* conn = rocpd::interop::get_connection(std::move(obj)); - - // min start - sqlite3_stmt* _stmt_min_start_max_fini = nullptr; - uint64_t _min_start_time = std::numeric_limits::max(); - uint64_t _max_fini_time = std::numeric_limits::min(); - - sqlite3_prepare_v2(conn, - "SELECT MIN(start), MAX(fini) FROM processes;", - -1, - &_stmt_min_start_max_fini, - nullptr); - if(sqlite3_step(_stmt_min_start_max_fini) == SQLITE_ROW) - { - _min_start_time = - static_cast(sqlite3_column_int64(_stmt_min_start_max_fini, 0)); - _max_fini_time = - static_cast(sqlite3_column_int64(_stmt_min_start_max_fini, 1)); - } - - sqlite3_finalize(_stmt_min_start_max_fini); - min_start_time = std::min(min_start_time, _min_start_time); - max_fini_time = std::max(max_fini_time, _max_fini_time); - } - - auto otf2_session = - rocpd::output::OTF2Session(output_cfg, min_start_time, max_fini_time); - - auto sqlgen_otf2 = common::simple_timer{ - fmt::format("OTF2 generation from {} SQL database(s)", data.size())}; - - uint16_t _process_counter = 0; - for(auto obj : {data.connection}) - { - auto* conn = rocpd::interop::get_connection(std::move(obj)); - auto nodes = rocpd::read(conn); - for(const auto& nitr : nodes) - { - auto agents = rocpd::read( - conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); - auto processes = rocpd::read( - conn, fmt::format("WHERE guid = '{}' AND nid = {}", nitr.guid, nitr.id)); - - // absolute_index |-> (agent, agent_index) - auto agents_map = std::unordered_map{}; - - for(const auto& itr : agents) - { - const rocprofiler::tool::agent_index new_index = _create_agent_index(itr); - const std::string labeled_name = fmt::format("{}", itr.name); - agents_map.emplace( - itr.absolute_index, - rocpd::output::extended_agent{itr, new_index, labeled_name}); - } - - for(const auto& pitr : processes) - { - ROCP_FATAL_IF(pitr.nid != nitr.id || pitr.guid != nitr.guid) - << fmt::format("Found process with a mismatched nid/guid. process: " - "{}/{} vs. node: {}/{}", - pitr.nid, - pitr.guid, - nitr.id, - nitr.guid); - - auto select_guid_nid_pid = - [&nitr, &pitr](std::string_view tbl, - std::string_view where_extra_condition = "") { - return fmt::format("SELECT * FROM {} WHERE guid = '{}' AND " - "nid = {} AND pid = {} {}", - tbl, - pitr.guid, - nitr.id, - pitr.pid, - where_extra_condition); - }; - - constexpr auto region_order_by = "start ASC, end DESC"; - - auto _sqlgen_otf2 = common::simple_timer{fmt::format( - "OTF2 generation from SQL for process {} (total)", pitr.pid)}; - - auto kernels = rocpd::sql_generator{ - conn, select_guid_nid_pid("kernels"), kernels_order_by}; - - auto memory_allocations = - rocpd::sql_generator{ - conn, select_guid_nid_pid("memory_allocations"), region_order_by}; - - auto memory_copies = rocpd::sql_generator{ - conn, select_guid_nid_pid("memory_copies"), region_order_by}; - - auto regions = rocpd::sql_generator{ - conn, select_guid_nid_pid("regions"), region_order_by}; - - auto threads = rocpd::sql_generator{ - conn, select_guid_nid_pid("threads")}; - - ROCP_TRACE << "Starting OTF2 generation from SQL for process " << pitr.pid; - auto _sqlgen_perfw = common::simple_timer{fmt::format( - "OTF2 generation from SQL for process {} (write)", pitr.pid)}; - rocpd::output::write_otf2(otf2_session, - pitr, - _process_counter, - agents_map, - threads, - regions, - kernels, - memory_copies, - memory_allocations); - _process_counter++; - } - } - } - }, - "Write OTF2 output file from rocpd SQLite3 database"); - // NOLINTEND(performance-unnecessary-value-param) // reads in all the agent info from database diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/otf2.py b/projects/rocprofiler-sdk/source/lib/python/rocpd/otf2.py index 112f2b1824..3a3f0b1521 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/otf2.py +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/otf2.py @@ -23,13 +23,440 @@ # THE SOFTWARE. ############################################################################### +import os +import otf2 +from otf2.enums import LocationType, LocationGroupType, RegionRole, Paradigm +import shutil +import time +from collections import defaultdict + from .importer import RocpdImportData from . import output_config -from . import libpyrocpd + + +def get_perfetto_category_name(category): + """Map category names to perfetto category names""" + category_map = { + "NONE": "none", + "HSA_CORE_API": "hsa_api", + "HSA_AMD_EXT_API": "hsa_api", + "HSA_IMAGE_EXT_API": "hsa_api", + "HSA_FINALIZE_EXT_API": "hsa_api", + "HIP_RUNTIME_API": "hip_api", + "HIP_COMPILER_API": "hip_api", + "MARKER_CORE_API": "marker_api", + "MARKER_CORE_RANGE_API": "marker_api", + "MARKER_CONTROL_API": "marker_api", + "MARKER_NAME_API": "marker_api", + "MEMORY_COPY": "memory_copy", + "MEMORY_ALLOCATION": "memory_allocation", + "KERNEL_DISPATCH": "kernel_dispatch", + "SCRATCH_MEMORY": "scratch_memory", + "CORRELATION_ID_RETIREMENT": "none", + "RCCL_API": "rccl_api", + "OMPT": "openmp", + "RUNTIME_INITIALIZATION": "none", + "ROCDECODE_API": "rocdecode_api", + "ROCJPEG_API": "rocjpeg_api", + "HIP_STREAM": "hip_api", + "HIP_RUNTIME_API_EXT": "hip_api", + "HIP_COMPILER_API_EXT": "hip_api", + "ROCDECODE_API_EXT": "rocdecode_api", + "KFD_EVENT_PAGE_MIGRATE": "kfd_events", + "KFD_EVENT_PAGE_FAULT": "kfd_events", + "KFD_EVENT_QUEUE": "kfd_events", + "KFD_EVENT_UNMAP_FROM_GPU": "kfd_events", + "KFD_EVENT_DROPPED_EVENTS": "kfd_events", + "KFD_PAGE_MIGRATE": "kfd_events", + "KFD_PAGE_FAULT": "kfd_events", + "KFD_QUEUE": "kfd_events", + } + return category_map.get(category, "none") + + +def allocation_level_type_name(level, type): + if level == "REAL": + name = "MEMORY" + elif level == "VIRTUAL": + name = "MEMORY_VMEM" + elif level == "SCRATCH": + name = "SCRATCH_MEMORY" + else: + return "UNKNOWN_LEVEL" + + if type == "ALLOC": + return name + "_ALLOCATE" + elif type == "FREE": + return name + "_FREE" + else: + return level + "_MEMORY_NONE" + + return name def write_otf2(importData, config): - return libpyrocpd.write_otf2(importData, config) + + timer_resolution = 1_000_000_000 + trace_dir = getattr(config, "output_path", "./otf_traces") + trace_file = f"{getattr(config, 'output_file', 'traces')}_results" + if not os.path.exists(trace_dir): + os.makedirs(trace_dir) + + if os.path.exists(trace_dir): + trace_subdir = os.path.join(trace_dir, trace_file) + if os.path.exists(trace_subdir): + shutil.rmtree(trace_subdir) + + otf2_file = os.path.join(trace_dir, f"{trace_file}.otf2") + def_file = os.path.join(trace_dir, f"{trace_file}.def") + + print(f"Writing: {otf2_file}") + print(f"Writing: {def_file}") + print(f"Writing: {trace_subdir} directory") + + if os.path.exists(otf2_file): + os.remove(otf2_file) + if os.path.exists(def_file): + os.remove(def_file) + with otf2.writer.open( + trace_dir, trace_file, timer_resolution=timer_resolution + ) as archive: + conn = getattr(importData, "connection", None) + if conn is not None: + try: + cursor = conn.cursor() + cursor.execute("SELECT MIN(start), MAX(fini) FROM processes;") + min_start, max_finish = cursor.fetchone() + with otf2.writer.DefinitionWriter(archive) as global_def_writer: + global_offset = min_start + duration = max_finish - min_start + realtime_timestamp = int(round(time.time() * timer_resolution)) + global_def_writer.write_clock_properties( + timer_resolution, global_offset, duration, realtime_timestamp + ) + perfetto_category = archive.definitions.attribute( + name="category", description="tracing category" + ) + memory_copy_attributes = {perfetto_category: "memory_copy"} + memory_allocation_attributes = { + perfetto_category: "memory_allocation" + } + kernel_attributes = {perfetto_category: "kernel_dispatch"} + kernel_rename = getattr(config, "kernel_rename") + agent_index_value = getattr(config, "agent_index_value") + + cursor = conn.cursor() + cursor.execute("SELECT DISTINCT guid, id FROM rocpd_info_node") + for row in cursor: + guid, nid = row + cursor = conn.cursor() + cursor.execute( + "SELECT pid, hostname, command FROM processes WHERE guid = ? AND nid = ?", + (guid, nid), + ) + for row in cursor: + pid, hostname, command = row + tree_node = archive.definitions.system_tree_node( + name=command, class_name=hostname, parent=None + ) + cpu_location_group = archive.definitions.location_group( + name=command, + location_group_type=LocationGroupType.PROCESS, + system_tree_parent=tree_node, + ) + api_calls = defaultdict(list) + memory_copies = defaultdict(list) + memory_allocations = defaultdict(list) + memory_deallocations = defaultdict(list) + memory_unknown = defaultdict(list) + kernel_dispatches = defaultdict(list) + agents = {} + + cursor = conn.cursor() + cursor.execute( + """SELECT tid, start, end, name, category FROM regions + WHERE guid = ? AND nid = ? AND pid = ?""", + (guid, nid, pid), + ) + for row in cursor: + tid, start, end, name, category = row + api_calls[tid].append((start, end, name, category)) + + cursor = conn.cursor() + cursor.execute( + """SELECT tid, dst_agent_abs_index, start, end, name + FROM memory_copies WHERE guid = ? AND nid = ? + AND pid = ? ORDER BY start ASC""", + (guid, nid, pid), + ) + for row in cursor: + tid, agent, start, end, name = row + memory_copies[(tid, agent)].append((start, end, name)) + + cursor = conn.cursor() + cursor.execute( + """SELECT tid, agent_abs_index, start, end, level, type + FROM memory_allocations WHERE guid = ? AND nid = ? + AND pid = ? ORDER BY start ASC""", + (guid, nid, pid), + ) + for row in cursor: + tid, agent, start, end, level, type = row + name = allocation_level_type_name(level, type) + if type == "ALLOC": + memory_allocations[(tid, agent)].append( + (start, end, name) + ) + elif type == "FREE": + memory_deallocations[tid].append((start, end, name)) + else: + memory_unknown[tid].append((start, end, name)) + + cursor = conn.cursor() + cursor.execute( + """SELECT tid, agent_abs_index, queue_id, + start, end, name, region + FROM kernels WHERE guid = ? AND nid = ? + AND pid = ? ORDER BY start ASC""", + (guid, nid, pid), + ) + for row in cursor: + tid, agent, queue, start, end, name, region = row + if kernel_rename and region: + kernel_dispatches[(tid, agent, queue)].append( + (start, end, region) + ) + else: + kernel_dispatches[(tid, agent, queue)].append( + (start, end, name) + ) + + cursor = conn.cursor() + cursor.execute( + "SELECT id, name, type FROM rocpd_info_agent WHERE guid = ? AND nid = ?", + (guid, nid), + ) + for row in cursor: + id, name, type = row + agents[id] = ( + ( + f"{type} Agent-{id}" + if agent_index_value + else f"{type} {type}-{id}" + ), + archive.definitions.location_group( + name=name, + location_group_type=LocationGroupType.ACCELERATOR, + system_tree_parent=tree_node, + ), + ) + + # Write API Call Events + for tid, data in api_calls.items(): + cpu_location = archive.definitions.location( + name=f"Thread {tid}", + type=LocationType.CPU_THREAD, + group=cpu_location_group, + ) + event_writer = otf2.writer.EventWriter( + archive, cpu_location + ) + events = [] + for start, end, name, category in data: + attributes = { + perfetto_category: get_perfetto_category_name( + category + ) + } + + region = archive.definitions.region( + name=name, + region_role=RegionRole.FUNCTION, + paradigm=Paradigm.HIP, + ) + events.append((start, "start", region, attributes)) + events.append((end, "end", region, None)) + events.sort(key=lambda x: x[0]) + for timestamp, event_type, region, attributes in events: + if event_type == "start": + event_writer.enter( + timestamp, region, attributes=attributes + ) + else: # if event_type == "end": + event_writer.leave(timestamp, region) + + # Write Memory Copy Events + for (tid, agent_id), data in memory_copies.items(): + agent_name, agent_location_group = agents.get( + agent_id, (f"Unknown Agent {agent_id}", None) + ) + memory_copy_location = archive.definitions.location( + name=f"Thread {tid}, Copy to {agent_name}", + type=LocationType.ACCELERATOR_STREAM, + group=agent_location_group, + ) + memory_copy_writer = otf2.writer.EventWriter( + archive, memory_copy_location + ) + memory_copy_events = [] + for start, end, name in data: + region = archive.definitions.region( + name=name, + region_role=RegionRole.DATA_TRANSFER, + paradigm=Paradigm.HIP, + ) + memory_copy_events.append((start, "enter", region)) + memory_copy_events.append((end, "leave", region)) + memory_copy_events.sort(key=lambda x: x[0]) + for timestamp, event_type, region in memory_copy_events: + if event_type == "enter": + memory_copy_writer.enter( + timestamp, + region, + attributes=memory_copy_attributes, + ) + else: # if event_type == "leave": + memory_copy_writer.leave(timestamp, region) + + # Write Memory Allocation Events + for (tid, agent_id), data in memory_allocations.items(): + agent_name, agent_location_group = agents.get( + agent_id, (f"Unknown Agent {agent_id}", None) + ) + memory_allocation_location = archive.definitions.location( + name=f"Thread {tid}, Memory Allocate at {agent_name}", + type=LocationType.ACCELERATOR_STREAM, + group=agent_location_group, + ) + memory_allocation_writer = otf2.writer.EventWriter( + archive, memory_allocation_location + ) + memory_allocation_events = [] + for start, end, name in data: + region = archive.definitions.region( + name=name, + region_role=RegionRole.ALLOCATE, + paradigm=Paradigm.HIP, + ) + memory_allocation_events.append( + (start, "enter", region) + ) + memory_allocation_events.append( + (end, "leave", region) + ) + memory_allocation_events.sort(key=lambda x: x[0]) + for ( + timestamp, + event_type, + region, + ) in memory_allocation_events: + if event_type == "enter": + memory_allocation_writer.enter( + timestamp, + region, + attributes=memory_allocation_attributes, + ) + else: # if event_type == "leave": + memory_allocation_writer.leave(timestamp, region) + + # Write Memory Deallocation Events + for tid, data in memory_deallocations.items(): + memory_free_location = archive.definitions.location( + name=f"Thread {tid}, Memory Deallocate (Free)", + type=LocationType.ACCELERATOR_STREAM, + group=cpu_location_group, + ) + memory_free_writer = otf2.writer.EventWriter( + archive, memory_free_location + ) + memory_free_events = [] + for start, end, name in data: + region = archive.definitions.region( + name=name, + region_role=RegionRole.DEALLOCATE, + paradigm=Paradigm.HIP, + ) + memory_free_events.append((start, "enter", region)) + memory_free_events.append((end, "leave", region)) + memory_free_events.sort(key=lambda x: x[0]) + for timestamp, event_type, region in memory_free_events: + if event_type == "enter": + memory_free_writer.enter( + timestamp, + region, + attributes=memory_allocation_attributes, + ) + else: # if event_type == "leave": + memory_free_writer.leave(timestamp, region) + + # Write Unknown Memory Events + for tid, data in memory_unknown.items(): + memory_unknown_location = archive.definitions.location( + name=f"Thread {tid}, Memory Operation UNK", + type=LocationType.ACCELERATOR_STREAM, + group=cpu_location_group, + ) + memory_unknown_writer = otf2.writer.EventWriter( + archive, memory_unknown_location + ) + memory_unknown_events = [] + for start, end, name in data: + region = archive.definitions.region( + name=name, + ) + memory_unknown_events.append((start, "enter", region)) + memory_unknown_events.append((end, "leave", region)) + memory_unknown_events.sort(key=lambda x: x[0]) + for ( + timestamp, + event_type, + region, + ) in memory_unknown_events: + if event_type == "enter": + memory_unknown_writer.enter( + timestamp, + region, + attributes=memory_allocation_attributes, + ) + else: # if event_type == "leave": + memory_unknown_writer.leave(timestamp, region) + + # Write Kernel Dispatch Events + for (tid, agent_id, queue), data in kernel_dispatches.items(): + agent_name, agent_location_group = agents.get( + agent_id, (f"Unknown Agent {agent_id}", None) + ) + kernel_location = archive.definitions.location( + name=f"Thread {tid}, Compute on {agent_name}, Queue {queue}", + type=LocationType.ACCELERATOR_STREAM, + group=agent_location_group, + ) + kernel_writer = otf2.writer.EventWriter( + archive, kernel_location + ) + kernel_events = [] + for start, end, name in data: + region = archive.definitions.region( + name=name, + region_role=RegionRole.FUNCTION, + paradigm=Paradigm.HIP, + ) + kernel_events.append((start, "enter", region)) + kernel_events.append((end, "leave", region)) + kernel_events.sort(key=lambda x: x[0]) + for timestamp, event_type, region in kernel_events: + if event_type == "enter": + kernel_writer.enter( + timestamp, + region, + attributes=kernel_attributes, + ) + else: # if event_type == "leave": + kernel_writer.leave(timestamp, region) + except Exception as e: + print("Could not query sqlite database:", e) + else: + print("No sqlite connection found in importData.") def execute(input, config=None, **kwargs): diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/CMakeLists.txt index 70ac637c1f..3812ad1020 100644 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/CMakeLists.txt @@ -2,10 +2,10 @@ # libpyrocpd python binding sources # -set(libpyrocpd_source_headers common.hpp functions.hpp interop.hpp perfetto.hpp otf2.hpp +set(libpyrocpd_source_headers common.hpp functions.hpp interop.hpp perfetto.hpp sql_generator.hpp pysqlite_Connection.h types.hpp) -set(libpyrocpd_source_sources functions.cpp interop.cpp otf2.cpp perfetto.cpp types.cpp) +set(libpyrocpd_source_sources functions.cpp interop.cpp perfetto.cpp types.cpp) foreach(_PYTHON_VERSION ${ROCPROFILER_PYTHON_VERSIONS}) rocprofiler_rocpd_python_bindings_target_sources( diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.cpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.cpp deleted file mode 100644 index 942dbf5802..0000000000 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.cpp +++ /dev/null @@ -1,905 +0,0 @@ -// MIT License -// -// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#include "lib/python/rocpd/source/otf2.hpp" - -#include "lib/common/defines.hpp" -#include "lib/common/filesystem.hpp" -#include "lib/common/hasher.hpp" -#include "lib/common/mpl.hpp" -#include "lib/common/units.hpp" -#include "lib/common/utility.hpp" -#include "lib/output/generator.hpp" -#include "lib/output/metadata.hpp" -#include "lib/output/node_info.hpp" -#include "lib/output/output_config.hpp" -#include "lib/output/output_stream.hpp" -#include "lib/output/sql/common.hpp" -#include "lib/output/stream_info.hpp" -#include "lib/output/timestamps.hpp" -#include "lib/rocprofiler-sdk-tool/config.hpp" - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define OTF2_CHECK(result) \ - { \ - OTF2_ErrorCode ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) = result; \ - if(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) != OTF2_SUCCESS) \ - { \ - auto _err_name = OTF2_Error_GetName(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__)); \ - auto _err_msg = \ - OTF2_Error_GetDescription(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__)); \ - ROCP_FATAL << #result << " failed with error code " << _err_name \ - << " (code=" << ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) \ - << ") :: " << _err_msg; \ - } \ - } - -namespace rocpd -{ -namespace output -{ -namespace -{ -template -struct array_hash -{ - size_t operator()(const std::array& _data) const - { - constexpr size_t seed = 0x9e3779b9; - size_t _val = 0; - for(const auto& itr : _data) - _val ^= std::hash{}(itr) + seed + (_val << 6) + (_val >> 2); - return _val; - } - - template - size_t operator()(Up... _data) const - { - static_assert(sizeof...(Up) == N, "Insufficient data"); - return operator()(std::array{std::forward(_data)...}); - } -}; - -struct region_info -{ - std::string name = {}; - OTF2_RegionRole_enum region_role = OTF2_REGION_ROLE_FUNCTION; - OTF2_Paradigm_enum paradigm = OTF2_PARADIGM_HIP; -}; - -OTF2_FlushType -pre_flush(void* userData, - OTF2_FileType fileType, - OTF2_LocationRef location, - void* callerData, - bool fini); - -OTF2_TimeStamp -post_flush(void* userData, OTF2_FileType fileType, OTF2_LocationRef location); - -template -void -consume_variables(Args&&...) -{} - -using event_writer_t = OTF2_EvtWriter; -using archive_t = OTF2_Archive; -using attribute_list_t = OTF2_AttributeList; -using hash_value_t = size_t; -using hash_map_t = std::unordered_map; - -auto main_tid = rocprofiler::common::get_tid(); -archive_t* archive = nullptr; -auto flush_callbacks = OTF2_FlushCallbacks{pre_flush, post_flush}; -OTF2_GlobalDefWriter* global_def_writer = nullptr; // shared between data bases (processes) - -enum rocprofiler_location_type_t -{ - ROCPROFILER_AGENT_NO_TYPE = 0, - ROCPROFILER_AGENT_MEMORY_COPY_TYPE, - ROCPROFILER_AGENT_DISPATCH_TYPE, - ROCPROFILER_AGENT_MEMORY_ALLOC_TYPE, - ROCPROFILER_AGENT_MEMORY_DEALLOC_TYPE -}; - -struct location_base -{ - uint64_t pid = 0; - uint64_t tid = 0; - uint64_t agent_handle = 0; - uint64_t queue_id = 0; - rocprofiler_location_type_t type = ROCPROFILER_AGENT_NO_TYPE; - - location_base(pid_t _pid, - pid_t _tid, - uint64_t _agent_handle = 0, - rocprofiler_location_type_t _type = ROCPROFILER_AGENT_NO_TYPE, - uint64_t _queue_id = 0) - : pid{static_cast(_pid)} - , tid{static_cast(_tid)} - , agent_handle{_agent_handle} - , queue_id{_queue_id} - , type{_type} - {} - - auto hash() const - { - return array_hash{}(pid, tid, agent_handle + 1, queue_id + 1, type); - } -}; - -bool -operator<(const location_base& lhs, const location_base& rhs) -{ - return std::tie(lhs.pid, lhs.tid, lhs.agent_handle, lhs.queue_id, lhs.type) < - std::tie(rhs.pid, rhs.tid, rhs.agent_handle, rhs.queue_id, rhs.type); -} - -struct location_data : location_base -{ - location_data(pid_t _pid, - pid_t _tid, - uint64_t _agent_handle = 0, - rocprofiler_location_type_t _type = ROCPROFILER_AGENT_NO_TYPE, - uint64_t _queue_id = 0) - : location_base{_pid, _tid, _agent_handle, _type, _queue_id} - , index{++index_counter} - , event_writer{OTF2_Archive_GetEvtWriter(CHECK_NOTNULL(archive), index)} - { - CHECK_NOTNULL(event_writer); - } - - using location_base::hash; - - static uint64_t index_counter; - - uint64_t index = 0; - event_writer_t* event_writer = nullptr; - - bool operator==(const location_base& rhs) const { return (hash() == rhs.hash()); } -}; - -uint64_t location_data::index_counter = 0; - -OTF2_TimeStamp -get_time() -{ - auto _ts = rocprofiler_timestamp_t{}; - rocprofiler_get_timestamp(&_ts); - return static_cast(_ts); -} - -auto& -get_locations() -{ - static auto _v = std::vector>{}; - return _v; -} - -const location_data* -get_location(const location_base& _location, bool _init = false) -{ - for(auto& itr : get_locations()) - if(*itr == _location) return itr.get(); - - if(_init) - return get_locations() - .emplace_back(std::make_unique(_location.pid, - _location.tid, - _location.agent_handle, - _location.type, - _location.queue_id)) - .get(); - - return nullptr; -} - -event_writer_t* -get_event_writer(const location_base& _location, bool _init = false) -{ - const auto* _loc = get_location(_location, _init); - return (_loc) ? _loc->event_writer : nullptr; -} - -OTF2_FlushType -pre_flush(void* userData, - OTF2_FileType fileType, - OTF2_LocationRef location, - void* callerData, - bool fini) -{ - consume_variables(userData, fileType, location, callerData, fini); - return OTF2_FLUSH; -} - -OTF2_TimeStamp -post_flush(void* userData, OTF2_FileType fileType, OTF2_LocationRef location) -{ - consume_variables(userData, fileType, location); - return get_time(); -} - -template -size_t -get_hash_id(Tp&& _val) -{ - using value_type = rocprofiler::common::mpl::unqualified_type_t; - - if constexpr(!std::is_pointer::value) - return std::hash{}(std::forward(_val)); - else if constexpr(std::is_same::value || - std::is_same::value) - return get_hash_id(std::string_view{_val}); - else - return get_hash_id(*_val); -} - -template -auto -add_event(std::string_view name, - const location_base& _location, - rocprofiler_callback_phase_t _phase, - OTF2_TimeStamp _ts, - attribute_list_t* _attributes = nullptr) -{ - auto* evt_writer = get_event_writer(_location, true); - auto _hash = get_hash_id(name); - - if(_phase == ROCPROFILER_CALLBACK_PHASE_ENTER) - OTF2_CHECK(OTF2_EvtWriter_Enter(evt_writer, _attributes, _ts, _hash)) - else if(_phase == ROCPROFILER_CALLBACK_PHASE_EXIT) - OTF2_CHECK(OTF2_EvtWriter_Leave(evt_writer, _attributes, _ts, _hash)) - else - ROCP_FATAL << "otf2::add_event phase is not enter or exit"; -} - -void -setup(const rocprofiler::tool::output_config& cfg, uint64_t min_start, uint64_t max_fini) -{ - namespace fs = rocprofiler::common::filesystem; - - auto _filename = rocprofiler::tool::get_output_filename(cfg, "results", std::string_view{}); - auto _filepath = fs::path{_filename}; - auto _name = _filepath.filename().string(); - auto _path = _filepath.parent_path().string(); - - if(fs::exists(_filepath)) fs::remove_all(_filepath); - - constexpr uint64_t evt_chunk_size = 2 * rocprofiler::common::units::MB; - constexpr uint64_t def_chunk_size = 8 * rocprofiler::common::units::MB; - - archive = OTF2_Archive_Open(_path.c_str(), - _name.c_str(), - OTF2_FILEMODE_WRITE, - evt_chunk_size, // event chunk size - def_chunk_size, // def chunk size - OTF2_SUBSTRATE_POSIX, - OTF2_COMPRESSION_NONE); - - OTF2_CHECK(OTF2_Archive_SetFlushCallbacks(archive, &flush_callbacks, nullptr)); - OTF2_CHECK(OTF2_Archive_SetSerialCollectiveCallbacks(archive)); - OTF2_CHECK(OTF2_Pthread_Archive_SetLockingCallbacks(archive, nullptr)); - OTF2_CHECK(OTF2_Archive_OpenEvtFiles(archive)); - - ROCP_ERROR << "Opened result file: " << _filename << ".otf2"; - - auto _timer_resolution = - rocprofiler::common::get_clock_period_ns_impl(rocprofiler::common::default_clock_id) * - std::nano::den; - auto _global_offset = min_start; - auto _max_trace_length = (max_fini - min_start); - - global_def_writer = OTF2_Archive_GetGlobalDefWriter(archive); - OTF2_CHECK(OTF2_GlobalDefWriter_WriteClockProperties( - global_def_writer, - _timer_resolution, - _global_offset, - _max_trace_length, - std::chrono::system_clock::now().time_since_epoch().count())); - - OTF2_CHECK(OTF2_GlobalDefWriter_WriteString(global_def_writer, 0, "")); - - auto add_write_string = [](size_t _hash, std::string_view _name_strv) { - static auto _existing = std::unordered_set{}; - if(_hash > 0 && _existing.count(_hash) == 0) - { - OTF2_CHECK( - OTF2_GlobalDefWriter_WriteString(global_def_writer, _hash, _name_strv.data())); - _existing.emplace(_hash); - } - }; - - auto add_write_string_val = [&add_write_string](std::string_view _name_v) { - auto _hash_v = get_hash_id(_name_v); - add_write_string(_hash_v, _name_v); - return _hash_v; - }; - - //(must be shared between processes) - auto _attr_name = std::string_view{"category"}; - auto _attr_desc = std::string_view{"tracing category"}; - - auto _attr_name_hash = add_write_string_val(_attr_name); - auto _attr_desc_hash = add_write_string_val(_attr_desc); - - OTF2_CHECK(OTF2_GlobalDefWriter_WriteAttribute( - global_def_writer, 0, _attr_name_hash, _attr_desc_hash, OTF2_TYPE_STRING)); -} - -void -shutdown() -{ - OTF2_CHECK(OTF2_Archive_Close(archive)); -} - -struct event_info -{ - explicit event_info(location_base&& _loc) - : m_location{output::get_location(std::forward(_loc), true)} - {} - - auto id() const { return m_location->index; } - auto hash() const { return m_location->hash(); } - const location_base* get_location() const { return m_location; } - - std::string name = {}; - uint64_t event_count = 0; - -private: - const location_data* m_location = nullptr; -}; - -attribute_list_t* -create_attribute_list_for_name(const char* name) -{ - auto* _val = OTF2_AttributeList_New(); - auto _hash = get_hash_id(name); - auto _attr_value = OTF2_AttributeValue{}; - _attr_value.stringRef = _hash; - OTF2_AttributeList_AddAttribute(_val, 0, OTF2_TYPE_STRING, _attr_value); - return _val; -} -} // namespace - -OTF2Session::OTF2Session(const tool::output_config& output_cfg, - uint64_t min_start, - uint64_t max_fini) -: config{output_cfg} -{ - setup(output_cfg, min_start, max_fini); -} - -OTF2Session::~OTF2Session() { shutdown(); } - -void -write_otf2(const OTF2Session& otf2_session, - const types::process& process, - const uint16_t tree_node_id, - const std::unordered_map& agent_data, - const tool::generator& thread_gen, - const tool::generator& api_gen, - const tool::generator& kernel_dispatch_gen, - const tool::generator& memory_copy_gen, - const tool::generator& memory_allocation_gen) -{ - const uint64_t _no_agent_handle = 0; - // std::numeric_limits::max() - 1; - const auto& ocfg = otf2_session.config; - - auto _app_ts = rocprofiler::tool::timestamps_t{process.start, process.fini}; - - auto thread_event_info = std::map{}; - auto agent_memcpy_info = - std::map>{}; // tid -> agent_handle ->evt - auto agent_memalloc_info = - std::map>{}; // // tid -> agent_handle ->evt - auto agent_dispatch_info = - std::map>>{}; // tid -> agent_handle - // -> quieue_id -> evt - auto _get_alloc_level_type_name = [](const std::string& level, - const std::string& type) -> std::string { - static const std::unordered_map> - name_map = { - {"REAL", - {{"ALLOC", "MEMORY_ALLOCATION_ALLOCATE"}, {"FREE", "MEMORY_ALLOCATION_FREE"}}}, - {"VIRTUAL", - {{"ALLOC", "MEMORY_ALLOCATION_VMEM_ALLOCATE"}, - {"FREE", "MEMORY_ALLOCATION_VMEM_FREE"}}}, - {"SCRATCH", {{"ALLOC", "SCRATCH_MEMORY_ALLOC"}, {"FREE", "SCRATCH_MEMORY_FREE"}}}}; - - if((name_map.count(level) != 0u) && (name_map.at(level).count(type) != 0u)) - return name_map.at(level).at(type); - - return level == "SCRATCH" || level == "REAL" || level == "VIRTUAL" ? level + "_MEMORY_NONE" - : "UNKNOWN_LEVEL"; - }; - - for(auto ditr : thread_gen) - for(const auto& itr : thread_gen.get(ditr)) - { - auto _evt_info = event_info{location_base{process.pid, itr.tid}}; - _evt_info.name = fmt::format("Thread {}", itr.tid); - thread_event_info.emplace(itr.tid, _evt_info); - } - - auto _hash_data = hash_map_t{}; - - struct evt_data - { - rocprofiler_callback_phase_t phase = ROCPROFILER_CALLBACK_PHASE_NONE; - std::string name = {}; - const location_base* location = nullptr; - uint64_t timestamp = 0; - OTF2_AttributeList* attributes = nullptr; - }; - - auto _data = std::deque{}; - auto _attr_str = std::unordered_map{}; - - // copypatse from perfetto. TODO: Move to a common place? - auto get_category_string = [](std::string_view _category) { - static auto buffer_names = rocprofiler::sdk::get_buffer_tracing_names(); - auto _category_idx = ROCPROFILER_BUFFER_TRACING_NONE; - for(const auto& citr : buffer_names) - { - if(_category == citr.name) _category_idx = citr.value; - } - return rocprofiler::sdk::get_perfetto_category(_category_idx); - }; - - auto get_category_attribute = [&get_category_string, - &_attr_str](const std::string& category) -> OTF2_AttributeList* { - const auto* _perfetto_category = get_category_string(category); - _attr_str.emplace(get_hash_id(_perfetto_category), _perfetto_category); - return create_attribute_list_for_name(_perfetto_category); - }; - - for(auto ditr : api_gen) - for(const auto& itr : api_gen.get(ditr)) - { - std::string _name = itr.name; - _hash_data.emplace(get_hash_id(_name), - region_info{_name, OTF2_REGION_ROLE_FUNCTION, OTF2_PARADIGM_HIP}); - - auto& _evt_info = thread_event_info.at(itr.tid); - _evt_info.event_count += 1; - - auto* attributes = get_category_attribute(itr.category); - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, - _name, - _evt_info.get_location(), - itr.start, - attributes}); - - if(!attributes) - { - ROCP_FATAL << "Undefined attributes for api call " << itr.name; - } - - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, - _name, - _evt_info.get_location(), - itr.end, - nullptr}); - } - - for(auto ditr : memory_copy_gen) - for(const auto& itr : memory_copy_gen.get(ditr)) - { - std::string _name = itr.name; - _hash_data.emplace( - get_hash_id(_name), - region_info{_name, OTF2_REGION_ROLE_DATA_TRANSFER, OTF2_PARADIGM_HIP}); - - const auto& _extended_agent = agent_data.at(itr.dst_agent_abs_index); - auto _agent_handle = _extended_agent.types_agent.id.handle; - auto _evt_info = event_info{location_base{ - process.pid, itr.tid, _agent_handle, ROCPROFILER_AGENT_MEMORY_COPY_TYPE}}; - - auto agent_index_info = _extended_agent.agent_index; - _evt_info.name = fmt::format("Thread {}, Copy to {} {}", - itr.tid, - agent_index_info.type, - agent_index_info.as_string("-")); - - _evt_info.event_count += 1; - - agent_memcpy_info[itr.tid].emplace(_agent_handle, _evt_info); - - const auto* _perfetto_name = - rocprofiler::sdk::perfetto_category::name; - - _attr_str.emplace(get_hash_id(_perfetto_name), _perfetto_name); - auto* _attrs = create_attribute_list_for_name(_perfetto_name); - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, - _name, - _evt_info.get_location(), - itr.start, - _attrs}); - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, - _name, - _evt_info.get_location(), - itr.end, - nullptr}); - }; - - for(auto ditr : memory_allocation_gen) - for(const auto& itr : memory_allocation_gen.get(ditr)) - { - std::string _alloc_operation = _get_alloc_level_type_name(itr.level, itr.type); - - const auto* _perfetto_name = rocprofiler::sdk::perfetto_category< - rocprofiler::sdk::category::memory_allocation>::name; - - _attr_str.emplace(get_hash_id(_perfetto_name), _perfetto_name); - auto* _attrs = create_attribute_list_for_name(_perfetto_name); - - if(itr.type == "ALLOC") - { - _hash_data.emplace( - get_hash_id(_alloc_operation), - region_info{_alloc_operation, OTF2_REGION_ROLE_ALLOCATE, OTF2_PARADIGM_HIP}); - - const auto& _extended_agent = agent_data.at(itr.agent_abs_index); - auto _handle = _extended_agent.types_agent.id.handle; - - auto _evt_info = event_info{location_base{ - process.pid, itr.tid, _handle, ROCPROFILER_AGENT_MEMORY_ALLOC_TYPE}}; - - auto agent_index_info = _extended_agent.agent_index; - _evt_info.name = fmt::format("Thread {}, Memory Allocate at {} {}", - itr.tid, - agent_index_info.type, - agent_index_info.as_string("-")); - - agent_memalloc_info[itr.tid].emplace(_handle, _evt_info); - _evt_info.event_count += 1; - - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, - _alloc_operation, - _evt_info.get_location(), - itr.start, - _attrs}); - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, - _alloc_operation, - _evt_info.get_location(), - itr.end, - nullptr}); - } - else if(itr.type == "FREE") // - { - _hash_data.emplace( - get_hash_id(_alloc_operation), - region_info{_alloc_operation, OTF2_REGION_ROLE_DEALLOCATE, OTF2_PARADIGM_HIP}); - - auto _evt_info = event_info{location_base{ - process.pid, itr.tid, _no_agent_handle, ROCPROFILER_AGENT_MEMORY_DEALLOC_TYPE}}; - _evt_info.name = fmt::format("Thread {}, Memory Deallocate (Free)", itr.tid); - - agent_memalloc_info[itr.tid].emplace(_no_agent_handle, _evt_info); - - _evt_info.event_count += 1; - - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, - _alloc_operation, - _evt_info.get_location(), - itr.start, - _attrs}); - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, - _alloc_operation, - _evt_info.get_location(), - itr.end, - nullptr}); - } - else - { - auto _evt_info = event_info{location_base{process.pid, itr.tid}}; - _evt_info.name = fmt::format("Thread {}, Memory Operation UNK", itr.tid); - _evt_info.event_count += 1; - agent_memalloc_info[itr.tid].emplace(_no_agent_handle, _evt_info); - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, - _alloc_operation, - _evt_info.get_location(), - itr.start, - _attrs}); - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, - _alloc_operation, - _evt_info.get_location(), - itr.end, - nullptr}); - } - } - - auto _queue_ids = std::map{}; - for(auto ditr : kernel_dispatch_gen) - for(const auto& itr : kernel_dispatch_gen.get(ditr)) - { - auto _name = fmt::format( - "{}", (ocfg.kernel_rename && !itr.region.empty()) ? itr.region : itr.name); - _hash_data.emplace(get_hash_id(_name), - region_info{_name, OTF2_REGION_ROLE_FUNCTION, OTF2_PARADIGM_HIP}); - - const auto* _perfetto_name = rocprofiler::sdk::perfetto_category< - rocprofiler::sdk::category::kernel_dispatch>::name; - - _attr_str.emplace(get_hash_id(_perfetto_name), _perfetto_name); - auto* _attrs = create_attribute_list_for_name(_perfetto_name); - - const auto& _extended_agent = agent_data.at(itr.agent_abs_index); - auto _handle = _extended_agent.types_agent.id.handle; - auto agent_index_info = _extended_agent.agent_index; - - auto _evt_info = event_info{location_base{ - process.pid, itr.tid, _handle, ROCPROFILER_AGENT_DISPATCH_TYPE, itr.queue_id}}; - - if(_queue_ids.count(itr.queue_id) == 0) - { - _queue_ids.emplace(itr.queue_id, _queue_ids.size()); - } - - _evt_info.name = fmt::format("Thread {}, Compute on {} {}, Queue {}", - - itr.tid, - agent_index_info.type, - agent_index_info.as_string("-"), - _queue_ids.at(itr.queue_id)); - - _evt_info.event_count += 1; - agent_dispatch_info[itr.tid][_handle].emplace(itr.queue_id, _evt_info); - - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_ENTER, - _name, - _evt_info.get_location(), - itr.start, - _attrs}); - _data.emplace_back(evt_data{ROCPROFILER_CALLBACK_PHASE_EXIT, - _name, - _evt_info.get_location(), - itr.end, - nullptr}); - } - - std::sort(_data.begin(), _data.end(), [](const evt_data& lhs, const evt_data& rhs) { - if(lhs.timestamp != rhs.timestamp) return (lhs.timestamp < rhs.timestamp); - if(lhs.phase != rhs.phase) return (lhs.phase > rhs.phase); - return (*lhs.location < *rhs.location); - }); - - for(const auto& itr : _data) - { - add_event(itr.name, *itr.location, itr.phase, itr.timestamp, itr.attributes); - ROCP_ERROR_IF(itr.timestamp < _app_ts.app_start_time) - << "event found with timestamp < app start time by " - << (_app_ts.app_start_time - itr.timestamp) << " nsec :: " << itr.name; - ROCP_ERROR_IF(itr.timestamp > _app_ts.app_end_time) - << "event found with timestamp > app end time by " - << (itr.timestamp - _app_ts.app_end_time) << " nsec :: " << itr.name; - } - - for(const auto& itr : _data) - { - if(itr.attributes) OTF2_AttributeList_Delete(itr.attributes); - } - - OTF2_CHECK(OTF2_Archive_CloseEvtFiles(archive)); - - OTF2_CHECK(OTF2_Archive_OpenDefFiles(archive)); - for(auto& itr : get_locations()) - { - OTF2_DefWriter* def_writer = OTF2_Archive_GetDefWriter(archive, itr->index); - OTF2_Archive_CloseDefWriter(archive, def_writer); - } - OTF2_CHECK(OTF2_Archive_CloseDefFiles(archive)); - - for(const auto& itr : _hash_data) - { - if(itr.first != 0) - OTF2_CHECK(OTF2_GlobalDefWriter_WriteString( - global_def_writer, itr.first, itr.second.name.c_str())); - } - - for(const auto& itr : _hash_data) - { - if(itr.first != 0) - OTF2_CHECK(OTF2_GlobalDefWriter_WriteRegion(global_def_writer, - itr.first, - itr.first, - 0, - 0, - itr.second.region_role, - itr.second.paradigm, - OTF2_REGION_FLAG_NONE, - 0, - 0, - 0)); - } - - auto add_write_string = [](size_t _hash, std::string_view _name) { - static auto _existing = std::unordered_set{}; - if(_hash > 0 && _existing.count(_hash) == 0) - { - OTF2_CHECK(OTF2_GlobalDefWriter_WriteString(global_def_writer, _hash, _name.data())); - _existing.emplace(_hash); - } - }; - - for(const auto& itr : _attr_str) - add_write_string(itr.first, itr.second); - - std::istringstream command_line(process.command); - std::string _exe_name; - command_line >> _exe_name; // Extracts characters until whitespace - _exe_name = fmt::format("{}", _exe_name); - auto _exe_hash = get_hash_id(_exe_name); - add_write_string(_exe_hash, _exe_name); - - auto _node_name = std::string{"node"}; - { - if(!process.hostname.empty()) - { - if(process.hostname.length() >= PATH_MAX) - { - _node_name = process.hostname.substr(0, PATH_MAX - 1); - } - else - { - _node_name = process.hostname; - } - } - } - _node_name = fmt::format("{}", _node_name); - auto _node_hash = get_hash_id(_node_name); - add_write_string(_node_hash, _node_name); - - // debug - OTF2_CHECK(OTF2_GlobalDefWriter_WriteSystemTreeNode( - global_def_writer, tree_node_id, _exe_hash, _node_hash, OTF2_UNDEFINED_SYSTEM_TREE_NODE)); - - // Process - OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocationGroup(global_def_writer, - tree_node_id, - _exe_hash, - OTF2_LOCATION_GROUP_TYPE_PROCESS, - tree_node_id, - OTF2_UNDEFINED_LOCATION_GROUP)); - - // Accelerators (must be shared between the processes) - for(const auto& [abs_idx, extended_agent] : agent_data) - { - auto _handle = extended_agent.types_agent.id.handle; - const auto _name = std::string_view{extended_agent.labeled_name}; - auto _hash = get_hash_id(_name); - - add_write_string(_hash, _name); - OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocationGroup(global_def_writer, - _handle, - _hash, - OTF2_LOCATION_GROUP_TYPE_ACCELERATOR, - tree_node_id, - OTF2_UNDEFINED_LOCATION_GROUP)); - } - - // Thread Events - for(auto& [tid, evt] : thread_event_info) - { - auto _hash = get_hash_id(evt.name); - - add_write_string(_hash, evt.name); - OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer, - evt.id(), // id - _hash, - OTF2_LOCATION_TYPE_CPU_THREAD, - 2 * evt.event_count, // # events - tree_node_id // location group - )); - } - - // Memcpy Events - for(auto& [tid, itr] : agent_memcpy_info) - { - for(auto& [agent_handle, evt] : itr) - { - auto _hash = get_hash_id(evt.name); - - add_write_string(_hash, evt.name); - OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer, - evt.id(), // id - _hash, - OTF2_LOCATION_TYPE_ACCELERATOR_STREAM, - 2 * evt.event_count, // # events - agent_handle // location group - )); - } - } - - // Memalloc Events - for(auto& [tid, itr] : agent_memalloc_info) - { - for(auto& [agent_handle, evt] : itr) - { - auto _hash = get_hash_id(evt.name); - - add_write_string(_hash, evt.name); - - OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer, - evt.id(), // id - _hash, - OTF2_LOCATION_TYPE_ACCELERATOR_STREAM, - 2 * evt.event_count, // # events - agent_handle // location group - )); - } - } - - // Dispatch Events - for(auto& [tid, itr] : agent_dispatch_info) - { - for(auto& [agent_handle, qitr] : itr) - { - for(auto& [queue_id, evt] : qitr) - { - auto _hash = get_hash_id(evt.name); - - add_write_string(_hash, evt.name); - - OTF2_CHECK(OTF2_GlobalDefWriter_WriteLocation(global_def_writer, - evt.id(), // id - _hash, - OTF2_LOCATION_TYPE_ACCELERATOR_STREAM, - 2 * evt.event_count, // # events - agent_handle // location group - )); - } - } - } -} - -} // namespace output -} // namespace rocpd diff --git a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.hpp b/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.hpp deleted file mode 100644 index cf0ebb0a23..0000000000 --- a/projects/rocprofiler-sdk/source/lib/python/rocpd/source/otf2.hpp +++ /dev/null @@ -1,71 +0,0 @@ -// MIT License -// -// Copyright (c) 2025 Advanced Micro Devices, Inc. All Rights Reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. - -#pragma once - -#include "lib/python/rocpd/source/types.hpp" - -#include "lib/common/defines.hpp" -#include "lib/output/generator.hpp" -#include "lib/output/metadata.hpp" -#include "lib/output/node_info.hpp" -#include "lib/output/output_config.hpp" -#include "lib/output/sql/common.hpp" -#include "lib/output/stream_info.hpp" -#include "lib/rocprofiler-sdk-tool/config.hpp" - -#include - -namespace rocpd -{ -namespace output -{ -namespace tool = rocprofiler::tool; - -struct extended_agent -{ - const rocpd::types::agent& types_agent; - const tool::agent_index agent_index; - const std::string labeled_name; -}; - -struct OTF2Session -{ - OTF2Session(const tool::output_config& output_cfg, uint64_t min_start, uint64_t max_fini); - - ~OTF2Session(); - - const tool::output_config& config; -}; - -void -write_otf2(const OTF2Session& session, - const types::process& process, - const uint16_t tree_node_id, - const std::unordered_map& agent_data, - const tool::generator& thread_gen, - const tool::generator& api_gen, - const tool::generator& kernel_dispatch_gen, - const tool::generator& memory_copy_gen, - const tool::generator& memory_allocation_gen); -} // namespace output -} // namespace rocpd