From 363f85dc72ace00ff6771d005a8b434ef235be6f Mon Sep 17 00:00:00 2001 From: Mythreya Date: Mon, 11 Nov 2024 09:08:47 -0800 Subject: [PATCH] Report page migration events as start/end (#793) * Squashed commit of the following: commit b76f2635f4b65599f03812a73d0cf410f5ada213 Author: Mythreya Date: Fri Apr 26 00:29:09 2024 +0000 Changed for PR feedback commit bedb8ad566ff42fbf117b19202c26c507abcf8ac Author: Jonathan R. Madsen Date: Thu Apr 25 19:20:06 2024 -0500 Fix installation commit a98f8a69459a1450a1be9c98e20b3c1e7f2568c2 Author: Jonathan R. Madsen Date: Thu Apr 25 19:16:35 2024 -0500 Restructure the headers commit 46489a020ffafdd5f4ce3f580469ff233ef67fe1 Author: Mythreya Date: Tue Apr 23 23:31:10 2024 +0000 Update hsa include commit 8e795282cce348fc6aa736b7857b21aeb32aa20a Author: Mythreya Date: Tue Apr 23 23:02:32 2024 +0000 Report page migration events as start/end * Updated tests accordingly * Page migration events are reported independently commit 8784e5ad4895a626a2a8e4ac12f8021b34172bd4 Author: Mythreya Date: Tue Apr 16 17:01:57 2024 +0000 Update handling of dropped page migration events Previously, we dropped all locally buffered events when we detect that KFD has dropped some events. This may drop too many pending events too eagerly. When we receive an end event and cannot find the corresponding start, we can be sure that KFD has dropped some events in the immediate past. When this happens, we look through all locally buffered events and report the start events that are older than 10s as partial events --- they have no "end" information (we expect that the end events have been dropped). We also set the polling timeout to 10s to prevent the local buffer from getting too large with events waiting to be paired up. Updated tests commit 2e8e0b07eeda9b5990e1ae8d28dcd3a035ce38e1 Author: Mythreya Date: Tue Apr 16 17:01:31 2024 +0000 Docs for triggers * Fix page migration sample * Fix hasher, kfd install * Add hsa include * Install KFD include dir * Updates from code review - single timestamp field - node_id -> agent_id - from_node -> from_agent - to_node -> to_agent * Misc revisions * Remove page-migration install target * Update page-migration pytest * Tweak to serialization * Address PR comments * Update page-migration test * Add cli args, update iterations * Address PR comments * Add abi.cpp for static_asserts * Update page_migration gtest with only runtime tests * Moved helpers into utils.hpp --------- Co-authored-by: Jonathan R. Madsen --- samples/api_buffered_tracing/client.cpp | 93 +- source/include/rocprofiler-sdk/CMakeLists.txt | 1 + .../include/rocprofiler-sdk/buffer_tracing.h | 102 +-- source/include/rocprofiler-sdk/cxx/hash.hpp | 1 + .../rocprofiler-sdk/cxx/serialization.hpp | 194 +++-- source/include/rocprofiler-sdk/fwd.h | 46 +- source/include/rocprofiler-sdk/hsa.h | 1 + .../rocprofiler-sdk/hsa/CMakeLists.txt | 1 + .../rocprofiler-sdk/hsa/scratch_memory_id.h | 39 + .../rocprofiler-sdk/kfd/CMakeLists.txt | 11 + .../rocprofiler-sdk/kfd/page_migration_args.h | 104 +++ .../rocprofiler-sdk/kfd/page_migration_id.h | 83 ++ source/include/rocprofiler-sdk/rocprofiler.h | 2 +- .../page_migration/CMakeLists.txt | 2 +- .../rocprofiler-sdk/page_migration/abi.cpp | 106 +++ .../page_migration/defines.hpp | 44 +- .../page_migration/page_migration.cpp | 814 ++++++------------ .../page_migration/page_migration.def.cpp | 55 +- .../rocprofiler-sdk/page_migration/utils.hpp | 165 ++-- .../lib/rocprofiler-sdk/tests/CMakeLists.txt | 18 +- .../rocprofiler-sdk/tests/page_migration.cpp | 135 +-- tests/bin/page-migration/CMakeLists.txt | 5 - tests/bin/page-migration/page-migration.cpp | 100 ++- tests/page-migration/CMakeLists.txt | 2 +- tests/page-migration/conftest.py | 3 +- tests/page-migration/validate.py | 130 ++- tests/tools/json-tool.cpp | 2 +- 27 files changed, 1161 insertions(+), 1098 deletions(-) create mode 100644 source/include/rocprofiler-sdk/hsa/scratch_memory_id.h create mode 100644 source/include/rocprofiler-sdk/kfd/CMakeLists.txt create mode 100644 source/include/rocprofiler-sdk/kfd/page_migration_args.h create mode 100644 source/include/rocprofiler-sdk/kfd/page_migration_id.h create mode 100644 source/lib/rocprofiler-sdk/page_migration/abi.cpp diff --git a/samples/api_buffered_tracing/client.cpp b/samples/api_buffered_tracing/client.cpp index 1bb2e7428d..9dd5b13b00 100644 --- a/samples/api_buffered_tracing/client.cpp +++ b/samples/api_buffered_tracing/client.cpp @@ -86,6 +86,22 @@ rocprofiler_buffer_id_t client_buffer = {}; buffer_name_info client_name_info = {}; kernel_symbol_map_t client_kernels = {}; +template +std::string +as_hex(Tp _v, size_t _width = 16) +{ + uintptr_t _vp = 0; + if constexpr(std::is_pointer::value) + _vp = reinterpret_cast(_v); + else + _vp = _v; + + auto _ss = std::stringstream{}; + _ss.fill('0'); + _ss << "0x" << std::hex << std::setw(_width) << _vp; + return _ss.str(); +} + void print_call_stack(const call_stack_t& _call_stack) { @@ -290,43 +306,71 @@ tool_tracing_callback(rocprofiler_context_id_t context, auto info = std::stringstream{}; info << "kind=" << record->kind << ", operation=" << record->operation - << ", pid=" << record->pid << ", start=" << record->start_timestamp - << ", stop=" << record->end_timestamp - << ", name=" << client_name_info.at(record->kind, record->operation); + << ", pid=" << record->pid << ", timestamp=" << record->timestamp + << ", name=" << client_name_info.at(record->kind, record->operation) + << std::boolalpha; switch(record->operation) { - case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE: + case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START: { - info << ", page_fault=(" << record->page_fault.read_fault << ", " - << record->page_fault.migrated << ", " << record->page_fault.node_id - << ", " << std::hex << "0x" << record->page_fault.address << ")"; + const auto& arg = record->args; + info << ", page_migrate_start=(" << as_hex(arg.page_migrate_start.start_addr) + << ", " << as_hex(arg.page_migrate_start.end_addr) << ", " + << arg.page_migrate_start.from_agent.handle << ", " + << arg.page_migrate_start.to_agent.handle << ", " + << arg.page_migrate_start.prefetch_agent.handle << ", " + << arg.page_migrate_start.preferred_agent.handle << ", " + << arg.page_migrate_start.trigger << ")"; break; } - case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT: + case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END: { - info << ", page_migrate=(" << std::hex << "0x" - << record->page_migrate.start_addr << ", 0x" - << record->page_migrate.end_addr << ", " << std::dec - << record->page_migrate.from_node << ", " << record->page_migrate.to_node - << ", " << record->page_migrate.prefetch_node << ", " - << record->page_migrate.preferred_node << ", " - << record->page_migrate.trigger << ")"; + const auto& arg = record->args; + info << ", page_migrate_end=(" << as_hex(arg.page_migrate_end.start_addr) + << ", " << as_hex(arg.page_migrate_end.end_addr) << ", " + << arg.page_migrate_end.from_agent.handle << ", " + << arg.page_migrate_end.to_agent.handle << ", " + << arg.page_migrate_end.trigger << ")"; break; } - case ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND: + case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START: { - info << ", queue_suspend=(" << record->queue_suspend.rescheduled << ", " - << record->queue_suspend.node_id << ", " << record->queue_suspend.trigger - << ")"; + const auto& arg = record->args; + info << ", page_fault_start=(" << arg.page_fault_start.read_fault << ", " + << arg.page_fault_start.agent_id.handle << ", " + << as_hex(arg.page_fault_start.address) << ")"; + break; + } + case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END: + { + const auto& arg = record->args; + info << ", page_fault_end=(" << arg.page_fault_end.migrated << ", " + << arg.page_fault_end.agent_id.handle << ", " + << as_hex(arg.page_fault_end.address) << ")"; + break; + } + case ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION: + { + const auto& arg = record->args; + info << ", queue_eviction=(" << arg.queue_eviction.agent_id.handle << ", " + << arg.queue_eviction.trigger << ")"; + break; + } + case ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE: + { + const auto& arg = record->args; + info << ", queue_restore=(" << arg.queue_restore.rescheduled << ", " + << arg.queue_restore.agent_id.handle << ")"; break; } case ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU: { - info << ", unmap_from_gpu=(" << record->unmap_from_gpu.node_id << std::hex - << ", 0x" << record->unmap_from_gpu.start_addr << ", 0x" - << record->unmap_from_gpu.end_addr << ", " << std::dec - << record->unmap_from_gpu.trigger << ")"; + const auto& arg = record->args; + info << ", unmap_from_gpu=(" << as_hex(arg.unmap_from_gpu.start_addr) << ", " + << as_hex(arg.unmap_from_gpu.end_addr) << ", " + << arg.unmap_from_gpu.agent_id.handle << ", " << arg.unmap_from_gpu.trigger + << ")"; break; } case ROCPROFILER_PAGE_MIGRATION_NONE: @@ -337,8 +381,7 @@ tool_tracing_callback(rocprofiler_context_id_t context, } } - if(record->start_timestamp > record->end_timestamp) - throw std::runtime_error("page migration: start > end"); + if(record->timestamp == 0) throw std::runtime_error("page migration: timestamp == 0"); static_cast(user_data)->emplace_back( source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); diff --git a/source/include/rocprofiler-sdk/CMakeLists.txt b/source/include/rocprofiler-sdk/CMakeLists.txt index cbf0d592e1..956a701fed 100644 --- a/source/include/rocprofiler-sdk/CMakeLists.txt +++ b/source/include/rocprofiler-sdk/CMakeLists.txt @@ -46,4 +46,5 @@ add_subdirectory(marker) add_subdirectory(openmp) add_subdirectory(rccl) add_subdirectory(cxx) +add_subdirectory(kfd) add_subdirectory(amd_detail) diff --git a/source/include/rocprofiler-sdk/buffer_tracing.h b/source/include/rocprofiler-sdk/buffer_tracing.h index ae6c67b73a..16b379bb17 100644 --- a/source/include/rocprofiler-sdk/buffer_tracing.h +++ b/source/include/rocprofiler-sdk/buffer_tracing.h @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -37,49 +38,6 @@ ROCPROFILER_EXTERN_C_INIT * @{ */ -/** - * @brief Page migration triggers - * - */ -typedef enum -{ - ROCPROFILER_PAGE_MIGRATION_TRIGGER_NONE = -1, - ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH, - ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, - ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, - ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, - ROCPROFILER_PAGE_MIGRATION_TRIGGER_LAST, -} rocprofiler_page_migration_trigger_t; - -/** - * @brief Page migration triggers causing the queue to suspend - * - */ -typedef enum -{ - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_NONE = -1, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_LAST, -} rocprofiler_page_migration_queue_suspend_trigger_t; - -/** - * @brief Page migration triggers causing an unmap from the GPU - * - */ -typedef enum -{ - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_NONE = -1, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST, -} rocprofiler_page_migration_unmap_from_gpu_trigger_t; - /** * @brief ROCProfiler Buffer HSA API Tracer Record. */ @@ -265,63 +223,17 @@ typedef struct rocprofiler_buffer_tracing_kernel_dispatch_record_t /// dispatch } rocprofiler_buffer_tracing_kernel_dispatch_record_t; -typedef struct -{ - uint8_t read_fault : 1; ///< Is the fault due to a read or a write - uint8_t migrated : 1; - uint32_t node_id; ///< GPU or CPU node ID which reports a page fault - uint64_t address; ///< Address access that caused the page fault -} rocprofiler_buffer_tracing_page_migration_page_fault_record_t; - -typedef struct -{ - uint64_t start_addr; ///< Start address of the page being migrated - uint64_t end_addr; ///< End address of the page being migrated - uint32_t from_node; ///< Source node - uint32_t to_node; ///< Destination node - uint32_t prefetch_node; ///< Node from which page was prefetched - uint32_t preferred_node; ///< Preferred destinaion node - rocprofiler_page_migration_trigger_t trigger; ///< Cause of migration -} rocprofiler_buffer_tracing_page_migration_page_migrate_record_t; - -typedef struct -{ - uint8_t rescheduled : 1; - uint32_t node_id; ///< GPU node from which the queue was suspended - rocprofiler_page_migration_queue_suspend_trigger_t trigger; ///< Cause of queue suspension -} rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t; - -typedef struct -{ - uint32_t node_id; ///< Node ID from which page was unmapped - uint64_t start_addr; ///< Start address of unmapped page - uint64_t end_addr; ///< End address of unmapped page - rocprofiler_page_migration_unmap_from_gpu_trigger_t trigger; ///< Cause of unmap -} rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t; - /** * @brief ROCProfiler Buffer Page Migration Tracer Record */ typedef struct rocprofiler_buffer_tracing_page_migration_record_t { - uint64_t size; ///< size of this struct - rocprofiler_buffer_tracing_kind_t kind; ///< ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION - rocprofiler_tracing_operation_t operation; - rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds - rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds - uint32_t pid; - - union - { - rocprofiler_buffer_tracing_page_migration_page_fault_record_t page_fault; - rocprofiler_buffer_tracing_page_migration_page_migrate_record_t page_migrate; - rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t queue_suspend; - rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t unmap_from_gpu; - struct - { - uint64_t reserved[12]; - }; - }; + uint64_t size; ///< size of this struct + rocprofiler_buffer_tracing_kind_t kind; ///< ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION + rocprofiler_page_migration_operation_t operation; + rocprofiler_timestamp_t timestamp; ///< start time in nanoseconds + uint32_t pid; + rocprofiler_page_migration_args_t args; } rocprofiler_buffer_tracing_page_migration_record_t; /** diff --git a/source/include/rocprofiler-sdk/cxx/hash.hpp b/source/include/rocprofiler-sdk/cxx/hash.hpp index 774942585a..f5e5306b47 100644 --- a/source/include/rocprofiler-sdk/cxx/hash.hpp +++ b/source/include/rocprofiler-sdk/cxx/hash.hpp @@ -24,6 +24,7 @@ #pragma once #include +#include #include namespace rocprofiler diff --git a/source/include/rocprofiler-sdk/cxx/serialization.hpp b/source/include/rocprofiler-sdk/cxx/serialization.hpp index a06893e16b..a8cdac371c 100644 --- a/source/include/rocprofiler-sdk/cxx/serialization.hpp +++ b/source/include/rocprofiler-sdk/cxx/serialization.hpp @@ -59,6 +59,7 @@ #include #include +#include #include #define ROCP_SDK_SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD)) @@ -460,6 +461,122 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_copy_record_t data) ROCP_SDK_SAVE_DATA_FIELD(bytes); } +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_page_fault_start_t& data) +{ + ROCP_SDK_SAVE_DATA_BITFIELD("read_fault", read_fault); + ROCP_SDK_SAVE_DATA_FIELD(agent_id); + ROCP_SDK_SAVE_DATA_FIELD(address); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_page_fault_end_t& data) +{ + ROCP_SDK_SAVE_DATA_BITFIELD("migrated", migrated); + ROCP_SDK_SAVE_DATA_FIELD(agent_id); + ROCP_SDK_SAVE_DATA_FIELD(address); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_page_migrate_start_t& data) +{ + ROCP_SDK_SAVE_DATA_FIELD(start_addr); + ROCP_SDK_SAVE_DATA_FIELD(end_addr); + ROCP_SDK_SAVE_DATA_FIELD(from_agent); + ROCP_SDK_SAVE_DATA_FIELD(to_agent); + ROCP_SDK_SAVE_DATA_FIELD(prefetch_agent); + ROCP_SDK_SAVE_DATA_FIELD(preferred_agent); + ROCP_SDK_SAVE_DATA_FIELD(trigger); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_page_migrate_end_t& data) +{ + ROCP_SDK_SAVE_DATA_FIELD(start_addr); + ROCP_SDK_SAVE_DATA_FIELD(end_addr); + ROCP_SDK_SAVE_DATA_FIELD(from_agent); + ROCP_SDK_SAVE_DATA_FIELD(to_agent); + ROCP_SDK_SAVE_DATA_FIELD(trigger); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_queue_eviction_t& data) +{ + ROCP_SDK_SAVE_DATA_FIELD(agent_id); + ROCP_SDK_SAVE_DATA_FIELD(trigger); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_queue_restore_t& data) +{ + ROCP_SDK_SAVE_DATA_BITFIELD("rescheduled", rescheduled); + ROCP_SDK_SAVE_DATA_FIELD(agent_id); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_unmap_from_gpu_t& data) +{ + ROCP_SDK_SAVE_DATA_FIELD(start_addr); + ROCP_SDK_SAVE_DATA_FIELD(end_addr); + ROCP_SDK_SAVE_DATA_FIELD(agent_id); + ROCP_SDK_SAVE_DATA_FIELD(trigger); +} + +namespace details +{ +template +struct save_page_migration_arg; + +#define ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(ENUM_VALUE, UNION_ARG) \ + template <> \ + struct save_page_migration_arg \ + { \ + static constexpr auto value = ROCPROFILER_PAGE_MIGRATION_##ENUM_VALUE; \ + template \ + void operator()(ArchiveT& ar, rocprofiler_page_migration_args_t args) \ + { \ + ar(make_nvp(#UNION_ARG, args.UNION_ARG)); \ + } \ + }; + +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(NONE, none) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_MIGRATE_START, page_migrate_start) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_MIGRATE_END, page_migrate_end) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_FAULT_START, page_fault_start) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_FAULT_END, page_fault_end) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(QUEUE_EVICTION, queue_eviction) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(QUEUE_RESTORE, queue_restore) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(UNMAP_FROM_GPU, unmap_from_gpu) + +#undef ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG + +template +void +save_page_migration_args(ArchiveT& ar, + rocprofiler_page_migration_operation_t op, + rocprofiler_page_migration_args_t args, + std::index_sequence) +{ + using save_page_migration_type = save_page_migration_arg; + if(op == save_page_migration_type::value) + { + if constexpr(save_page_migration_type::value != ROCPROFILER_PAGE_MIGRATION_NONE) + save_page_migration_type{}(ar, args); + } + else if constexpr(sizeof...(IdxTail) > 0) + { + save_page_migration_args(ar, op, args, std::index_sequence{}); + } +} +} // namespace details + template void save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_record_t& data) @@ -467,81 +584,10 @@ save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_record_t& dat ROCP_SDK_SAVE_DATA_FIELD(size); ROCP_SDK_SAVE_DATA_FIELD(kind); ROCP_SDK_SAVE_DATA_FIELD(operation); - ROCP_SDK_SAVE_DATA_FIELD(start_timestamp); - ROCP_SDK_SAVE_DATA_FIELD(end_timestamp); + ROCP_SDK_SAVE_DATA_FIELD(timestamp); ROCP_SDK_SAVE_DATA_FIELD(pid); - - switch(data.operation) - { - case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT: - { - ar(make_nvp("page_fault", data.page_fault)); - break; - } - case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE: - { - ar(make_nvp("page_migrate", data.page_migrate)); - break; - } - case ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND: - { - ar(make_nvp("queue_suspend", data.queue_suspend)); - break; - } - case ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU: - { - ar(make_nvp("unmap_from_gpu", data.unmap_from_gpu)); - break; - } - case ROCPROFILER_PAGE_MIGRATION_NONE: - case ROCPROFILER_PAGE_MIGRATION_LAST: - { - throw std::runtime_error{"unsupported page migration operation type"}; - break; - } - } -} - -template -void -save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_page_fault_record_t& data) -{ - ROCP_SDK_SAVE_DATA_FIELD(node_id); - ROCP_SDK_SAVE_DATA_FIELD(address); - ROCP_SDK_SAVE_DATA_FIELD(read_fault); - ROCP_SDK_SAVE_DATA_FIELD(migrated); -} - -template -void -save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_page_migrate_record_t& data) -{ - ROCP_SDK_SAVE_DATA_FIELD(start_addr); - ROCP_SDK_SAVE_DATA_FIELD(end_addr); - ROCP_SDK_SAVE_DATA_FIELD(from_node); - ROCP_SDK_SAVE_DATA_FIELD(to_node); - ROCP_SDK_SAVE_DATA_FIELD(prefetch_node); - ROCP_SDK_SAVE_DATA_FIELD(preferred_node); - ROCP_SDK_SAVE_DATA_FIELD(trigger); -} - -template -void -save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t& data) -{ - ROCP_SDK_SAVE_DATA_FIELD(node_id); - ROCP_SDK_SAVE_DATA_FIELD(trigger); - ROCP_SDK_SAVE_DATA_FIELD(rescheduled); -} - -template -void -save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t& data) -{ - ROCP_SDK_SAVE_DATA_FIELD(node_id); - ROCP_SDK_SAVE_DATA_FIELD(start_addr); - ROCP_SDK_SAVE_DATA_FIELD(end_addr); - ROCP_SDK_SAVE_DATA_FIELD(trigger); + details::save_page_migration_args( + ar, data.operation, data.args, std::make_index_sequence{}); } template diff --git a/source/include/rocprofiler-sdk/fwd.h b/source/include/rocprofiler-sdk/fwd.h index a61f074d7b..1c2297b081 100644 --- a/source/include/rocprofiler-sdk/fwd.h +++ b/source/include/rocprofiler-sdk/fwd.h @@ -24,8 +24,6 @@ #include -#include - #include #include @@ -226,21 +224,6 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_MEMORY_COPY_LAST, } rocprofiler_memory_copy_operation_t; -/** - * @brief Page migration event. - */ -typedef enum // NOLINT(performance-enum-size) -{ - ROCPROFILER_PAGE_MIGRATION_NONE = 0, ///< Unknown event - ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, - ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, - // Any and all events, from all processes. Requires superuser - // ROCPROFILER_PAGE_MIGRATION_ANY_ALL_PROCESSES, - ROCPROFILER_PAGE_MIGRATION_LAST, -} rocprofiler_page_migration_operation_t; - /** * @brief ROCProfiler Kernel Dispatch Tracing Operation Types. */ @@ -321,6 +304,22 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_BUFFER_POLICY_LAST, } rocprofiler_buffer_policy_t; +/** + * @brief Page migration event. + */ +typedef enum // NOLINT(performance-enum-size) +{ + ROCPROFILER_PAGE_MIGRATION_NONE = 0, ///< Unknown event + ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START, + ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END, + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START, + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END, + ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION, + ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, + ROCPROFILER_PAGE_MIGRATION_LAST, +} rocprofiler_page_migration_operation_t; + /** * @brief Scratch event kind */ @@ -333,19 +332,6 @@ typedef enum ROCPROFILER_SCRATCH_MEMORY_LAST, } rocprofiler_scratch_memory_operation_t; -/** - * @brief Allocation flags for @see rocprofiler_buffer_tracing_scratch_memory_record_t - */ -typedef enum -{ - ROCPROFILER_SCRATCH_ALLOC_FLAG_NONE = 0, - ROCPROFILER_SCRATCH_ALLOC_FLAG_USE_ONCE = - HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE, ///< This scratch allocation is only valid for 1 - ///< dispatch. - ROCPROFILER_SCRATCH_ALLOC_FLAG_ALT = - HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT, ///< Used alternate scratch instead of main scratch -} rocprofiler_scratch_alloc_flag_t; - /** * @brief Enumeration for specifying runtime libraries supported by rocprofiler. This enumeration is * used for thread creation callbacks. @see INTERNAL_THREADING. diff --git a/source/include/rocprofiler-sdk/hsa.h b/source/include/rocprofiler-sdk/hsa.h index df075a6e18..0fda1d75b3 100644 --- a/source/include/rocprofiler-sdk/hsa.h +++ b/source/include/rocprofiler-sdk/hsa.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #if defined(ROCPROFILER_DEFINED_AMD_INTERNAL_BUILD) && ROCPROFILER_DEFINED_AMD_INTERNAL_BUILD > 0 diff --git a/source/include/rocprofiler-sdk/hsa/CMakeLists.txt b/source/include/rocprofiler-sdk/hsa/CMakeLists.txt index 8801e5c22a..f4229b9631 100644 --- a/source/include/rocprofiler-sdk/hsa/CMakeLists.txt +++ b/source/include/rocprofiler-sdk/hsa/CMakeLists.txt @@ -12,6 +12,7 @@ set(ROCPROFILER_HSA_HEADER_FILES finalize_ext_api_id.h image_ext_api_id.h scratch_memory_args.h + scratch_memory_id.h table_id.h) install( diff --git a/source/include/rocprofiler-sdk/hsa/scratch_memory_id.h b/source/include/rocprofiler-sdk/hsa/scratch_memory_id.h new file mode 100644 index 0000000000..ce6fe4de78 --- /dev/null +++ b/source/include/rocprofiler-sdk/hsa/scratch_memory_id.h @@ -0,0 +1,39 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +/** + * @brief Allocation flags for @see rocprofiler_buffer_tracing_scratch_memory_record_t + */ +// NOLINTNEXTLINE(performance-enum-size) +typedef enum +{ + ROCPROFILER_SCRATCH_ALLOC_FLAG_NONE = 0, + ROCPROFILER_SCRATCH_ALLOC_FLAG_USE_ONCE = + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE, ///< This scratch allocation is only valid for 1 + ///< dispatch. + ROCPROFILER_SCRATCH_ALLOC_FLAG_ALT = + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT, ///< Used alternate scratch instead of main scratch +} rocprofiler_scratch_alloc_flag_t; diff --git a/source/include/rocprofiler-sdk/kfd/CMakeLists.txt b/source/include/rocprofiler-sdk/kfd/CMakeLists.txt new file mode 100644 index 0000000000..2a63111257 --- /dev/null +++ b/source/include/rocprofiler-sdk/kfd/CMakeLists.txt @@ -0,0 +1,11 @@ +# +# +# Installation of public KFD headers +# +# +set(ROCPROFILER_KFD_HEADER_FILES page_migration_args.h page_migration_id.h) + +install( + FILES ${ROCPROFILER_KFD_HEADER_FILES} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/kfd + COMPONENT development) diff --git a/source/include/rocprofiler-sdk/kfd/page_migration_args.h b/source/include/rocprofiler-sdk/kfd/page_migration_args.h new file mode 100644 index 0000000000..d1eed33d9f --- /dev/null +++ b/source/include/rocprofiler-sdk/kfd/page_migration_args.h @@ -0,0 +1,104 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include +#include + +#include + +ROCPROFILER_EXTERN_C_INIT + +typedef struct rocprofiler_page_migration_none_t +{ + char empty; +} rocprofiler_page_migration_none_t; + +typedef struct rocprofiler_page_migration_page_migrate_start_t +{ + uint64_t start_addr; + uint64_t end_addr; + rocprofiler_agent_id_t from_agent; + rocprofiler_agent_id_t to_agent; + rocprofiler_agent_id_t prefetch_agent; + rocprofiler_agent_id_t preferred_agent; + rocprofiler_page_migration_trigger_t trigger; +} rocprofiler_page_migration_page_migrate_start_t; + +typedef struct rocprofiler_page_migration_page_migrate_end_t +{ + uint64_t start_addr; + uint64_t end_addr; + rocprofiler_agent_id_t from_agent; + rocprofiler_agent_id_t to_agent; + rocprofiler_page_migration_trigger_t trigger; +} rocprofiler_page_migration_page_migrate_end_t; + +typedef struct rocprofiler_page_migration_page_fault_start_t +{ + uint32_t read_fault : 1; + rocprofiler_agent_id_t agent_id; + uint64_t address; +} rocprofiler_page_migration_page_fault_start_t; + +typedef struct rocprofiler_page_migration_page_fault_end_t +{ + uint32_t migrated : 1; + rocprofiler_agent_id_t agent_id; + uint64_t address; +} rocprofiler_page_migration_page_fault_end_t; + +typedef struct rocprofiler_page_migration_queue_eviction_t +{ + rocprofiler_agent_id_t agent_id; + rocprofiler_page_migration_queue_suspend_trigger_t trigger; +} rocprofiler_page_migration_queue_eviction_t; + +typedef struct rocprofiler_page_migration_queue_restore_t +{ + uint32_t rescheduled : 1; + rocprofiler_agent_id_t agent_id; +} rocprofiler_page_migration_queue_restore_t; + +typedef struct rocprofiler_page_migration_unmap_from_gpu_t +{ + uint64_t start_addr; + uint64_t end_addr; + rocprofiler_agent_id_t agent_id; + rocprofiler_page_migration_unmap_from_gpu_trigger_t trigger; +} rocprofiler_page_migration_unmap_from_gpu_t; + +typedef union +{ + rocprofiler_page_migration_none_t none; + rocprofiler_page_migration_page_migrate_start_t page_migrate_start; + rocprofiler_page_migration_page_migrate_end_t page_migrate_end; + rocprofiler_page_migration_page_fault_start_t page_fault_start; + rocprofiler_page_migration_page_fault_end_t page_fault_end; + rocprofiler_page_migration_queue_eviction_t queue_eviction; + rocprofiler_page_migration_queue_restore_t queue_restore; + rocprofiler_page_migration_unmap_from_gpu_t unmap_from_gpu; + uint64_t reserved[16]; +} rocprofiler_page_migration_args_t; + +ROCPROFILER_EXTERN_C_FINI diff --git a/source/include/rocprofiler-sdk/kfd/page_migration_id.h b/source/include/rocprofiler-sdk/kfd/page_migration_id.h new file mode 100644 index 0000000000..7f21c01675 --- /dev/null +++ b/source/include/rocprofiler-sdk/kfd/page_migration_id.h @@ -0,0 +1,83 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include +#include +#include +#include +#include + +#include + +ROCPROFILER_EXTERN_C_INIT + +/** + * @brief Page migration triggers + * + */ +typedef enum +{ + ROCPROFILER_PAGE_MIGRATION_TRIGGER_NONE = -1, + ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH, ///< Migration triggered by a prefetch + ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, ///< Triggered by a page fault on the GPU + ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, ///< Triggered by a page fault on the CPU + ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, ///< Page evicted by linux TTM (Translation + ///< Table Manager) + ROCPROFILER_PAGE_MIGRATION_TRIGGER_LAST, +} rocprofiler_page_migration_trigger_t; + +/** + * @brief Page migration triggers causing the queue to suspend + * + */ +typedef enum +{ + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_NONE = -1, + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, ///< Queue suspended by TTM (Translation + ///< Table Manager) operation + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, ///< Queues evicted due to + ///< process save + ///< (checkpoint) by CRIU + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, ///< Queues restored during + ///< process restore by CRIU + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_LAST, +} rocprofiler_page_migration_queue_suspend_trigger_t; + +/** + * @brief Page migration triggers causing an unmap from the GPU + * + */ +typedef enum +{ + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_NONE = -1, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST, +} rocprofiler_page_migration_unmap_from_gpu_trigger_t; + +ROCPROFILER_EXTERN_C_FINI diff --git a/source/include/rocprofiler-sdk/rocprofiler.h b/source/include/rocprofiler-sdk/rocprofiler.h index 1681cef966..e129b92f12 100644 --- a/source/include/rocprofiler-sdk/rocprofiler.h +++ b/source/include/rocprofiler-sdk/rocprofiler.h @@ -77,7 +77,7 @@ ROCPROFILER_EXTERN_C_FINI #include "rocprofiler-sdk/hsa.h" #include "rocprofiler-sdk/intercept_table.h" #include "rocprofiler-sdk/internal_threading.h" -// #include "rocprofiler-sdk/marker.h" +#include "rocprofiler-sdk/marker.h" #include "rocprofiler-sdk/pc_sampling.h" #include "rocprofiler-sdk/profile_config.h" // #include "rocprofiler-sdk/spm.h" diff --git a/source/lib/rocprofiler-sdk/page_migration/CMakeLists.txt b/source/lib/rocprofiler-sdk/page_migration/CMakeLists.txt index f72dd12e2a..a789aeac64 100644 --- a/source/lib/rocprofiler-sdk/page_migration/CMakeLists.txt +++ b/source/lib/rocprofiler-sdk/page_migration/CMakeLists.txt @@ -1,6 +1,6 @@ # # -set(ROCPROFILER_LIB_UVM_SOURCES page_migration.cpp) +set(ROCPROFILER_LIB_UVM_SOURCES abi.cpp page_migration.cpp) set(ROCPROFILER_LIB_UVM_HEADERS defines.hpp page_migration.hpp utils.hpp) target_sources(rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_UVM_SOURCES} diff --git a/source/lib/rocprofiler-sdk/page_migration/abi.cpp b/source/lib/rocprofiler-sdk/page_migration/abi.cpp new file mode 100644 index 0000000000..6293474040 --- /dev/null +++ b/source/lib/rocprofiler-sdk/page_migration/abi.cpp @@ -0,0 +1,106 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include + +#include "lib/common/container/small_vector.hpp" +#include "lib/common/defines.hpp" +#include "lib/common/mpl.hpp" +#include "lib/rocprofiler-sdk/details/kfd_ioctl.h" +#include "lib/rocprofiler-sdk/page_migration/utils.hpp" + +#define ASSERT_SAME(A, B) static_assert(static_cast(A) == static_cast(B)) + +#define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1 +#include "lib/rocprofiler-sdk/page_migration/page_migration.def.cpp" +#undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL + +namespace rocprofiler +{ +namespace page_migration +{ +using namespace rocprofiler::page_migration; +using namespace rocprofiler::common::container; +using rocprofiler_page_migration_seq_t = std::make_index_sequence; + +static_assert(KFD_SMI_EVENT_NONE == 0); +static_assert(KFD_SMI_EVENT_MIGRATE_START == 5); +static_assert(KFD_SMI_EVENT_MIGRATE_END == 6); +static_assert(KFD_SMI_EVENT_PAGE_FAULT_START == 7); +static_assert(KFD_SMI_EVENT_PAGE_FAULT_END == 8); +static_assert(KFD_SMI_EVENT_QUEUE_EVICTION == 9); +static_assert(KFD_SMI_EVENT_QUEUE_RESTORE == 10); +static_assert(KFD_SMI_EVENT_UNMAP_FROM_GPU == 11); +static_assert(KFD_SMI_EVENT_ALL_PROCESS == 64); + +// Update page_migration.def.cpp with event mappings +// Update page_migration.cpp to parse and report new event +static_assert(ROCPROFILER_PAGE_MIGRATION_LAST == 8, + "New event added, update KFD to ROCPROFILER mappings"); + +// clang-format off +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, KFD_MIGRATE_TRIGGER_TTM_EVICTION ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, KFD_QUEUE_EVICTION_TRIGGER_SVM ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, KFD_QUEUE_EVICTION_TRIGGER_USERPTR ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, KFD_QUEUE_EVICTION_TRIGGER_TTM ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, KFD_QUEUE_EVICTION_CRIU_RESTORE ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU ); +// clang-format on + +static_assert(kfd_bitmask(std::index_sequence()) == + (KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_PAGE_FAULT_START) | + KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_END) | + KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU))); + +static_assert((page_migration_info::kfd_bitmask | + page_migration_info::kfd_bitmask | + page_migration_info::kfd_bitmask) == + (KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_END) | + KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_QUEUE_EVICTION) | + KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU))); + +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_MIGRATE_START, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_MIGRATE_END, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_PAGE_FAULT_START, + rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_PAGE_FAULT_END, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_QUEUE_EVICTION, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_QUEUE_RESTORE, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_UNMAP_FROM_GPU, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU); + +} // namespace page_migration +} // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/page_migration/defines.hpp b/source/lib/rocprofiler-sdk/page_migration/defines.hpp index 28efe6c5cc..6719e0d68c 100644 --- a/source/lib/rocprofiler-sdk/page_migration/defines.hpp +++ b/source/lib/rocprofiler-sdk/page_migration/defines.hpp @@ -44,43 +44,17 @@ } \ } while(0) -#define APPEND_UVM_EVENT(X) ROCPROFILER_UVM_EVENT_##X -#define APPEND_1(X) APPEND_UVM_EVENT(X) -#define CONCAT(X, Y) X##Y -#define APPEND_2(A1, A2) APPEND_1(A1), APPEND_1(A2) -#define APPEND_3(A1, A2, A3) APPEND_2(A1, A2), APPEND_1(A3) -#define APPEND_4(A1, A2, A3, A4) APPEND_3(A1, A2, A3), APPEND_1(A4) -#define APPEND_5(A1, A2, A3, A4, A5) APPEND_4(A1, A2, A3, A4), APPEND_1(A5) - -#define MACRO_N(MACRO, N, ...) CONCAT(MACRO, N)(__VA_ARGS__) -#define APPLY_N(MACRO, ...) MACRO_N(MACRO, IMPL_DETAIL_FOR_EACH_NARG(__VA_ARGS__), __VA_ARGS__) - -#define GET_UVM_ENUMS(...) APPLY_N(APPEND_, __VA_ARGS__) - -// static constexpr size_t uvm_event = UVM_ENUM; -#define SPECIALIZE_UVM_KFD_EVENT(UVM_ENUM, KFD_ENUM, FORMAT_STRING) \ - template <> \ - struct uvm_event_info \ - { \ - static constexpr size_t kfd_event = KFD_ENUM; \ - static constexpr std::string_view format_str{FORMAT_STRING}; \ +#define SPECIALIZE_PAGE_MIGRATION_INFO(ROCPROF_NAME, KFD_NAME, FORMAT_STRING) \ + template <> \ + struct page_migration_info \ + { \ + static constexpr auto name = "PAGE_MIGRATION_" #ROCPROF_NAME; \ + static constexpr size_t operation = ROCPROFILER_PAGE_MIGRATION_##ROCPROF_NAME; \ + static constexpr size_t kfd_operation = KFD_SMI_EVENT_##KFD_NAME; \ + static constexpr size_t kfd_bitmask = bitmask(KFD_SMI_EVENT_##KFD_NAME); \ + static constexpr std::string_view format_str = FORMAT_STRING; \ }; -#define SPECIALIZE_PAGE_MIGRATION_INFO(TYPE, ...) \ - template <> \ - struct page_migration_info \ - { \ - static constexpr auto operation_idx = ROCPROFILER_PAGE_MIGRATION_##TYPE; \ - static constexpr auto name = "PAGE_MIGRATION_" #TYPE; \ - static constexpr size_t uvm_bitmask = \ - bitmask(std::index_sequence()); \ - static constexpr size_t kfd_bitmask = \ - to_kfd_bitmask(std::index_sequence()); \ - } - -#define COPY_FROM_START_1(MEMBER) end.MEMBER = start.MEMBER; -#define COPY_FROM_START_2(UNION_TYPE, MEMBER) end.UNION_TYPE.MEMBER = start.UNION_TYPE.MEMBER; - #define SPECIALIZE_KFD_IOC_IOCTL(STRUCT, ARG_IOC) \ template <> \ struct IOC_event \ diff --git a/source/lib/rocprofiler-sdk/page_migration/page_migration.cpp b/source/lib/rocprofiler-sdk/page_migration/page_migration.cpp index 07d2fe39f4..50cb702b25 100644 --- a/source/lib/rocprofiler-sdk/page_migration/page_migration.cpp +++ b/source/lib/rocprofiler-sdk/page_migration/page_migration.cpp @@ -21,9 +21,9 @@ // THE SOFTWARE. #include "lib/rocprofiler-sdk/page_migration/page_migration.hpp" +#include "lib/common/logging.hpp" #include "lib/common/mpl.hpp" #include "lib/common/static_object.hpp" -#include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" @@ -44,14 +44,15 @@ #include #include #include -#include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -76,25 +77,14 @@ namespace page_migration template using small_vector = common::container::small_vector; -using context_t = context::context; -using context_array_t = common::container::small_vector; -using kfd_event_id_t = decltype(KFD_SMI_EVENT_NONE); -using page_migration_record_t = rocprofiler_buffer_tracing_page_migration_record_t; -using migrate_trigger_t = rocprofiler_page_migration_trigger_t; -using qsuspend_trigger_t = rocprofiler_page_migration_queue_suspend_trigger_t; -using unmap_trigger_t = rocprofiler_page_migration_unmap_from_gpu_trigger_t; +using context_t = context::context; +using context_array_t = common::container::small_vector; -// Parsing and utilities -namespace -{ -using namespace page_migration; +template +struct page_migration_info; -constexpr auto -page_to_bytes(size_t val) -{ - // each page is 4KB = 4096 bytes - return val << 12; -} +template +struct kfd_event_info; template struct page_migration_enum_info; @@ -102,122 +92,88 @@ struct page_migration_enum_info; template struct page_migration_bounds; -#define SPECIALIZE_PM_ENUM_INFO(TYPE, TRIGGER_CATEGORY, NAME) \ - template <> \ - struct page_migration_enum_info \ - { \ - static constexpr auto name = #NAME; \ - }; - -#define SPECIALIZE_PM_ENUM_BOUNDS(TYPE, TRIGGER_CATEGORY) \ - template <> \ - struct page_migration_bounds \ - { \ - static constexpr auto last = ROCPROFILER_PAGE_MIGRATION_##TRIGGER_CATEGORY##_LAST; \ - }; - -using queue_suspend_trigger_t = rocprofiler_page_migration_queue_suspend_trigger_t; -using unmap_from_gpu_trigger_t = rocprofiler_page_migration_unmap_from_gpu_trigger_t; - -SPECIALIZE_PM_ENUM_BOUNDS(rocprofiler_page_migration_trigger_t, TRIGGER) -SPECIALIZE_PM_ENUM_BOUNDS(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER) -SPECIALIZE_PM_ENUM_BOUNDS(unmap_from_gpu_trigger_t, UNMAP_FROM_GPU_TRIGGER) - -SPECIALIZE_PM_ENUM_INFO(rocprofiler_page_migration_trigger_t, TRIGGER, PREFETCH) -SPECIALIZE_PM_ENUM_INFO(rocprofiler_page_migration_trigger_t, TRIGGER, PAGEFAULT_GPU) -SPECIALIZE_PM_ENUM_INFO(rocprofiler_page_migration_trigger_t, TRIGGER, PAGEFAULT_CPU) -SPECIALIZE_PM_ENUM_INFO(rocprofiler_page_migration_trigger_t, TRIGGER, TTM_EVICTION) - -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, SVM) -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, USERPTR) -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, TTM) -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, SUSPEND) -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, CRIU_CHECKPOINT) -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, CRIU_RESTORE) - -SPECIALIZE_PM_ENUM_INFO(unmap_from_gpu_trigger_t, UNMAP_FROM_GPU_TRIGGER, MMU_NOTIFY) -SPECIALIZE_PM_ENUM_INFO(unmap_from_gpu_trigger_t, UNMAP_FROM_GPU_TRIGGER, MMU_NOTIFY_MIGRATE) -SPECIALIZE_PM_ENUM_INFO(unmap_from_gpu_trigger_t, UNMAP_FROM_GPU_TRIGGER, UNMAP_FROM_CPU) - -using trigger_type_list_t = common::mpl::type_list; - -template -std::string_view -to_string_impl(EnumT val, std::index_sequence) +// Parsing and utilities +namespace { - if(val == Idx) return page_migration_enum_info::name; - if constexpr(sizeof...(IdxTail) > 0) - return to_string_impl(val, std::index_sequence{}); - else - return std::string_view{}; -} - -template -std::string_view -to_string(EnumT val, - std::enable_if_t::value && - common::mpl::is_one_of::value, - int> = 0) +constexpr auto +page_to_bytes(size_t val) { - constexpr auto last = page_migration_bounds::last; - return to_string_impl(val, std::make_index_sequence{}); + // each page is 4KB = 4096 bytes + return val << 12; } template -page_migration_record_t parse_uvm_event(std::string_view) +page_migration_record_t parse_event(std::string_view) { - ROCP_FATAL_IF(false) << uvm_event_info::format_str; + ROCP_FATAL_IF(false) << page_migration_info::format_str; return {}; } +auto +get_node_agent_id(uint32_t _node_id) +{ + using agent_id_map_t = std::unordered_map; + static auto*& _data = static_object::construct([]() { + auto _v = std::unordered_map{}; + for(const auto* agent : agent::get_agents()) + _v.emplace(agent->gpu_id, agent->id); + return _v; + }()); + + CHECK(_data != nullptr); + ROCP_FATAL_IF(_data->count(_node_id) == 0) << "page_migration: unknown node id: " << _node_id; + return _data->at(_node_id); +} + template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.page_fault; - uint32_t kind{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.page_fault_start; + uint32_t kind{}; + uint32_t _node_id = 0; char fault; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.start_timestamp, + &rec.timestamp, &rec.pid, &e.address, - &e.node_id, + &_node_id, &fault); e.read_fault = (fault == 'R'); e.address = page_to_bytes(e.address); + e.agent_id = get_node_agent_id(_node_id); - ROCP_INFO << fmt::format("Page fault start [ ts: {} pid: {} addr: 0x{:X} node: {} ] \n", - rec.start_timestamp, - rec.pid, - e.address, - e.node_id); + ROCP_TRACE << fmt::format("Page fault start [ ts: {} pid: {} addr: 0x{:X} node: {} ] \n", + rec.timestamp, + rec.pid, + e.address, + e.agent_id.handle); return rec; } template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.page_fault; - uint32_t kind{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.page_fault_end; + uint32_t kind{}; + uint32_t _node_id = 0; char migrated; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.end_timestamp, + &rec.timestamp, &rec.pid, &e.address, - &e.node_id, + &_node_id, &migrated); // M or U -> migrated / unmigrated? @@ -225,16 +181,18 @@ parse_uvm_event(std::string_view str) e.migrated = true; else if(migrated == 'U') e.migrated = false; - // else - // throw std::runtime_error("Invalid SVM memory migrate type"); - e.address = page_to_bytes(e.address); + else + ROCP_WARNING << "Unknown PAGE_FAULT_END migrated/unmigrated state"; - ROCP_INFO << fmt::format( + e.address = page_to_bytes(e.address); + e.agent_id = get_node_agent_id(_node_id); + + ROCP_TRACE << fmt::format( "Page fault end [ ts: {} pid: {} addr: 0x{:X} node: {} migrated: {} ] \n", - rec.end_timestamp, + rec.timestamp, rec.pid, e.address, - e.node_id, + e.agent_id.handle, migrated); return rec; @@ -242,507 +200,310 @@ parse_uvm_event(std::string_view str) template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.page_migrate; - uint32_t kind{}; - uint32_t trigger{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.page_migrate_start; + uint32_t kind{}; + uint32_t trigger{}; + uint32_t _from_node = 0; + uint32_t _to_node = 0; + uint32_t _prefetch_node = 0; + uint32_t _preferred_node = 0; - std::sscanf(str.data(), - uvm_event_info::format_str.data(), - &kind, - &rec.start_timestamp, - &rec.pid, - &e.start_addr, - &e.end_addr, - &e.from_node, - &e.to_node, - &e.prefetch_node, - &e.preferred_node, - &trigger); + std::sscanf( + str.data(), + page_migration_info::format_str.data(), + &kind, + &rec.timestamp, + &rec.pid, + &e.start_addr, + &e.end_addr, + &_from_node, + &_to_node, + &_prefetch_node, + &_preferred_node, + &trigger); e.end_addr += e.start_addr; - e.trigger = static_cast(trigger); - e.start_addr = page_to_bytes(e.start_addr); - e.end_addr = page_to_bytes(e.end_addr) - 1; + e.trigger = static_cast(trigger); + e.start_addr = page_to_bytes(e.start_addr); + e.end_addr = page_to_bytes(e.end_addr) - 1; + e.from_agent = get_node_agent_id(_from_node); + e.to_agent = get_node_agent_id(_to_node); + e.prefetch_agent = get_node_agent_id(_prefetch_node); + e.preferred_agent = get_node_agent_id(_preferred_node); - ROCP_INFO << fmt::format( + ROCP_TRACE << fmt::format( "Page migrate start [ ts: {} pid: {} addr s: 0x{:X} addr " "e: 0x{:X} size: {}B from node: {} to node: {} prefetch node: {} preferred node: {} " "trigger: {} ] \n", - rec.start_timestamp, + rec.timestamp, rec.pid, e.start_addr, e.end_addr, (e.end_addr - e.start_addr), - e.from_node, - e.to_node, - e.prefetch_node, - e.preferred_node, - to_string(e.trigger)); + e.from_agent.handle, + e.to_agent.handle, + e.prefetch_agent.handle, + e.preferred_agent.handle, + trigger); return rec; } template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.page_migrate; - uint32_t kind{}; - uint32_t trigger{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.page_migrate_end; + uint32_t kind{}; + uint32_t trigger{}; + uint32_t _from_node = 0; + uint32_t _to_node = 0; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.end_timestamp, + &rec.timestamp, &rec.pid, &e.start_addr, &e.end_addr, - &e.from_node, - &e.to_node, + &_from_node, + &_to_node, &trigger); e.end_addr += e.start_addr; e.trigger = static_cast(trigger); e.start_addr = page_to_bytes(e.start_addr); e.end_addr = page_to_bytes(e.end_addr) - 1; + e.from_agent = get_node_agent_id(_from_node); + e.to_agent = get_node_agent_id(_to_node); - ROCP_INFO << fmt::format("Page migrate end [ ts: {} pid: {} addr s: 0x{:X} addr e: " - "0x{:X} from node: {} to node: {} trigger: {} ] \n", - rec.end_timestamp, - rec.pid, - e.start_addr, - e.end_addr, - e.from_node, - e.to_node, - to_string(e.trigger)); + ROCP_TRACE << fmt::format("Page migrate end [ ts: {} pid: {} addr s: 0x{:X} addr e: " + "0x{:X} from node: {} to node: {} trigger: {} ] \n", + rec.timestamp, + rec.pid, + e.start_addr, + e.end_addr, + e.from_agent.handle, + e.to_agent.handle, + trigger); return rec; } template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.queue_suspend; - uint32_t kind{}; - uint32_t trigger{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.queue_eviction; + uint32_t kind{}; + uint32_t trigger{}; + uint32_t _node_id = 0; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.start_timestamp, + &rec.timestamp, &rec.pid, - &e.node_id, + &_node_id, &trigger); - rec.queue_suspend.trigger = static_cast(trigger); + e.trigger = static_cast(trigger); + e.agent_id = get_node_agent_id(_node_id); - ROCP_INFO << fmt::format("Queue evict [ ts: {} pid: {} node: {} trigger: {} ] \n", - rec.start_timestamp, - rec.pid, - e.node_id, - to_string(e.trigger)); + ROCP_TRACE << fmt::format("Queue evict [ ts: {} pid: {} node: {} trigger: {} ] \n", + rec.timestamp, + rec.pid, + e.agent_id.handle, + trigger); return rec; } template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.queue_suspend; - uint32_t kind{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.queue_restore; + uint32_t kind{}; + uint32_t _node_id = 0; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.end_timestamp, + &rec.timestamp, &rec.pid, - &e.node_id); + &_node_id); // check if we have a valid char at the end. -1 has \0 if(str[str.size() - 2] == 'R') e.rescheduled = true; else e.rescheduled = false; + e.agent_id = get_node_agent_id(_node_id); - ROCP_INFO << fmt::format( - "Queue restore [ ts: {} pid: {} node: {} ] \n", rec.end_timestamp, rec.pid, e.node_id); + ROCP_TRACE << fmt::format( + "Queue restore [ ts: {} pid: {} node: {} ] \n", rec.timestamp, rec.pid, e.agent_id.handle); return rec; } template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.unmap_from_gpu; - uint32_t kind{}; - uint32_t trigger{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.unmap_from_gpu; + uint32_t kind{}; + uint32_t trigger{}; + uint32_t _node_id = 0; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.start_timestamp, + &rec.timestamp, &rec.pid, &e.start_addr, &e.end_addr, - &e.node_id, + &_node_id, &trigger); e.end_addr += e.start_addr; - rec.end_timestamp = rec.start_timestamp; - rec.unmap_from_gpu.trigger = static_cast(trigger); - e.start_addr = page_to_bytes(e.start_addr); - e.end_addr = page_to_bytes(e.end_addr); + e.trigger = static_cast(trigger); + e.start_addr = page_to_bytes(e.start_addr); + e.end_addr = page_to_bytes(e.end_addr); + e.agent_id = get_node_agent_id(_node_id); - ROCP_INFO << fmt::format("Unmap from GPU [ ts: {} pid: {} start addr: 0x{:X} end addr: 0x{:X} " - "node: {} trigger {} ] \n", - rec.start_timestamp, - rec.pid, - e.start_addr, - e.end_addr, - e.node_id, - to_string(e.trigger)); + ROCP_TRACE << fmt::format( + "Unmap from GPU [ ts: {} pid: {} start addr: 0x{:X} end addr: 0x{:X} " + "node: {} trigger {} ] \n", + rec.timestamp, + rec.pid, + e.start_addr, + e.end_addr, + e.agent_id.handle, + trigger); return rec; } +template <> +page_migration_record_t parse_event(std::string_view) +{ + throw std::runtime_error( + "ROCPROFILER_PAGE_MIGRATION_NONE for parsing page migration events should not happen"); +} + template page_migration_record_t -parse_uvm_event(uvm_event_id_t event_id, - std::string_view strn, - std::index_sequence) +parse_event(size_t event_id, std::string_view strn, std::index_sequence) { if(OpInx == static_cast(event_id)) { - auto rec = parse_uvm_event(strn); + auto rec = parse_event(strn); rec.size = sizeof(page_migration_record_t); rec.kind = ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION; - rec.operation = to_rocprof_op(OpInx); + rec.operation = static_cast(OpInx); return rec; } - else if constexpr(sizeof...(OpInxs) > 0) - return parse_uvm_event(event_id, strn, std::index_sequence{}); - else - return page_migration_record_t{}; + + if constexpr(sizeof...(OpInxs) > 0) + return parse_event(event_id, strn, std::index_sequence{}); + + return page_migration_record_t{}; } /* -----------------------------------------------------------------------------------*/ -template -void -update_end(const page_migration_record_t& start, page_migration_record_t& end); - -template <> -void -update_end(const page_migration_record_t& start, - page_migration_record_t& end) -{ - CHECK(start.pid == end.pid); - CHECK(start.page_fault.address == end.page_fault.address); - CHECK(start.page_fault.node_id == end.page_fault.node_id); - COPY_FROM_START_1(start_timestamp); - COPY_FROM_START_2(page_fault, migrated); -} - -template <> -void -update_end(const page_migration_record_t& start, - page_migration_record_t& end) -{ - CHECK(start.pid == end.pid); - CHECK(start.page_migrate.start_addr == end.page_migrate.start_addr); - CHECK(start.page_migrate.end_addr == end.page_migrate.end_addr); - CHECK(start.page_migrate.from_node == end.page_migrate.from_node); - CHECK(start.page_migrate.to_node == end.page_migrate.to_node); - CHECK(start.page_migrate.trigger == end.page_migrate.trigger); - COPY_FROM_START_1(start_timestamp); - COPY_FROM_START_2(page_migrate, prefetch_node); - COPY_FROM_START_2(page_migrate, preferred_node); -} - -template <> -void -update_end(const page_migration_record_t& start, - page_migration_record_t& end) -{ - CHECK(start.pid == end.pid); - CHECK(start.queue_suspend.node_id == end.queue_suspend.node_id); - COPY_FROM_START_1(start_timestamp); - COPY_FROM_START_2(queue_suspend, trigger); -} - -/* -----------------------------------------------------------------------------------*/ - -template -uint64_t -get_key(const rocprofiler_buffer_tracing_page_migration_record_t& rec) = delete; - -template <> -uint64_t -get_key( - const rocprofiler_buffer_tracing_page_migration_record_t& rec) -{ - // page migrate, use address as identifier - return rec.page_migrate.start_addr; -} - -template <> -uint64_t -get_key( - const rocprofiler_buffer_tracing_page_migration_record_t& rec) -{ - // page fault, use address as identifier - return rec.page_fault.address; -} - -template <> -uint64_t -get_key( - const rocprofiler_buffer_tracing_page_migration_record_t& rec) -{ - // Queue suspend/evict. Node ID and pid are sufficient as in kfd, - // eviction is reference-counted per process-device. - uint64_t node_id = rec.queue_suspend.node_id; - return (node_id << 32) | rec.pid; -} - -/* -----------------------------------------------------------------------------------*/ - -template <> -page_migration_record_t parse_uvm_event<0>(std::string_view) -{ - throw std::runtime_error("None Op for parsing UVM events should not happen"); -} - -template <> -void -update_end(const page_migration_record_t&, page_migration_record_t&) -{ - throw std::runtime_error("None Op for parsing UVM events should not happen"); -} - -template <> -uint64_t -get_key(const page_migration_record_t&) -{ - throw std::runtime_error("None Op for parsing UVM events should not happen"); -} - -/* -----------------------------------------------------------------------------------*/ - -template -void -update_end(uvm_event_id_t event_id, - const page_migration_record_t& start, - page_migration_record_t& end, - std::index_sequence) -{ - if(OpInx == static_cast(event_id)) - update_end(start, end); - else if constexpr(sizeof...(OpInxs) > 0) - update_end(event_id, start, end, std::index_sequence{}); - else - return; -} - -template -uint64_t -get_key(uvm_event_id_t event_id, - const page_migration_record_t& record, - std::index_sequence) -{ - if constexpr(OpInx == ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU) - return {}; - else if(is_rocprof_uvm_map(event_id)) - return get_key::operation_idx>(record); - else if constexpr(sizeof...(OpInxs) > 0) - return get_key(event_id, record, std::index_sequence{}); - else - return {}; -} - -void -update_end(uvm_event_id_t event_id, - const page_migration_record_t& start, - page_migration_record_t& end) -{ - update_end(event_id, - start, - end, - std::index_sequence{}); -} } // namespace +size_t +get_rocprof_op(const std::string_view event_data) +{ + uint32_t kfd_event_id{}; + std::sscanf(event_data.data(), "%x ", &kfd_event_id); + + auto rocprof_id = + kfd_to_rocprof_op(static_cast(kfd_event_id), + std::make_index_sequence{}); + + ROCP_CI_LOG_IF(WARNING, rocprof_id == 0) + << fmt::format("Failed to parse KFD event ID {}. Parsed ID: {}, SDK ID: {}\n", + event_data[0], + kfd_event_id, + rocprof_id); + + return rocprof_id; +} + +void +kfd_readlines(const std::string_view str, void(handler)(std::string_view)) +{ + const auto find_newline = [&](auto b) { return std::find(b, str.cend(), '\n'); }; + const auto* cursor = str.cbegin(); + + for(const auto* pos = find_newline(cursor); pos != str.cend(); pos = find_newline(cursor)) + { + size_t char_count = pos - cursor; + assert(char_count > 0); + std::string_view event_str{cursor, char_count}; + + ROCP_INFO << fmt::format("KFD event: [{}]", event_str); + handler(event_str); + + cursor = pos + 1; + } +} + // Event capture and reporting namespace { -// Support seems to have been added in kfdv > 1.10 -static_assert(KFD_IOCTL_MAJOR_VERSION == 1, "KFD API major version changed"); -static_assert(KFD_IOCTL_MINOR_VERSION >= 10, "KFD SMI support missing in kfd_ioctl.h"); +constexpr auto kfd_ioctl_version = (1000 * KFD_IOCTL_MAJOR_VERSION) + KFD_IOCTL_MINOR_VERSION; +// Support has been added in kfdv >= 1.10+ +static_assert(kfd_ioctl_version >= 1010, "KFD SMI support missing in kfd_ioctl.h"); -// Convert from public events to KFD enum config - -template -constexpr size_t -kfd_bitmask_impl(size_t uvm_event_id, std::index_sequence) +auto +get_contexts(int operation) { - if(uvm_event_id == OpInx) return page_migration_info::kfd_bitmask; - if constexpr(sizeof...(OpInxs) > 0) - return kfd_bitmask_impl(uvm_event_id, std::index_sequence{}); - else - return 0; -} + auto active_contexts = context::get_active_contexts([](const auto* ctx) { + return (ctx->buffered_tracer && + ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)); + }); + auto operation_ctxs = context::context_array_t{}; -template -constexpr auto -kfd_bitmask(const small_vector& rocprof_event_ids, std::index_sequence) -{ - uint64_t m{}; - for(const size_t& event_id : rocprof_event_ids) + for(const auto* itr : active_contexts) { - m |= kfd_bitmask_impl(event_id, std::index_sequence{}); - } - return m; -} - -template -constexpr size_t -to_uvm_op_impl(size_t kfd_id, std::index_sequence) -{ - // if(kfd_id == uvm_event_info::kfd_event) return uvm_event_info::uvm_event; - if(kfd_id == uvm_event_info::kfd_event) return OpInx; - if constexpr(sizeof...(OpInxs) > 0) - return to_uvm_op_impl(kfd_id, std::index_sequence{}); - else - return 0; -} - -constexpr uvm_event_id_t -kfd_to_uvm_op(kfd_event_id_t kfd_id) -{ - return static_cast( - to_uvm_op_impl(kfd_id, std::make_index_sequence{})); -} - -struct buffered_context_data -{ - const context::context* ctx = nullptr; -}; - -void -populate_contexts(int operation_idx, std::vector& buffered_contexts) -{ - buffered_contexts.clear(); - - auto active_contexts = context::context_array_t{}; - for(const auto* itr : context::get_active_contexts(active_contexts)) - { - if(itr->buffered_tracer) + // if the given domain + op is not enabled, skip this context + if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, operation)) { - // if the given domain + op is not enabled, skip this context - if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, - operation_idx)) - buffered_contexts.emplace_back(buffered_context_data{itr}); + operation_ctxs.emplace_back(itr); } } -} -void -remove_events(events_cache_t& events, size_t timestamp) -{ - for(auto map : events) - { - for(auto i = map.begin(); i != map.end(); ++i) - { - if(i->second.start_timestamp < timestamp) map.erase(i); - } - } -} - -bool -report_event(uvm_event_id_t event_id, - rocprofiler_buffer_tracing_page_migration_record_t& end_record) -{ - using rocprofiler_page_migr_seq = std::make_index_sequence; - static thread_local events_cache_t EVENTS_CACHE{}; - - auto& events_map = EVENTS_CACHE[to_rocprof_op(event_id)]; - - switch(static_cast(event_id)) - { - case ROCPROFILER_UVM_EVENT_MIGRATE_START: [[fallthrough]]; - case ROCPROFILER_UVM_EVENT_PAGE_FAULT_START: [[fallthrough]]; - case ROCPROFILER_UVM_EVENT_QUEUE_EVICTION: - { - // insert into map - auto key = get_key(event_id, end_record, rocprofiler_page_migr_seq{}); - events_map[key] = end_record; - return false; - } - // End events. Pair up and report - case ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU: - { - return true; - } - case ROCPROFILER_UVM_EVENT_MIGRATE_END: [[fallthrough]]; - case ROCPROFILER_UVM_EVENT_PAGE_FAULT_END: [[fallthrough]]; - case ROCPROFILER_UVM_EVENT_QUEUE_RESTORE: - { - auto key = get_key(event_id, end_record, rocprofiler_page_migr_seq{}); - if(auto start_rec = events_map.find(key); start_rec != events_map.end()) - { - update_end(event_id, start_rec->second, end_record); - } - else - { - // we got an end record and can't find the start record - // drop everything in the map before this timestamp - remove_events(EVENTS_CACHE, end_record.end_timestamp); - } - return true; - } - default: throw std::runtime_error("Invalid page migration event"); - } + return operation_ctxs; } void handle_reporting(std::string_view event_data) { - uint32_t kfd_event_id; - std::sscanf(event_data.data(), "%x ", &kfd_event_id); - std::vector buffered_contexts{}; - - auto uvm_event_op = kfd_to_uvm_op(static_cast(kfd_event_id)); - - populate_contexts(uvm_event_op, buffered_contexts); + const auto op_inx = get_rocprof_op(event_data); + auto buffered_contexts = get_contexts(op_inx); if(buffered_contexts.empty()) return; // Parse and process the event - auto record = parse_uvm_event( - uvm_event_op, event_data, std::make_index_sequence{}); + auto record = parse_event( + op_inx, event_data, std::make_index_sequence{}); - // pair up start and end and only then insert it into the buffer - if(report_event(uvm_event_op, record)) + for(const auto& itr : buffered_contexts) { - for(const auto& itr : buffered_contexts) - { - auto* _buffer = buffer::get_buffer(itr.ctx->buffered_tracer->buffer_data.at( - ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)); - CHECK_NOTNULL(_buffer)->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING, - ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, - record); - } + auto* buffer = buffer::get_buffer( + itr->buffered_tracer->buffer_data.at(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)); + CHECK_NOTNULL(buffer)->emplace( + ROCPROFILER_BUFFER_CATEGORY_TRACING, ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, record); } } @@ -751,8 +512,7 @@ handle_reporting(std::string_view event_data) // KFD utils namespace kfd { -void -poll_events(small_vector, bool); +void poll_events(small_vector); using fd_flags_t = decltype(EFD_NONBLOCK); using fd_t = decltype(pollfd::fd); @@ -824,26 +584,26 @@ struct poll_kfd_t struct gpu_fd_t { - unsigned int node_id{}; - fd_t fd{}; - const rocprofiler_agent_t* agent{}; + unsigned int node_id = 0; + fd_t fd = {}; + const rocprofiler_agent_t* agent = nullptr; }; - kfd_device_fd kfd_fd{}; - small_vector file_handles{}; - pollfd thread_notify{}; - std::thread bg_thread; - bool active{false}; + kfd_device_fd kfd_fd = {}; + small_vector file_handles = {}; + pollfd thread_notify = {}; + std::thread bg_thread = {}; + bool active = {false}; poll_kfd_t() = default; - poll_kfd_t(const small_vector& rprof_ev, bool non_blocking) + poll_kfd_t(const small_vector& rprof_ev) : kfd_fd{kfd_device_fd{}} { const auto kfd_flags = kfd_bitmask(rprof_ev, std::make_index_sequence{}); - ROCP_INFO << fmt::format("Setting KFD flags to [0b{:b}] \n", kfd_flags); + ROCP_TRACE << fmt::format("Setting KFD flags to [0b{:b}] \n", kfd_flags); // Create fd for notifying thread when we want to wake it up, and an eventfd for any events // to this thread @@ -876,7 +636,7 @@ struct poll_kfd_t { auto gpu_event_fd = get_node_fd(agent->gpu_id); file_handles.emplace_back(pollfd{gpu_event_fd, POLLIN, 0}); - ROCP_INFO << fmt::format( + ROCP_TRACE << fmt::format( "GPU node {} with fd {} added\n", agent->gpu_id, gpu_event_fd); } } @@ -886,14 +646,14 @@ struct poll_kfd_t { auto& fd = file_handles[i]; auto write_size = write(fd.fd, &kfd_flags, sizeof(kfd_flags)); - ROCP_INFO << fmt::format( + ROCP_TRACE << fmt::format( "Writing {} to GPU fd {} ({} bytes)\n", kfd_flags, fd.fd, write_size); CHECK(write_size == sizeof(kfd_flags)); } // start bg thread internal_threading::notify_pre_internal_thread_create(ROCPROFILER_LIBRARY); - bg_thread = std::thread{poll_events, file_handles, non_blocking}; + bg_thread = std::thread{poll_events, file_handles}; internal_threading::notify_post_internal_thread_create(ROCPROFILER_LIBRARY); active = true; @@ -949,7 +709,7 @@ get_config() kfd::poll_kfd_t::~poll_kfd_t() { - ROCP_INFO << fmt::format("Terminating poll_kfd\n"); + ROCP_TRACE << fmt::format("Terminating poll_kfd\n"); if(!active) return; // wake thread up @@ -961,7 +721,7 @@ kfd::poll_kfd_t::~poll_kfd_t() } while(bytes_written == -1 && (errno == EINTR || errno == EAGAIN)); if(bg_thread.joinable()) bg_thread.join(); - ROCP_INFO << fmt::format("Background thread terminated\n"); + ROCP_TRACE << fmt::format("Background thread terminated\n"); for(const auto& f : file_handles) close(f.fd); @@ -969,34 +729,28 @@ kfd::poll_kfd_t::~poll_kfd_t() } // namespace void -poll_events(small_vector file_handles, bool non_blocking) +poll_events(small_vector file_handles) { // storage to write records to, 1MB constexpr size_t PREALLOCATE_ELEMENT_COUNT{1024 * 128}; std::string scratch_buffer(PREALLOCATE_ELEMENT_COUNT, '\0'); - auto& exitfd = file_handles[1]; - const auto timeout_val = non_blocking == true ? 0 : -1; + auto& exitfd = file_handles[1]; // Wait or spin on events. // 0 -> return immediately even if no events // -1 -> wait indefinitely - ROCP_INFO << fmt::format("{} polling = {}, polling with timeout = {}", - non_blocking ? "Non-blocking" : "Blocking", - non_blocking, - timeout_val); - pthread_setname_np(pthread_self(), "bg:pagemigr"); for(auto& fd : file_handles) { - ROCP_INFO << fmt::format( + ROCP_TRACE << fmt::format( "Handle = {}, events = {}, revents = {}\n", fd.fd, fd.events, fd.revents); } while(!kfd::get_config().should_exit()) { - auto poll_ret = poll(file_handles.data(), file_handles.size(), timeout_val); + auto poll_ret = poll(file_handles.data(), file_handles.size(), -1); if(poll_ret == -1) throw std::runtime_error{"Background thread file descriptors are invalid"}; @@ -1009,6 +763,7 @@ poll_events(small_vector file_handles, bool non_blocking) using namespace std::chrono_literals; + // 0 and 1 are for generic and pipe-notify handles for(size_t i = 2; i < file_handles.size(); ++i) { auto& fd = file_handles[i]; @@ -1016,15 +771,9 @@ poll_events(small_vector file_handles, bool non_blocking) // We have data to read, perhaps multiple events if((fd.revents & POLLIN) != 0) { - size_t status_size = read(fd.fd, scratch_buffer.data(), scratch_buffer.size()); - - // ROCP_INFO << fmt::format( - // "status_size: {} size {}\n", status_size, scratch_buffer.size()); - std::string_view event_strings{scratch_buffer.data(), status_size}; - - // ROCP_INFO << fmt::format("Raw KFD string [({})]\n", - // event_strings.data()); - KFD_EVENT_PARSE_EVENTS(event_strings, handle_reporting); + size_t status_size = read(fd.fd, scratch_buffer.data(), scratch_buffer.size()); + auto event_strings = std::string_view{scratch_buffer.data(), status_size}; + kfd_readlines(event_strings, handle_reporting); } fd.revents = 0; } @@ -1051,7 +800,7 @@ get_ids(std::vector& _id_list, std::index_sequence) if(_v < static_cast(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST)) _vec.emplace_back(_v); }; - (_emplace(_id_list, page_migration_info::operation_idx), ...); + (_emplace(_id_list, page_migration_info::operation), ...); } bool @@ -1069,13 +818,13 @@ to_bitmask(small_vector& _id_list, std::index_sequence) if(_v < static_cast(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST)) _vec.emplace_back(_v); }; - (_emplace(_id_list, page_migration_info::operation_idx), ...); + (_emplace(_id_list, page_migration_info::operation), ...); } namespace { rocprofiler_status_t -init(const small_vector& event_ids, bool non_blocking) +init(const small_vector& event_ids) { // Check if version is more than 1.11 auto ver = kfd::get_version(); @@ -1084,7 +833,7 @@ init(const small_vector& event_ids, bool non_blocking) if(!context::get_registered_contexts(context_filter).empty()) { if(!kfd::get_config().kfd_handle) - kfd::get_config().kfd_handle = new kfd::poll_kfd_t{event_ids, non_blocking}; + kfd::get_config().kfd_handle = new kfd::poll_kfd_t{event_ids}; } return ROCPROFILER_STATUS_SUCCESS; } @@ -1104,12 +853,13 @@ rocprofiler_status_t init() { // Testing page migration - return init({ROCPROFILER_PAGE_MIGRATION_NONE, - ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, - ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU}, - rocprofiler::common::get_env("ROCPROF_PAGE_MIGRATION_NON_BLOCKING", false)); + return init({ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START, + ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END, + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START, + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END, + ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION, + ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU}); } void diff --git a/source/lib/rocprofiler-sdk/page_migration/page_migration.def.cpp b/source/lib/rocprofiler-sdk/page_migration/page_migration.def.cpp index ace2dae896..ab726c4bae 100644 --- a/source/lib/rocprofiler-sdk/page_migration/page_migration.def.cpp +++ b/source/lib/rocprofiler-sdk/page_migration/page_migration.def.cpp @@ -30,45 +30,30 @@ namespace rocprofiler { namespace page_migration { +using namespace rocprofiler::common; + +using kfd_event_id_t = decltype(KFD_SMI_EVENT_NONE); +using migrate_trigger_t = rocprofiler_page_migration_trigger_t; +using page_migration_record_t = rocprofiler_buffer_tracing_page_migration_record_t; +using queue_suspend_trigger_t = rocprofiler_page_migration_queue_suspend_trigger_t; +using unmap_from_gpu_trigger_t = rocprofiler_page_migration_unmap_from_gpu_trigger_t; + +using trigger_type_list_t = common::mpl::type_list; // clang-format off // Map ROCPROF UVM enums to KFD enums -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_NONE, KFD_SMI_EVENT_NONE, "Error: Invalid UVM event from KFD" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_MIGRATE_START, KFD_SMI_EVENT_MIGRATE_START, "%x %ld -%d @%lx(%lx) %x->%x %x:%x %d\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_MIGRATE_END, KFD_SMI_EVENT_MIGRATE_END, "%x %ld -%d @%lx(%lx) %x->%x %d\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, KFD_SMI_EVENT_PAGE_FAULT_START, "%x %ld -%d @%lx(%x) %c\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END, KFD_SMI_EVENT_PAGE_FAULT_END, "%x %ld -%d @%lx(%x) %c\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION, KFD_SMI_EVENT_QUEUE_EVICTION, "%x %ld -%d %x %d\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE, KFD_SMI_EVENT_QUEUE_RESTORE, "%x %ld -%d %x\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU, KFD_SMI_EVENT_UNMAP_FROM_GPU, "%x %ld -%d @%lx(%lx) %x %d\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(NONE, NONE, "Error: Invalid UVM event from KFD" ); +SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE_START, MIGRATE_START, "%x %ld -%d @%lx(%lx) %x->%x %x:%x %d\n"); +SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE_END, MIGRATE_END, "%x %ld -%d @%lx(%lx) %x->%x %d\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT_START, PAGE_FAULT_START, "%x %ld -%d @%lx(%x) %c\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT_END, PAGE_FAULT_END, "%x %ld -%d @%lx(%x) %c\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_EVICTION, QUEUE_EVICTION, "%x %ld -%d %x %d\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_RESTORE, QUEUE_RESTORE, "%x %ld -%d %x\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(UNMAP_FROM_GPU, UNMAP_FROM_GPU, "%x %ld -%d @%lx(%lx) %x %d\n" ); +#undef SPECIALIZE_PAGE_MIGRATION_INFO // clang-format on -# undef SPECIALIZE_UVM_KFD_EVENT -SPECIALIZE_PAGE_MIGRATION_INFO(NONE, NONE); -SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE, MIGRATE_START, MIGRATE_END); -SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT, PAGE_FAULT_START, PAGE_FAULT_END); -SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_SUSPEND, QUEUE_EVICTION, QUEUE_RESTORE); -SPECIALIZE_PAGE_MIGRATION_INFO(UNMAP_FROM_GPU, UNMAP_FROM_GPU); - -template -constexpr size_t to_rocprof_op_impl(std::index_sequence) -{ - return ((((bitmask(UvmOpInx) & page_migration_info::uvm_bitmask) != 0) * OpInxs) + ...); -} - -template -constexpr auto _to_rocprof_op_impl(std::index_sequence) -{ - return std::array{ - to_rocprof_op_impl(std::make_index_sequence{})...}; -} - -constexpr auto -to_rocprof_op(size_t pos) -{ - using rop = rocprofiler_page_migration_operation_t; - return static_cast( - _to_rocprof_op_impl(std::make_index_sequence{})[pos]); -} } // namespace page_migration } // namespace rocprofiler #endif diff --git a/source/lib/rocprofiler-sdk/page_migration/utils.hpp b/source/lib/rocprofiler-sdk/page_migration/utils.hpp index 65efda40ee..d519e255d7 100644 --- a/source/lib/rocprofiler-sdk/page_migration/utils.hpp +++ b/source/lib/rocprofiler-sdk/page_migration/utils.hpp @@ -22,12 +22,15 @@ #pragma once +#include "lib/common/container/small_vector.hpp" #include "lib/rocprofiler-sdk/details/kfd_ioctl.h" #include #include +#include #include +#include #include #include @@ -35,38 +38,32 @@ namespace rocprofiler { namespace page_migration { -// serves as an overview of what events we capture and report -enum fault_type_t -{ - NONE, - READ, - WRITE, -}; +/* serves as an overview of what events we capture and report -struct uvm_event_page_fault_start_t -{ - int kind; - uint64_t start_timestamp; - int pid; - int node_id; - uint64_t address; - fault_type_t fault; -}; - -struct uvm_event_page_fault_end_t +struct event_page_fault_start_t { int kind; - uint64_t end_timestamp; + uint64_t timestamp; + int pid; + int node_id; + uint64_t address; + fault_t fault; +}; + +struct event_page_fault_end_t +{ + int kind; + uint64_t timestamp; uint32_t pid; int node_id; uint64_t address; bool migrated; }; -struct uvm_event_migrate_start_t +struct event_migrate_start_t { int kind; - uint64_t start_timestamp; + uint64_t timestamp; uint32_t pid; uint64_t start; uint64_t end_offset; @@ -77,10 +74,10 @@ struct uvm_event_migrate_start_t uint32_t trigger; }; -struct uvm_event_migrate_end_t +struct event_migrate_end_t { int kind; - uint64_t end_timestamp; + uint64_t timestamp; uint32_t pid; uint64_t start; uint64_t end_offset; @@ -89,25 +86,25 @@ struct uvm_event_migrate_end_t uint32_t trigger; }; -struct uvm_event_queue_eviction_t +struct event_queue_eviction_t { int kind; - uint64_t start_timestamp; + uint64_t timestamp; uint32_t pid; int node_id; uint32_t trigger; }; -struct uvm_event_queue_restore_t +struct event_queue_restore_t { int kind; - uint64_t end_timestamp; + uint64_t timestamp; uint32_t pid; int node_id; bool rescheduled; }; -struct uvm_event_unmap_from_gpu_t +struct event_unmap_from_gpu_t { int kind; uint64_t timestamp; @@ -117,13 +114,13 @@ struct uvm_event_unmap_from_gpu_t int node_id; uint32_t trigger; }; - -template -struct uvm_event_info; +*/ template struct page_migration_info; +using namespace rocprofiler::common; + namespace kfd { template @@ -145,74 +142,56 @@ constexpr size_t bitmask(std::index_sequence) return (bitmask(Args) | ...); } -enum uvm_event_id_t +template +constexpr size_t kfd_bitmask(std::index_sequence) { - ROCPROFILER_UVM_EVENT_NONE, - ROCPROFILER_UVM_EVENT_MIGRATE_START, - ROCPROFILER_UVM_EVENT_MIGRATE_END, - ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, - ROCPROFILER_UVM_EVENT_PAGE_FAULT_END, - ROCPROFILER_UVM_EVENT_QUEUE_EVICTION, - ROCPROFILER_UVM_EVENT_QUEUE_RESTORE, - ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU, - ROCPROFILER_UVM_EVENT_LAST, -}; + return (page_migration_info::kfd_bitmask | ...); +} -static_assert(KFD_SMI_EVENT_NONE == 0); -static_assert(KFD_SMI_EVENT_MIGRATE_START == 5); -static_assert(KFD_SMI_EVENT_MIGRATE_END == 6); -static_assert(KFD_SMI_EVENT_PAGE_FAULT_START == 7); -static_assert(KFD_SMI_EVENT_PAGE_FAULT_END == 8); -static_assert(KFD_SMI_EVENT_QUEUE_EVICTION == 9); -static_assert(KFD_SMI_EVENT_QUEUE_RESTORE == 10); -static_assert(KFD_SMI_EVENT_UNMAP_FROM_GPU == 11); -static_assert(KFD_SMI_EVENT_ALL_PROCESS == 64); +template +constexpr size_t +kfd_bitmask_impl(size_t rocprof_op, std::index_sequence) +{ + if(rocprof_op == OpInx) return page_migration_info::kfd_bitmask; + if constexpr(sizeof...(OpInxs) > 0) + return kfd_bitmask_impl(rocprof_op, std::index_sequence{}); + else + return 0; +} + +template +constexpr auto +kfd_bitmask(const container::small_vector& rocprof_event_ids, + std::index_sequence) +{ + uint64_t m{}; + for(const size_t& event_id : rocprof_event_ids) + { + m |= kfd_bitmask_impl(event_id, std::index_sequence{}); + } + return m; +} + +template +constexpr size_t +kfd_to_rocprof_op(size_t kfd_id, std::index_sequence) +{ + if(kfd_id == page_migration_info::kfd_operation) return OpInx; + if constexpr(sizeof...(OpInxs) > 0) + return kfd_to_rocprof_op(kfd_id, std::index_sequence{}); + else + return 0; +} + +size_t +get_rocprof_op(const std::string_view event_data); + +void +kfd_readlines(const std::string_view str, void(handler)(std::string_view)); using rocprof_buffer_op_t = rocprofiler_page_migration_operation_t; using node_fd_t = int; -using event_map_t = - std::unordered_map; -using events_cache_t = std::array; - -template -constexpr size_t to_kfd_bitmask(std::index_sequence) -{ - return bitmask(std::index_sequence::kfd_event...>()); -} - -template -constexpr size_t to_uvm_bitmask(std::index_sequence) -{ - return bitmask(std::index_sequence(Ops)...>()); -} - -template -constexpr bool -is_rocprof_uvm_map() -{ - return page_migration_info::uvm_bitmask & bitmask(UvmOpIdx); -} - -template -constexpr bool -_is_rocprof_uvm_map(size_t uvm_event, std::index_sequence) -{ - if(OpInx == uvm_event) - return is_rocprof_uvm_map(); - else if constexpr(sizeof...(OpInxs) > 0) - return _is_rocprof_uvm_map(uvm_event, std::index_sequence{}); - else - return false; -} - -template -constexpr bool -is_rocprof_uvm_map(size_t uvm_event) -{ - return _is_rocprof_uvm_map( - uvm_event, std::make_index_sequence{}); -} } // namespace page_migration } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/tests/CMakeLists.txt b/source/lib/rocprofiler-sdk/tests/CMakeLists.txt index 3b43509f13..e08b02193b 100644 --- a/source/lib/rocprofiler-sdk/tests/CMakeLists.txt +++ b/source/lib/rocprofiler-sdk/tests/CMakeLists.txt @@ -11,8 +11,16 @@ include(GoogleTest) # # -------------------------------------------------------------------------------------- # -set(rocprofiler_lib_sources agent.cpp buffer.cpp contexts.cpp hsa.cpp naming.cpp - timestamp.cpp version.cpp hsa_barrier.cpp) +set(rocprofiler_lib_sources + agent.cpp + buffer.cpp + contexts.cpp + hsa.cpp + naming.cpp + timestamp.cpp + version.cpp + hsa_barrier.cpp + page_migration.cpp) add_executable(rocprofiler-sdk-lib-tests) target_sources(rocprofiler-sdk-lib-tests PRIVATE ${rocprofiler_lib_sources} @@ -23,6 +31,7 @@ target_link_libraries( rocprofiler-sdk::rocprofiler-sdk-common-library rocprofiler-sdk::counter-test-constants rocprofiler-sdk::rocprofiler-sdk-hsa-runtime + rocprofiler-sdk::rocprofiler-sdk-drm GTest::gtest GTest::gtest_main) @@ -40,9 +49,8 @@ set_tests_properties(${lib_TESTS} PROPERTIES TIMEOUT 30 LABELS "unittests") # # -------------------------------------------------------------------------------------- # -set(rocprofiler_shared_lib_sources - external_correlation.cpp intercept_table.cpp page_migration.cpp registration.cpp - roctx.cpp status.cpp) +set(rocprofiler_shared_lib_sources external_correlation.cpp intercept_table.cpp + registration.cpp roctx.cpp status.cpp) add_executable(rocprofiler-sdk-lib-tests-shared) target_sources(rocprofiler-sdk-lib-tests-shared PRIVATE ${rocprofiler_shared_lib_sources}) diff --git a/source/lib/rocprofiler-sdk/tests/page_migration.cpp b/source/lib/rocprofiler-sdk/tests/page_migration.cpp index 696bf03078..e2f0ca31cb 100644 --- a/source/lib/rocprofiler-sdk/tests/page_migration.cpp +++ b/source/lib/rocprofiler-sdk/tests/page_migration.cpp @@ -20,25 +20,26 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. +#include "lib/common/container/small_vector.hpp" #include "lib/common/defines.hpp" -#include "lib/rocprofiler-sdk/details/kfd_ioctl.h" +#include "lib/common/mpl.hpp" #include "lib/rocprofiler-sdk/page_migration/utils.hpp" +#include #include #include -#include +#include #include #include #include +#include #define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1 #include "lib/rocprofiler-sdk/page_migration/page_migration.def.cpp" #undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL -#define ASSERT_SAME(A, B) static_assert(static_cast(A) == static_cast(B)) - namespace { constexpr std::string_view MULTILINE_STRING = "This is 0 Line 0\n" @@ -61,7 +62,7 @@ return_line(const std::string_view line) auto parse_lines() { - KFD_EVENT_PARSE_EVENTS(MULTILINE_STRING, return_line); + rocprofiler::page_migration::kfd_readlines(MULTILINE_STRING, return_line); } TEST(page_migration, readlines) @@ -70,107 +71,35 @@ TEST(page_migration, readlines) parse_lines(); } -TEST(page_migration, parse_kvm_events) -{ - // Ensure all lines are read - parse_lines(); -} - TEST(page_migtation, rocprof_kfd_map) { - using namespace ::rocprofiler::page_migration; + using namespace rocprofiler::page_migration; + using namespace rocprofiler::common::container; + + using rocprofiler_page_migration_seq_t = + std::make_index_sequence; + + const small_vector vec{ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END, + ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU}; + + EXPECT_EQ((page_migration_info::kfd_bitmask | + page_migration_info::kfd_bitmask | + page_migration_info::kfd_bitmask), + kfd_bitmask(vec, rocprofiler_page_migration_seq_t{})); + + const auto to_kfd_str = [](kfd_smi_event e) { + std::string str = fmt::format("{:x} ", static_cast(e)); + return rocprofiler::page_migration::get_rocprof_op({str}); + }; // clang-format off - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_MIGRATE_START >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_MIGRATE_END >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() ); - - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) ); - - - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() ); - - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_MIGRATE_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_MIGRATE_END >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() ); - - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_MIGRATE_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_MIGRATE_END >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() ); - - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_MIGRATE_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_MIGRATE_END >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() ); - - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) ); - - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) ); - - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) ); - - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) ); - - static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_MIGRATE_START) == ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE ); - static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END) == ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT ); - static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU) == ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU ); - static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION) == ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND ); - - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH, KFD_MIGRATE_TRIGGER_PREFETCH ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, KFD_MIGRATE_TRIGGER_TTM_EVICTION ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, KFD_QUEUE_EVICTION_TRIGGER_SVM ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, KFD_QUEUE_EVICTION_TRIGGER_USERPTR ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, KFD_QUEUE_EVICTION_TRIGGER_TTM ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, KFD_QUEUE_EVICTION_CRIU_RESTORE ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU ); - - static_assert(to_kfd_bitmask(std::index_sequence< - ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU>()) == - (KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_PAGE_FAULT_START) - | KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU))); - + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_MIGRATE_START), ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_MIGRATE_END), ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_PAGE_FAULT_START), ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_PAGE_FAULT_END), ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_QUEUE_EVICTION), ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_QUEUE_RESTORE), ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_UNMAP_FROM_GPU), ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU); // clang-format on } diff --git a/tests/bin/page-migration/CMakeLists.txt b/tests/bin/page-migration/CMakeLists.txt index 1406a9e580..e2f1b9f503 100644 --- a/tests/bin/page-migration/CMakeLists.txt +++ b/tests/bin/page-migration/CMakeLists.txt @@ -40,8 +40,3 @@ target_compile_options(page-migration PRIVATE -W -Wall -Wextra -Wpedantic -Wshad find_package(Threads REQUIRED) target_link_libraries(page-migration PRIVATE Threads::Threads) - -install( - TARGETS page-migration - DESTINATION bin - COMPONENT tests) diff --git a/tests/bin/page-migration/page-migration.cpp b/tests/bin/page-migration/page-migration.cpp index 5cb3859cb3..5a38389dba 100644 --- a/tests/bin/page-migration/page-migration.cpp +++ b/tests/bin/page-migration/page-migration.cpp @@ -27,13 +27,14 @@ #include #include #include -#include #include #include #include #include #include #include +#include +#include #include #include @@ -117,14 +118,14 @@ private: }; int -main() +run_test(int num_iter) { using namespace std::chrono_literals; - static constexpr auto NUM_PAGES = 16; - const auto PAGE_SIZE_BYTES = ::sysconf(_SC_PAGE_SIZE); + constexpr size_t NUM_PAGES = 512; + const size_t PAGE_SIZE_BYTES = ::sysconf(_SC_PAGE_SIZE); - size_t elem_count = (NUM_PAGES * PAGE_SIZE_BYTES) / sizeof(size_t); // one page? + const size_t elem_count = (NUM_PAGES * PAGE_SIZE_BYTES) / sizeof(size_t); auto alloc = mmap_allocator(NUM_PAGES); void* data_v = alloc.get(); @@ -142,8 +143,10 @@ main() HIP_API_CALL(hipHostRegister(data, elem_count * sizeof(size_t), hipHostRegisterDefault)); - char maps[1024 * 1024]; - std::memset(maps, '\0', 1024 * 1024); + constexpr size_t MAPS_BUFFER_SIZE = 1024 * 1024; + + char maps[MAPS_BUFFER_SIZE]; + std::memset(maps, '\0', MAPS_BUFFER_SIZE); auto fd = open("/proc/self/maps", O_RDONLY | O_CLOEXEC); if(fd == -1) @@ -153,7 +156,7 @@ main() exit(-1); } - auto bytes = read(fd, maps, 1024 * 1024 - 1); + auto bytes = read(fd, maps, MAPS_BUFFER_SIZE - 1); if(bytes == -1) { auto ecode = errno; @@ -190,33 +193,96 @@ main() } } - for(int iter = 0; iter < 1000; ++iter) + hipStream_t stream{}; + HIP_API_CALL(hipStreamCreate(&stream)); + + for(int iter = 0; iter < num_iter; ++iter) { for(size_t i = 0; i < elem_count; ++i) + { data[i] = i; + } // std::cout << "launching..." << std::endl; - hipLaunchKernelGGL(kernel, 128, 64, 0, 0, data, elem_count); + hipLaunchKernelGGL(kernel, 1024, 1024, 0, stream, data, elem_count); - // std::cout << "syncing..." << std::endl; - HIP_API_CALL(hipDeviceSynchronize()); + HIP_API_CALL(hipStreamSynchronize(stream)); - // std::cout << "checking..." << std::endl; for(size_t i = 0; i < elem_count; ++i) { - if(data[i] != (i * 2)) + const auto data_i = data[i]; + if(data_i != (i * 2)) { auto msg = std::stringstream{}; msg << "GPU computed value at " << i << " in iteration " << iter - << " is incorrect. Expected " << (i * 2) << ", found " << data[i]; + << " is incorrect. Expected " << (i * 2) << ", found " << data_i; throw std::runtime_error{msg.str()}; } } - - std::cout << "Iteration " << std::setw(2) << iter << ": correct\n" << std::flush; } + HIP_API_CALL(hipStreamDestroy(stream)); HIP_API_CALL(hipDeviceSynchronize()); return 0; } + +int +main(int argc, const char** argv) +{ + using namespace std::chrono_literals; + + const auto usage_msg = [](const char** _argv) { + fprintf(stderr, "usage: %s \n", _argv[0]); + }; + + if(argc != 3) + { + usage_msg(argv); + exit(EXIT_FAILURE); + } + + for(int i = 1; i < argc; ++i) + { + auto _arg = std::string{argv[i]}; + if(_arg == "?" || _arg == "-h" || _arg == "--help") + { + usage_msg(argv); + exit(EXIT_SUCCESS); + } + } + + const auto num_threads = std::atoi(argv[1]); + if(num_threads < 1) + { + fprintf(stderr, "Error: Invalid value %d for num_threads (min 1)\n", num_threads); + exit(EXIT_FAILURE); + } + + const auto num_iter = std::atoi(argv[2]); + if(num_iter < 1) + { + fprintf(stderr, "Error: Invalid value %d for num_iter (min 1)\n", num_iter); + exit(EXIT_FAILURE); + } + + run_test(num_iter); + + std::vector threads; + threads.reserve(num_threads); + + std::cerr << "Running " << num_iter << " iterations/thread on " << num_threads << " threads\n"; + + for(auto i = 0; i < num_threads; ++i) + { + threads.emplace_back([_num_iter = num_iter]() { run_test(_num_iter); }); + } + + std::cerr << "Waiting for threads\n"; + for(auto& t : threads) + { + t.join(); + } + + return 0; +} diff --git a/tests/page-migration/CMakeLists.txt b/tests/page-migration/CMakeLists.txt index d7c0650841..8f0fd6d9f7 100644 --- a/tests/page-migration/CMakeLists.txt +++ b/tests/page-migration/CMakeLists.txt @@ -17,7 +17,7 @@ else() set(PRELOAD_ENV "LD_PRELOAD=$") endif() -add_test(NAME test-page-migration-execute COMMAND $) +add_test(NAME test-page-migration-execute COMMAND $ 4 1024) set(page-migration-env "${PRELOAD_ENV}" diff --git a/tests/page-migration/conftest.py b/tests/page-migration/conftest.py index 15b059dc42..a65124ec3b 100644 --- a/tests/page-migration/conftest.py +++ b/tests/page-migration/conftest.py @@ -2,6 +2,7 @@ import json import pytest +from rocprofiler_sdk.pytest_utils.dotdict import dotdict def pytest_addoption(parser): @@ -24,4 +25,4 @@ def input_data(request): return pytest.skip( "Skipping test because KFD does not support SVM event reporting" ) - return data + return dotdict(data) diff --git a/tests/page-migration/validate.py b/tests/page-migration/validate.py index ddf799c4b2..15619d96b2 100644 --- a/tests/page-migration/validate.py +++ b/tests/page-migration/validate.py @@ -271,71 +271,113 @@ def get_allocated_pages(callback_records): assert "hostPtr" in itr["args"].keys(), f"{itr}" host_register_record.append(itr) - assert len(host_register_record) == 1 - alloc_size = int(host_register_record[0]["args"]["sizeBytes"], 10) - start_addr = int(host_register_record[0]["args"]["hostPtr"], 16) - end_addr = start_addr + alloc_size + num_host_register_calls = len(host_register_record) + assert num_host_register_calls == 5, "Expected 5 hipHostRegister calls in test" - return start_addr, end_addr, alloc_size + ret = [] + for i in range(num_host_register_calls): + alloc_size = int(host_register_record[0]["args"]["sizeBytes"], 10) + start_addr = int(host_register_record[0]["args"]["hostPtr"], 16) + end_addr = start_addr + alloc_size + ret.append((start_addr, end_addr)) + return ret + + +def validate_node(id, nodes): + assert id.handle in nodes def test_page_migration_data(input_data): data = input_data sdk_data = data["rocprofiler-sdk-json-tool"] - buffer_records = sdk_data["buffer_records"] - callback_records = sdk_data["callback_records"] - page_migration_buffers = buffer_records["page_migration"] + buffer_records = sdk_data.buffer_records + callback_records = sdk_data.callback_records + page_migtation_buffers = buffer_records.page_migration _, bf_op_names = get_operation(buffer_records, "PAGE_MIGRATION") assert bf_op_names[0] == "PAGE_MIGRATION_NONE" - assert "PAGE_MIGRATION_PAGE_MIGRATE" in bf_op_names - assert len(bf_op_names) == 5 - node_ids = set(x["gpu_id"] for x in sdk_data["agents"]) - start_addr, end_addr, alloc_size = get_allocated_pages(callback_records) + for op_name in bf_op_names: + assert "PAGE_MIGRATION" in op_name - assert start_addr < end_addr and start_addr + alloc_size == end_addr - assert int(alloc_size) == 16 * 4096 # We allocated 16 pages in the test + assert len(bf_op_names) == 8 - # PID must be same - assert len(set(r["pid"] for r in page_migration_buffers)) == 1 + nodes = set(x.id.handle for x in sdk_data.agents) + allocations = get_allocated_pages(callback_records) - for r in page_migration_buffers: - op = r["operation"] + for start_addr, end_addr in allocations: - assert r["size"] == 136 - assert op != 0 and bf_op_names[op] != "PAGE_MIGRATION_NONE" - assert bf_op_names[op].lower().replace("page_migration_", "") in r.keys() + assert ( + start_addr < end_addr + ), "Expected start address less than end address for mmap range" + alloc_size = end_addr - start_addr + assert int(alloc_size) == 512 * 4096 # We allocated 512 pages in the test - if "page_migrate" in r: - assert r["page_migrate"]["from_node"] in node_ids - assert r["page_migrate"]["to_node"] in node_ids - assert r["page_migrate"]["prefetch_node"] in node_ids - assert r["page_migrate"]["preferred_node"] in node_ids - assert r["page_migrate"]["trigger"] >= 0 + # PID must be same + assert len(set(r.pid for r in page_migtation_buffers)) == 1 - if "queue_suspend" in r: - assert r["queue_suspend"]["trigger"] >= 0 - assert r["queue_suspend"]["node_id"] in node_ids + for r in page_migtation_buffers: + op = r.operation - if "unmap_from_gpu" in r: - assert r["unmap_from_gpu"]["trigger"] >= 0 - # unmap is "instantaneous" - assert 0 < r["start_timestamp"] == r["end_timestamp"] - else: - assert 0 < r["start_timestamp"] < r["end_timestamp"] + assert r.size == 160 + assert op != 0 and bf_op_names[op] != "PAGE_MIGRATION_NONE" + assert bf_op_names[op].lower().replace("page_migration_", "") in r.keys() - # Check for events with our page - for r in page_migration_buffers: + if "page_fault_start" in r: + arg = r.page_fault_start + assert arg.read_fault < 2 + validate_node(arg.agent_id, nodes) + assert arg.address > 0 - if "page_migrate" in r and r["page_migrate"]["start_addr"] == start_addr: - assert end_addr == r["page_migrate"]["end_addr"] + if "page_fault_end" in r: + arg = r.page_fault_end + assert arg.migrated < 2 + validate_node(arg.agent_id, nodes) + assert arg.address > 0 - if "unmap_from_gpu" in r and r["unmap_from_gpu"]["start_addr"] == start_addr: - assert end_addr == r["unmap_from_gpu"]["end_addr"] + if "page_migrate_start" in r: + arg = r.page_migrate_start + assert ( + 0 < arg.start_addr < arg.end_addr + ), "Expected start addr to be less than end addr" + if arg.start_addr == start_addr: + assert arg.end_addr == end_addr + validate_node(arg.from_agent, nodes) + validate_node(arg.to_agent, nodes) + validate_node(arg.prefetch_agent, nodes) + validate_node(arg.preferred_agent, nodes) + assert 0 <= arg.trigger < 4 - # TODO: Check if a migrate a->b is paired up with b->a - # It may not always be reported towards app finalization + if "page_migrate_end" in r: + arg = r.page_migrate_end + assert ( + 0 < arg.start_addr < arg.end_addr + ), "Expected start addr to be less than end addr" + if arg.start_addr == start_addr: + assert arg.end_addr == end_addr + validate_node(arg.from_agent, nodes) + validate_node(arg.to_agent, nodes) + assert 0 <= arg.trigger < 4 + + if "queue_eviction" in r: + arg = r.queue_eviction + validate_node(arg.agent_id, nodes) + assert 0 <= arg.trigger < 6 + + if "queue_restore" in r: + arg = r.queue_restore + assert arg.rescheduled < 2 + validate_node(arg.agent_id, nodes) + + if "unmap_from_gpu" in r: + arg = r.unmap_from_gpu + assert ( + 0 < arg.start_addr < arg.end_addr + ), "Expected start addr to be less than end addr" + if arg.start_addr == start_addr: + assert arg.end_addr == end_addr + validate_node(arg.agent_id, nodes) + assert 0 <= arg.trigger < 3 if __name__ == "__main__": diff --git a/tests/tools/json-tool.cpp b/tests/tools/json-tool.cpp index b698de2886..597bf20844 100644 --- a/tests/tools/json-tool.cpp +++ b/tests/tools/json-tool.cpp @@ -1277,7 +1277,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) 0, page_migration_buffer); - constexpr auto message = "page migration service for memory copy configure"; + constexpr auto message = "buffer tracing service for page migration configure"; if(page_migration_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL) std::cerr << message << " failed: " << rocprofiler_get_status_string(page_migration_status)