diff --git a/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp b/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp index 1bb2e7428d..9dd5b13b00 100644 --- a/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp +++ b/projects/rocprofiler-sdk/samples/api_buffered_tracing/client.cpp @@ -86,6 +86,22 @@ rocprofiler_buffer_id_t client_buffer = {}; buffer_name_info client_name_info = {}; kernel_symbol_map_t client_kernels = {}; +template +std::string +as_hex(Tp _v, size_t _width = 16) +{ + uintptr_t _vp = 0; + if constexpr(std::is_pointer::value) + _vp = reinterpret_cast(_v); + else + _vp = _v; + + auto _ss = std::stringstream{}; + _ss.fill('0'); + _ss << "0x" << std::hex << std::setw(_width) << _vp; + return _ss.str(); +} + void print_call_stack(const call_stack_t& _call_stack) { @@ -290,43 +306,71 @@ tool_tracing_callback(rocprofiler_context_id_t context, auto info = std::stringstream{}; info << "kind=" << record->kind << ", operation=" << record->operation - << ", pid=" << record->pid << ", start=" << record->start_timestamp - << ", stop=" << record->end_timestamp - << ", name=" << client_name_info.at(record->kind, record->operation); + << ", pid=" << record->pid << ", timestamp=" << record->timestamp + << ", name=" << client_name_info.at(record->kind, record->operation) + << std::boolalpha; switch(record->operation) { - case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE: + case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START: { - info << ", page_fault=(" << record->page_fault.read_fault << ", " - << record->page_fault.migrated << ", " << record->page_fault.node_id - << ", " << std::hex << "0x" << record->page_fault.address << ")"; + const auto& arg = record->args; + info << ", page_migrate_start=(" << as_hex(arg.page_migrate_start.start_addr) + << ", " << as_hex(arg.page_migrate_start.end_addr) << ", " + << arg.page_migrate_start.from_agent.handle << ", " + << arg.page_migrate_start.to_agent.handle << ", " + << arg.page_migrate_start.prefetch_agent.handle << ", " + << arg.page_migrate_start.preferred_agent.handle << ", " + << arg.page_migrate_start.trigger << ")"; break; } - case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT: + case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END: { - info << ", page_migrate=(" << std::hex << "0x" - << record->page_migrate.start_addr << ", 0x" - << record->page_migrate.end_addr << ", " << std::dec - << record->page_migrate.from_node << ", " << record->page_migrate.to_node - << ", " << record->page_migrate.prefetch_node << ", " - << record->page_migrate.preferred_node << ", " - << record->page_migrate.trigger << ")"; + const auto& arg = record->args; + info << ", page_migrate_end=(" << as_hex(arg.page_migrate_end.start_addr) + << ", " << as_hex(arg.page_migrate_end.end_addr) << ", " + << arg.page_migrate_end.from_agent.handle << ", " + << arg.page_migrate_end.to_agent.handle << ", " + << arg.page_migrate_end.trigger << ")"; break; } - case ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND: + case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START: { - info << ", queue_suspend=(" << record->queue_suspend.rescheduled << ", " - << record->queue_suspend.node_id << ", " << record->queue_suspend.trigger - << ")"; + const auto& arg = record->args; + info << ", page_fault_start=(" << arg.page_fault_start.read_fault << ", " + << arg.page_fault_start.agent_id.handle << ", " + << as_hex(arg.page_fault_start.address) << ")"; + break; + } + case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END: + { + const auto& arg = record->args; + info << ", page_fault_end=(" << arg.page_fault_end.migrated << ", " + << arg.page_fault_end.agent_id.handle << ", " + << as_hex(arg.page_fault_end.address) << ")"; + break; + } + case ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION: + { + const auto& arg = record->args; + info << ", queue_eviction=(" << arg.queue_eviction.agent_id.handle << ", " + << arg.queue_eviction.trigger << ")"; + break; + } + case ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE: + { + const auto& arg = record->args; + info << ", queue_restore=(" << arg.queue_restore.rescheduled << ", " + << arg.queue_restore.agent_id.handle << ")"; break; } case ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU: { - info << ", unmap_from_gpu=(" << record->unmap_from_gpu.node_id << std::hex - << ", 0x" << record->unmap_from_gpu.start_addr << ", 0x" - << record->unmap_from_gpu.end_addr << ", " << std::dec - << record->unmap_from_gpu.trigger << ")"; + const auto& arg = record->args; + info << ", unmap_from_gpu=(" << as_hex(arg.unmap_from_gpu.start_addr) << ", " + << as_hex(arg.unmap_from_gpu.end_addr) << ", " + << arg.unmap_from_gpu.agent_id.handle << ", " << arg.unmap_from_gpu.trigger + << ")"; break; } case ROCPROFILER_PAGE_MIGRATION_NONE: @@ -337,8 +381,7 @@ tool_tracing_callback(rocprofiler_context_id_t context, } } - if(record->start_timestamp > record->end_timestamp) - throw std::runtime_error("page migration: start > end"); + if(record->timestamp == 0) throw std::runtime_error("page migration: timestamp == 0"); static_cast(user_data)->emplace_back( source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()}); diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/CMakeLists.txt index cbf0d592e1..956a701fed 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/CMakeLists.txt @@ -46,4 +46,5 @@ add_subdirectory(marker) add_subdirectory(openmp) add_subdirectory(rccl) add_subdirectory(cxx) +add_subdirectory(kfd) add_subdirectory(amd_detail) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h index ae6c67b73a..16b379bb17 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/buffer_tracing.h @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -37,49 +38,6 @@ ROCPROFILER_EXTERN_C_INIT * @{ */ -/** - * @brief Page migration triggers - * - */ -typedef enum -{ - ROCPROFILER_PAGE_MIGRATION_TRIGGER_NONE = -1, - ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH, - ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, - ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, - ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, - ROCPROFILER_PAGE_MIGRATION_TRIGGER_LAST, -} rocprofiler_page_migration_trigger_t; - -/** - * @brief Page migration triggers causing the queue to suspend - * - */ -typedef enum -{ - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_NONE = -1, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_LAST, -} rocprofiler_page_migration_queue_suspend_trigger_t; - -/** - * @brief Page migration triggers causing an unmap from the GPU - * - */ -typedef enum -{ - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_NONE = -1, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST, -} rocprofiler_page_migration_unmap_from_gpu_trigger_t; - /** * @brief ROCProfiler Buffer HSA API Tracer Record. */ @@ -265,63 +223,17 @@ typedef struct rocprofiler_buffer_tracing_kernel_dispatch_record_t /// dispatch } rocprofiler_buffer_tracing_kernel_dispatch_record_t; -typedef struct -{ - uint8_t read_fault : 1; ///< Is the fault due to a read or a write - uint8_t migrated : 1; - uint32_t node_id; ///< GPU or CPU node ID which reports a page fault - uint64_t address; ///< Address access that caused the page fault -} rocprofiler_buffer_tracing_page_migration_page_fault_record_t; - -typedef struct -{ - uint64_t start_addr; ///< Start address of the page being migrated - uint64_t end_addr; ///< End address of the page being migrated - uint32_t from_node; ///< Source node - uint32_t to_node; ///< Destination node - uint32_t prefetch_node; ///< Node from which page was prefetched - uint32_t preferred_node; ///< Preferred destinaion node - rocprofiler_page_migration_trigger_t trigger; ///< Cause of migration -} rocprofiler_buffer_tracing_page_migration_page_migrate_record_t; - -typedef struct -{ - uint8_t rescheduled : 1; - uint32_t node_id; ///< GPU node from which the queue was suspended - rocprofiler_page_migration_queue_suspend_trigger_t trigger; ///< Cause of queue suspension -} rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t; - -typedef struct -{ - uint32_t node_id; ///< Node ID from which page was unmapped - uint64_t start_addr; ///< Start address of unmapped page - uint64_t end_addr; ///< End address of unmapped page - rocprofiler_page_migration_unmap_from_gpu_trigger_t trigger; ///< Cause of unmap -} rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t; - /** * @brief ROCProfiler Buffer Page Migration Tracer Record */ typedef struct rocprofiler_buffer_tracing_page_migration_record_t { - uint64_t size; ///< size of this struct - rocprofiler_buffer_tracing_kind_t kind; ///< ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION - rocprofiler_tracing_operation_t operation; - rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds - rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds - uint32_t pid; - - union - { - rocprofiler_buffer_tracing_page_migration_page_fault_record_t page_fault; - rocprofiler_buffer_tracing_page_migration_page_migrate_record_t page_migrate; - rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t queue_suspend; - rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t unmap_from_gpu; - struct - { - uint64_t reserved[12]; - }; - }; + uint64_t size; ///< size of this struct + rocprofiler_buffer_tracing_kind_t kind; ///< ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION + rocprofiler_page_migration_operation_t operation; + rocprofiler_timestamp_t timestamp; ///< start time in nanoseconds + uint32_t pid; + rocprofiler_page_migration_args_t args; } rocprofiler_buffer_tracing_page_migration_record_t; /** diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/hash.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/hash.hpp index 774942585a..f5e5306b47 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/hash.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/hash.hpp @@ -24,6 +24,7 @@ #pragma once #include +#include #include namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization.hpp b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization.hpp index a06893e16b..a8cdac371c 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization.hpp +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/cxx/serialization.hpp @@ -59,6 +59,7 @@ #include #include +#include #include #define ROCP_SDK_SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD)) @@ -460,6 +461,122 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_copy_record_t data) ROCP_SDK_SAVE_DATA_FIELD(bytes); } +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_page_fault_start_t& data) +{ + ROCP_SDK_SAVE_DATA_BITFIELD("read_fault", read_fault); + ROCP_SDK_SAVE_DATA_FIELD(agent_id); + ROCP_SDK_SAVE_DATA_FIELD(address); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_page_fault_end_t& data) +{ + ROCP_SDK_SAVE_DATA_BITFIELD("migrated", migrated); + ROCP_SDK_SAVE_DATA_FIELD(agent_id); + ROCP_SDK_SAVE_DATA_FIELD(address); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_page_migrate_start_t& data) +{ + ROCP_SDK_SAVE_DATA_FIELD(start_addr); + ROCP_SDK_SAVE_DATA_FIELD(end_addr); + ROCP_SDK_SAVE_DATA_FIELD(from_agent); + ROCP_SDK_SAVE_DATA_FIELD(to_agent); + ROCP_SDK_SAVE_DATA_FIELD(prefetch_agent); + ROCP_SDK_SAVE_DATA_FIELD(preferred_agent); + ROCP_SDK_SAVE_DATA_FIELD(trigger); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_page_migrate_end_t& data) +{ + ROCP_SDK_SAVE_DATA_FIELD(start_addr); + ROCP_SDK_SAVE_DATA_FIELD(end_addr); + ROCP_SDK_SAVE_DATA_FIELD(from_agent); + ROCP_SDK_SAVE_DATA_FIELD(to_agent); + ROCP_SDK_SAVE_DATA_FIELD(trigger); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_queue_eviction_t& data) +{ + ROCP_SDK_SAVE_DATA_FIELD(agent_id); + ROCP_SDK_SAVE_DATA_FIELD(trigger); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_queue_restore_t& data) +{ + ROCP_SDK_SAVE_DATA_BITFIELD("rescheduled", rescheduled); + ROCP_SDK_SAVE_DATA_FIELD(agent_id); +} + +template +void +save(ArchiveT& ar, const rocprofiler_page_migration_unmap_from_gpu_t& data) +{ + ROCP_SDK_SAVE_DATA_FIELD(start_addr); + ROCP_SDK_SAVE_DATA_FIELD(end_addr); + ROCP_SDK_SAVE_DATA_FIELD(agent_id); + ROCP_SDK_SAVE_DATA_FIELD(trigger); +} + +namespace details +{ +template +struct save_page_migration_arg; + +#define ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(ENUM_VALUE, UNION_ARG) \ + template <> \ + struct save_page_migration_arg \ + { \ + static constexpr auto value = ROCPROFILER_PAGE_MIGRATION_##ENUM_VALUE; \ + template \ + void operator()(ArchiveT& ar, rocprofiler_page_migration_args_t args) \ + { \ + ar(make_nvp(#UNION_ARG, args.UNION_ARG)); \ + } \ + }; + +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(NONE, none) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_MIGRATE_START, page_migrate_start) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_MIGRATE_END, page_migrate_end) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_FAULT_START, page_fault_start) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_FAULT_END, page_fault_end) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(QUEUE_EVICTION, queue_eviction) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(QUEUE_RESTORE, queue_restore) +ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(UNMAP_FROM_GPU, unmap_from_gpu) + +#undef ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG + +template +void +save_page_migration_args(ArchiveT& ar, + rocprofiler_page_migration_operation_t op, + rocprofiler_page_migration_args_t args, + std::index_sequence) +{ + using save_page_migration_type = save_page_migration_arg; + if(op == save_page_migration_type::value) + { + if constexpr(save_page_migration_type::value != ROCPROFILER_PAGE_MIGRATION_NONE) + save_page_migration_type{}(ar, args); + } + else if constexpr(sizeof...(IdxTail) > 0) + { + save_page_migration_args(ar, op, args, std::index_sequence{}); + } +} +} // namespace details + template void save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_record_t& data) @@ -467,81 +584,10 @@ save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_record_t& dat ROCP_SDK_SAVE_DATA_FIELD(size); ROCP_SDK_SAVE_DATA_FIELD(kind); ROCP_SDK_SAVE_DATA_FIELD(operation); - ROCP_SDK_SAVE_DATA_FIELD(start_timestamp); - ROCP_SDK_SAVE_DATA_FIELD(end_timestamp); + ROCP_SDK_SAVE_DATA_FIELD(timestamp); ROCP_SDK_SAVE_DATA_FIELD(pid); - - switch(data.operation) - { - case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT: - { - ar(make_nvp("page_fault", data.page_fault)); - break; - } - case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE: - { - ar(make_nvp("page_migrate", data.page_migrate)); - break; - } - case ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND: - { - ar(make_nvp("queue_suspend", data.queue_suspend)); - break; - } - case ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU: - { - ar(make_nvp("unmap_from_gpu", data.unmap_from_gpu)); - break; - } - case ROCPROFILER_PAGE_MIGRATION_NONE: - case ROCPROFILER_PAGE_MIGRATION_LAST: - { - throw std::runtime_error{"unsupported page migration operation type"}; - break; - } - } -} - -template -void -save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_page_fault_record_t& data) -{ - ROCP_SDK_SAVE_DATA_FIELD(node_id); - ROCP_SDK_SAVE_DATA_FIELD(address); - ROCP_SDK_SAVE_DATA_FIELD(read_fault); - ROCP_SDK_SAVE_DATA_FIELD(migrated); -} - -template -void -save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_page_migrate_record_t& data) -{ - ROCP_SDK_SAVE_DATA_FIELD(start_addr); - ROCP_SDK_SAVE_DATA_FIELD(end_addr); - ROCP_SDK_SAVE_DATA_FIELD(from_node); - ROCP_SDK_SAVE_DATA_FIELD(to_node); - ROCP_SDK_SAVE_DATA_FIELD(prefetch_node); - ROCP_SDK_SAVE_DATA_FIELD(preferred_node); - ROCP_SDK_SAVE_DATA_FIELD(trigger); -} - -template -void -save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t& data) -{ - ROCP_SDK_SAVE_DATA_FIELD(node_id); - ROCP_SDK_SAVE_DATA_FIELD(trigger); - ROCP_SDK_SAVE_DATA_FIELD(rescheduled); -} - -template -void -save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t& data) -{ - ROCP_SDK_SAVE_DATA_FIELD(node_id); - ROCP_SDK_SAVE_DATA_FIELD(start_addr); - ROCP_SDK_SAVE_DATA_FIELD(end_addr); - ROCP_SDK_SAVE_DATA_FIELD(trigger); + details::save_page_migration_args( + ar, data.operation, data.args, std::make_index_sequence{}); } template diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h index a61f074d7b..1c2297b081 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/fwd.h @@ -24,8 +24,6 @@ #include -#include - #include #include @@ -226,21 +224,6 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_MEMORY_COPY_LAST, } rocprofiler_memory_copy_operation_t; -/** - * @brief Page migration event. - */ -typedef enum // NOLINT(performance-enum-size) -{ - ROCPROFILER_PAGE_MIGRATION_NONE = 0, ///< Unknown event - ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, - ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, - // Any and all events, from all processes. Requires superuser - // ROCPROFILER_PAGE_MIGRATION_ANY_ALL_PROCESSES, - ROCPROFILER_PAGE_MIGRATION_LAST, -} rocprofiler_page_migration_operation_t; - /** * @brief ROCProfiler Kernel Dispatch Tracing Operation Types. */ @@ -321,6 +304,22 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_BUFFER_POLICY_LAST, } rocprofiler_buffer_policy_t; +/** + * @brief Page migration event. + */ +typedef enum // NOLINT(performance-enum-size) +{ + ROCPROFILER_PAGE_MIGRATION_NONE = 0, ///< Unknown event + ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START, + ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END, + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START, + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END, + ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION, + ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, + ROCPROFILER_PAGE_MIGRATION_LAST, +} rocprofiler_page_migration_operation_t; + /** * @brief Scratch event kind */ @@ -333,19 +332,6 @@ typedef enum ROCPROFILER_SCRATCH_MEMORY_LAST, } rocprofiler_scratch_memory_operation_t; -/** - * @brief Allocation flags for @see rocprofiler_buffer_tracing_scratch_memory_record_t - */ -typedef enum -{ - ROCPROFILER_SCRATCH_ALLOC_FLAG_NONE = 0, - ROCPROFILER_SCRATCH_ALLOC_FLAG_USE_ONCE = - HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE, ///< This scratch allocation is only valid for 1 - ///< dispatch. - ROCPROFILER_SCRATCH_ALLOC_FLAG_ALT = - HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT, ///< Used alternate scratch instead of main scratch -} rocprofiler_scratch_alloc_flag_t; - /** * @brief Enumeration for specifying runtime libraries supported by rocprofiler. This enumeration is * used for thread creation callbacks. @see INTERNAL_THREADING. diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa.h index df075a6e18..0fda1d75b3 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #if defined(ROCPROFILER_DEFINED_AMD_INTERNAL_BUILD) && ROCPROFILER_DEFINED_AMD_INTERNAL_BUILD > 0 diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/CMakeLists.txt index 8801e5c22a..f4229b9631 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/CMakeLists.txt @@ -12,6 +12,7 @@ set(ROCPROFILER_HSA_HEADER_FILES finalize_ext_api_id.h image_ext_api_id.h scratch_memory_args.h + scratch_memory_id.h table_id.h) install( diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/scratch_memory_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/scratch_memory_id.h new file mode 100644 index 0000000000..ce6fe4de78 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/hsa/scratch_memory_id.h @@ -0,0 +1,39 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +/** + * @brief Allocation flags for @see rocprofiler_buffer_tracing_scratch_memory_record_t + */ +// NOLINTNEXTLINE(performance-enum-size) +typedef enum +{ + ROCPROFILER_SCRATCH_ALLOC_FLAG_NONE = 0, + ROCPROFILER_SCRATCH_ALLOC_FLAG_USE_ONCE = + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE, ///< This scratch allocation is only valid for 1 + ///< dispatch. + ROCPROFILER_SCRATCH_ALLOC_FLAG_ALT = + HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT, ///< Used alternate scratch instead of main scratch +} rocprofiler_scratch_alloc_flag_t; diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/kfd/CMakeLists.txt b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/kfd/CMakeLists.txt new file mode 100644 index 0000000000..2a63111257 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/kfd/CMakeLists.txt @@ -0,0 +1,11 @@ +# +# +# Installation of public KFD headers +# +# +set(ROCPROFILER_KFD_HEADER_FILES page_migration_args.h page_migration_id.h) + +install( + FILES ${ROCPROFILER_KFD_HEADER_FILES} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/kfd + COMPONENT development) diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/kfd/page_migration_args.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/kfd/page_migration_args.h new file mode 100644 index 0000000000..d1eed33d9f --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/kfd/page_migration_args.h @@ -0,0 +1,104 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include +#include + +#include + +ROCPROFILER_EXTERN_C_INIT + +typedef struct rocprofiler_page_migration_none_t +{ + char empty; +} rocprofiler_page_migration_none_t; + +typedef struct rocprofiler_page_migration_page_migrate_start_t +{ + uint64_t start_addr; + uint64_t end_addr; + rocprofiler_agent_id_t from_agent; + rocprofiler_agent_id_t to_agent; + rocprofiler_agent_id_t prefetch_agent; + rocprofiler_agent_id_t preferred_agent; + rocprofiler_page_migration_trigger_t trigger; +} rocprofiler_page_migration_page_migrate_start_t; + +typedef struct rocprofiler_page_migration_page_migrate_end_t +{ + uint64_t start_addr; + uint64_t end_addr; + rocprofiler_agent_id_t from_agent; + rocprofiler_agent_id_t to_agent; + rocprofiler_page_migration_trigger_t trigger; +} rocprofiler_page_migration_page_migrate_end_t; + +typedef struct rocprofiler_page_migration_page_fault_start_t +{ + uint32_t read_fault : 1; + rocprofiler_agent_id_t agent_id; + uint64_t address; +} rocprofiler_page_migration_page_fault_start_t; + +typedef struct rocprofiler_page_migration_page_fault_end_t +{ + uint32_t migrated : 1; + rocprofiler_agent_id_t agent_id; + uint64_t address; +} rocprofiler_page_migration_page_fault_end_t; + +typedef struct rocprofiler_page_migration_queue_eviction_t +{ + rocprofiler_agent_id_t agent_id; + rocprofiler_page_migration_queue_suspend_trigger_t trigger; +} rocprofiler_page_migration_queue_eviction_t; + +typedef struct rocprofiler_page_migration_queue_restore_t +{ + uint32_t rescheduled : 1; + rocprofiler_agent_id_t agent_id; +} rocprofiler_page_migration_queue_restore_t; + +typedef struct rocprofiler_page_migration_unmap_from_gpu_t +{ + uint64_t start_addr; + uint64_t end_addr; + rocprofiler_agent_id_t agent_id; + rocprofiler_page_migration_unmap_from_gpu_trigger_t trigger; +} rocprofiler_page_migration_unmap_from_gpu_t; + +typedef union +{ + rocprofiler_page_migration_none_t none; + rocprofiler_page_migration_page_migrate_start_t page_migrate_start; + rocprofiler_page_migration_page_migrate_end_t page_migrate_end; + rocprofiler_page_migration_page_fault_start_t page_fault_start; + rocprofiler_page_migration_page_fault_end_t page_fault_end; + rocprofiler_page_migration_queue_eviction_t queue_eviction; + rocprofiler_page_migration_queue_restore_t queue_restore; + rocprofiler_page_migration_unmap_from_gpu_t unmap_from_gpu; + uint64_t reserved[16]; +} rocprofiler_page_migration_args_t; + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/kfd/page_migration_id.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/kfd/page_migration_id.h new file mode 100644 index 0000000000..7f21c01675 --- /dev/null +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/kfd/page_migration_id.h @@ -0,0 +1,83 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include +#include +#include +#include +#include + +#include + +ROCPROFILER_EXTERN_C_INIT + +/** + * @brief Page migration triggers + * + */ +typedef enum +{ + ROCPROFILER_PAGE_MIGRATION_TRIGGER_NONE = -1, + ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH, ///< Migration triggered by a prefetch + ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, ///< Triggered by a page fault on the GPU + ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, ///< Triggered by a page fault on the CPU + ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, ///< Page evicted by linux TTM (Translation + ///< Table Manager) + ROCPROFILER_PAGE_MIGRATION_TRIGGER_LAST, +} rocprofiler_page_migration_trigger_t; + +/** + * @brief Page migration triggers causing the queue to suspend + * + */ +typedef enum +{ + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_NONE = -1, + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, ///< Queue suspended by TTM (Translation + ///< Table Manager) operation + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, ///< Queues evicted due to + ///< process save + ///< (checkpoint) by CRIU + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, ///< Queues restored during + ///< process restore by CRIU + ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_LAST, +} rocprofiler_page_migration_queue_suspend_trigger_t; + +/** + * @brief Page migration triggers causing an unmap from the GPU + * + */ +typedef enum +{ + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_NONE = -1, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST, +} rocprofiler_page_migration_unmap_from_gpu_trigger_t; + +ROCPROFILER_EXTERN_C_FINI diff --git a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocprofiler.h b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocprofiler.h index 1681cef966..e129b92f12 100644 --- a/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocprofiler.h +++ b/projects/rocprofiler-sdk/source/include/rocprofiler-sdk/rocprofiler.h @@ -77,7 +77,7 @@ ROCPROFILER_EXTERN_C_FINI #include "rocprofiler-sdk/hsa.h" #include "rocprofiler-sdk/intercept_table.h" #include "rocprofiler-sdk/internal_threading.h" -// #include "rocprofiler-sdk/marker.h" +#include "rocprofiler-sdk/marker.h" #include "rocprofiler-sdk/pc_sampling.h" #include "rocprofiler-sdk/profile_config.h" // #include "rocprofiler-sdk/spm.h" diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/CMakeLists.txt index f72dd12e2a..a789aeac64 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/CMakeLists.txt @@ -1,6 +1,6 @@ # # -set(ROCPROFILER_LIB_UVM_SOURCES page_migration.cpp) +set(ROCPROFILER_LIB_UVM_SOURCES abi.cpp page_migration.cpp) set(ROCPROFILER_LIB_UVM_HEADERS defines.hpp page_migration.hpp utils.hpp) target_sources(rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_UVM_SOURCES} diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/abi.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/abi.cpp new file mode 100644 index 0000000000..6293474040 --- /dev/null +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/abi.cpp @@ -0,0 +1,106 @@ +// MIT License +// +// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include + +#include "lib/common/container/small_vector.hpp" +#include "lib/common/defines.hpp" +#include "lib/common/mpl.hpp" +#include "lib/rocprofiler-sdk/details/kfd_ioctl.h" +#include "lib/rocprofiler-sdk/page_migration/utils.hpp" + +#define ASSERT_SAME(A, B) static_assert(static_cast(A) == static_cast(B)) + +#define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1 +#include "lib/rocprofiler-sdk/page_migration/page_migration.def.cpp" +#undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL + +namespace rocprofiler +{ +namespace page_migration +{ +using namespace rocprofiler::page_migration; +using namespace rocprofiler::common::container; +using rocprofiler_page_migration_seq_t = std::make_index_sequence; + +static_assert(KFD_SMI_EVENT_NONE == 0); +static_assert(KFD_SMI_EVENT_MIGRATE_START == 5); +static_assert(KFD_SMI_EVENT_MIGRATE_END == 6); +static_assert(KFD_SMI_EVENT_PAGE_FAULT_START == 7); +static_assert(KFD_SMI_EVENT_PAGE_FAULT_END == 8); +static_assert(KFD_SMI_EVENT_QUEUE_EVICTION == 9); +static_assert(KFD_SMI_EVENT_QUEUE_RESTORE == 10); +static_assert(KFD_SMI_EVENT_UNMAP_FROM_GPU == 11); +static_assert(KFD_SMI_EVENT_ALL_PROCESS == 64); + +// Update page_migration.def.cpp with event mappings +// Update page_migration.cpp to parse and report new event +static_assert(ROCPROFILER_PAGE_MIGRATION_LAST == 8, + "New event added, update KFD to ROCPROFILER mappings"); + +// clang-format off +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, KFD_MIGRATE_TRIGGER_TTM_EVICTION ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, KFD_QUEUE_EVICTION_TRIGGER_SVM ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, KFD_QUEUE_EVICTION_TRIGGER_USERPTR ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, KFD_QUEUE_EVICTION_TRIGGER_TTM ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, KFD_QUEUE_EVICTION_CRIU_RESTORE ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY ); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE); +ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU ); +// clang-format on + +static_assert(kfd_bitmask(std::index_sequence()) == + (KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_PAGE_FAULT_START) | + KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_END) | + KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU))); + +static_assert((page_migration_info::kfd_bitmask | + page_migration_info::kfd_bitmask | + page_migration_info::kfd_bitmask) == + (KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_END) | + KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_QUEUE_EVICTION) | + KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU))); + +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_MIGRATE_START, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_MIGRATE_END, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_PAGE_FAULT_START, + rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_PAGE_FAULT_END, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_QUEUE_EVICTION, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_QUEUE_RESTORE, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE); +static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_UNMAP_FROM_GPU, rocprofiler_page_migration_seq_t{}) == + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU); + +} // namespace page_migration +} // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/defines.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/defines.hpp index 28efe6c5cc..6719e0d68c 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/defines.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/defines.hpp @@ -44,43 +44,17 @@ } \ } while(0) -#define APPEND_UVM_EVENT(X) ROCPROFILER_UVM_EVENT_##X -#define APPEND_1(X) APPEND_UVM_EVENT(X) -#define CONCAT(X, Y) X##Y -#define APPEND_2(A1, A2) APPEND_1(A1), APPEND_1(A2) -#define APPEND_3(A1, A2, A3) APPEND_2(A1, A2), APPEND_1(A3) -#define APPEND_4(A1, A2, A3, A4) APPEND_3(A1, A2, A3), APPEND_1(A4) -#define APPEND_5(A1, A2, A3, A4, A5) APPEND_4(A1, A2, A3, A4), APPEND_1(A5) - -#define MACRO_N(MACRO, N, ...) CONCAT(MACRO, N)(__VA_ARGS__) -#define APPLY_N(MACRO, ...) MACRO_N(MACRO, IMPL_DETAIL_FOR_EACH_NARG(__VA_ARGS__), __VA_ARGS__) - -#define GET_UVM_ENUMS(...) APPLY_N(APPEND_, __VA_ARGS__) - -// static constexpr size_t uvm_event = UVM_ENUM; -#define SPECIALIZE_UVM_KFD_EVENT(UVM_ENUM, KFD_ENUM, FORMAT_STRING) \ - template <> \ - struct uvm_event_info \ - { \ - static constexpr size_t kfd_event = KFD_ENUM; \ - static constexpr std::string_view format_str{FORMAT_STRING}; \ +#define SPECIALIZE_PAGE_MIGRATION_INFO(ROCPROF_NAME, KFD_NAME, FORMAT_STRING) \ + template <> \ + struct page_migration_info \ + { \ + static constexpr auto name = "PAGE_MIGRATION_" #ROCPROF_NAME; \ + static constexpr size_t operation = ROCPROFILER_PAGE_MIGRATION_##ROCPROF_NAME; \ + static constexpr size_t kfd_operation = KFD_SMI_EVENT_##KFD_NAME; \ + static constexpr size_t kfd_bitmask = bitmask(KFD_SMI_EVENT_##KFD_NAME); \ + static constexpr std::string_view format_str = FORMAT_STRING; \ }; -#define SPECIALIZE_PAGE_MIGRATION_INFO(TYPE, ...) \ - template <> \ - struct page_migration_info \ - { \ - static constexpr auto operation_idx = ROCPROFILER_PAGE_MIGRATION_##TYPE; \ - static constexpr auto name = "PAGE_MIGRATION_" #TYPE; \ - static constexpr size_t uvm_bitmask = \ - bitmask(std::index_sequence()); \ - static constexpr size_t kfd_bitmask = \ - to_kfd_bitmask(std::index_sequence()); \ - } - -#define COPY_FROM_START_1(MEMBER) end.MEMBER = start.MEMBER; -#define COPY_FROM_START_2(UNION_TYPE, MEMBER) end.UNION_TYPE.MEMBER = start.UNION_TYPE.MEMBER; - #define SPECIALIZE_KFD_IOC_IOCTL(STRUCT, ARG_IOC) \ template <> \ struct IOC_event \ diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/page_migration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/page_migration.cpp index 07d2fe39f4..50cb702b25 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/page_migration.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/page_migration.cpp @@ -21,9 +21,9 @@ // THE SOFTWARE. #include "lib/rocprofiler-sdk/page_migration/page_migration.hpp" +#include "lib/common/logging.hpp" #include "lib/common/mpl.hpp" #include "lib/common/static_object.hpp" -#include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/agent.hpp" #include "lib/rocprofiler-sdk/buffer.hpp" #include "lib/rocprofiler-sdk/context/context.hpp" @@ -44,14 +44,15 @@ #include #include #include -#include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -76,25 +77,14 @@ namespace page_migration template using small_vector = common::container::small_vector; -using context_t = context::context; -using context_array_t = common::container::small_vector; -using kfd_event_id_t = decltype(KFD_SMI_EVENT_NONE); -using page_migration_record_t = rocprofiler_buffer_tracing_page_migration_record_t; -using migrate_trigger_t = rocprofiler_page_migration_trigger_t; -using qsuspend_trigger_t = rocprofiler_page_migration_queue_suspend_trigger_t; -using unmap_trigger_t = rocprofiler_page_migration_unmap_from_gpu_trigger_t; +using context_t = context::context; +using context_array_t = common::container::small_vector; -// Parsing and utilities -namespace -{ -using namespace page_migration; +template +struct page_migration_info; -constexpr auto -page_to_bytes(size_t val) -{ - // each page is 4KB = 4096 bytes - return val << 12; -} +template +struct kfd_event_info; template struct page_migration_enum_info; @@ -102,122 +92,88 @@ struct page_migration_enum_info; template struct page_migration_bounds; -#define SPECIALIZE_PM_ENUM_INFO(TYPE, TRIGGER_CATEGORY, NAME) \ - template <> \ - struct page_migration_enum_info \ - { \ - static constexpr auto name = #NAME; \ - }; - -#define SPECIALIZE_PM_ENUM_BOUNDS(TYPE, TRIGGER_CATEGORY) \ - template <> \ - struct page_migration_bounds \ - { \ - static constexpr auto last = ROCPROFILER_PAGE_MIGRATION_##TRIGGER_CATEGORY##_LAST; \ - }; - -using queue_suspend_trigger_t = rocprofiler_page_migration_queue_suspend_trigger_t; -using unmap_from_gpu_trigger_t = rocprofiler_page_migration_unmap_from_gpu_trigger_t; - -SPECIALIZE_PM_ENUM_BOUNDS(rocprofiler_page_migration_trigger_t, TRIGGER) -SPECIALIZE_PM_ENUM_BOUNDS(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER) -SPECIALIZE_PM_ENUM_BOUNDS(unmap_from_gpu_trigger_t, UNMAP_FROM_GPU_TRIGGER) - -SPECIALIZE_PM_ENUM_INFO(rocprofiler_page_migration_trigger_t, TRIGGER, PREFETCH) -SPECIALIZE_PM_ENUM_INFO(rocprofiler_page_migration_trigger_t, TRIGGER, PAGEFAULT_GPU) -SPECIALIZE_PM_ENUM_INFO(rocprofiler_page_migration_trigger_t, TRIGGER, PAGEFAULT_CPU) -SPECIALIZE_PM_ENUM_INFO(rocprofiler_page_migration_trigger_t, TRIGGER, TTM_EVICTION) - -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, SVM) -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, USERPTR) -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, TTM) -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, SUSPEND) -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, CRIU_CHECKPOINT) -SPECIALIZE_PM_ENUM_INFO(queue_suspend_trigger_t, QUEUE_SUSPEND_TRIGGER, CRIU_RESTORE) - -SPECIALIZE_PM_ENUM_INFO(unmap_from_gpu_trigger_t, UNMAP_FROM_GPU_TRIGGER, MMU_NOTIFY) -SPECIALIZE_PM_ENUM_INFO(unmap_from_gpu_trigger_t, UNMAP_FROM_GPU_TRIGGER, MMU_NOTIFY_MIGRATE) -SPECIALIZE_PM_ENUM_INFO(unmap_from_gpu_trigger_t, UNMAP_FROM_GPU_TRIGGER, UNMAP_FROM_CPU) - -using trigger_type_list_t = common::mpl::type_list; - -template -std::string_view -to_string_impl(EnumT val, std::index_sequence) +// Parsing and utilities +namespace { - if(val == Idx) return page_migration_enum_info::name; - if constexpr(sizeof...(IdxTail) > 0) - return to_string_impl(val, std::index_sequence{}); - else - return std::string_view{}; -} - -template -std::string_view -to_string(EnumT val, - std::enable_if_t::value && - common::mpl::is_one_of::value, - int> = 0) +constexpr auto +page_to_bytes(size_t val) { - constexpr auto last = page_migration_bounds::last; - return to_string_impl(val, std::make_index_sequence{}); + // each page is 4KB = 4096 bytes + return val << 12; } template -page_migration_record_t parse_uvm_event(std::string_view) +page_migration_record_t parse_event(std::string_view) { - ROCP_FATAL_IF(false) << uvm_event_info::format_str; + ROCP_FATAL_IF(false) << page_migration_info::format_str; return {}; } +auto +get_node_agent_id(uint32_t _node_id) +{ + using agent_id_map_t = std::unordered_map; + static auto*& _data = static_object::construct([]() { + auto _v = std::unordered_map{}; + for(const auto* agent : agent::get_agents()) + _v.emplace(agent->gpu_id, agent->id); + return _v; + }()); + + CHECK(_data != nullptr); + ROCP_FATAL_IF(_data->count(_node_id) == 0) << "page_migration: unknown node id: " << _node_id; + return _data->at(_node_id); +} + template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.page_fault; - uint32_t kind{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.page_fault_start; + uint32_t kind{}; + uint32_t _node_id = 0; char fault; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.start_timestamp, + &rec.timestamp, &rec.pid, &e.address, - &e.node_id, + &_node_id, &fault); e.read_fault = (fault == 'R'); e.address = page_to_bytes(e.address); + e.agent_id = get_node_agent_id(_node_id); - ROCP_INFO << fmt::format("Page fault start [ ts: {} pid: {} addr: 0x{:X} node: {} ] \n", - rec.start_timestamp, - rec.pid, - e.address, - e.node_id); + ROCP_TRACE << fmt::format("Page fault start [ ts: {} pid: {} addr: 0x{:X} node: {} ] \n", + rec.timestamp, + rec.pid, + e.address, + e.agent_id.handle); return rec; } template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.page_fault; - uint32_t kind{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.page_fault_end; + uint32_t kind{}; + uint32_t _node_id = 0; char migrated; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.end_timestamp, + &rec.timestamp, &rec.pid, &e.address, - &e.node_id, + &_node_id, &migrated); // M or U -> migrated / unmigrated? @@ -225,16 +181,18 @@ parse_uvm_event(std::string_view str) e.migrated = true; else if(migrated == 'U') e.migrated = false; - // else - // throw std::runtime_error("Invalid SVM memory migrate type"); - e.address = page_to_bytes(e.address); + else + ROCP_WARNING << "Unknown PAGE_FAULT_END migrated/unmigrated state"; - ROCP_INFO << fmt::format( + e.address = page_to_bytes(e.address); + e.agent_id = get_node_agent_id(_node_id); + + ROCP_TRACE << fmt::format( "Page fault end [ ts: {} pid: {} addr: 0x{:X} node: {} migrated: {} ] \n", - rec.end_timestamp, + rec.timestamp, rec.pid, e.address, - e.node_id, + e.agent_id.handle, migrated); return rec; @@ -242,507 +200,310 @@ parse_uvm_event(std::string_view str) template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.page_migrate; - uint32_t kind{}; - uint32_t trigger{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.page_migrate_start; + uint32_t kind{}; + uint32_t trigger{}; + uint32_t _from_node = 0; + uint32_t _to_node = 0; + uint32_t _prefetch_node = 0; + uint32_t _preferred_node = 0; - std::sscanf(str.data(), - uvm_event_info::format_str.data(), - &kind, - &rec.start_timestamp, - &rec.pid, - &e.start_addr, - &e.end_addr, - &e.from_node, - &e.to_node, - &e.prefetch_node, - &e.preferred_node, - &trigger); + std::sscanf( + str.data(), + page_migration_info::format_str.data(), + &kind, + &rec.timestamp, + &rec.pid, + &e.start_addr, + &e.end_addr, + &_from_node, + &_to_node, + &_prefetch_node, + &_preferred_node, + &trigger); e.end_addr += e.start_addr; - e.trigger = static_cast(trigger); - e.start_addr = page_to_bytes(e.start_addr); - e.end_addr = page_to_bytes(e.end_addr) - 1; + e.trigger = static_cast(trigger); + e.start_addr = page_to_bytes(e.start_addr); + e.end_addr = page_to_bytes(e.end_addr) - 1; + e.from_agent = get_node_agent_id(_from_node); + e.to_agent = get_node_agent_id(_to_node); + e.prefetch_agent = get_node_agent_id(_prefetch_node); + e.preferred_agent = get_node_agent_id(_preferred_node); - ROCP_INFO << fmt::format( + ROCP_TRACE << fmt::format( "Page migrate start [ ts: {} pid: {} addr s: 0x{:X} addr " "e: 0x{:X} size: {}B from node: {} to node: {} prefetch node: {} preferred node: {} " "trigger: {} ] \n", - rec.start_timestamp, + rec.timestamp, rec.pid, e.start_addr, e.end_addr, (e.end_addr - e.start_addr), - e.from_node, - e.to_node, - e.prefetch_node, - e.preferred_node, - to_string(e.trigger)); + e.from_agent.handle, + e.to_agent.handle, + e.prefetch_agent.handle, + e.preferred_agent.handle, + trigger); return rec; } template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.page_migrate; - uint32_t kind{}; - uint32_t trigger{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.page_migrate_end; + uint32_t kind{}; + uint32_t trigger{}; + uint32_t _from_node = 0; + uint32_t _to_node = 0; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.end_timestamp, + &rec.timestamp, &rec.pid, &e.start_addr, &e.end_addr, - &e.from_node, - &e.to_node, + &_from_node, + &_to_node, &trigger); e.end_addr += e.start_addr; e.trigger = static_cast(trigger); e.start_addr = page_to_bytes(e.start_addr); e.end_addr = page_to_bytes(e.end_addr) - 1; + e.from_agent = get_node_agent_id(_from_node); + e.to_agent = get_node_agent_id(_to_node); - ROCP_INFO << fmt::format("Page migrate end [ ts: {} pid: {} addr s: 0x{:X} addr e: " - "0x{:X} from node: {} to node: {} trigger: {} ] \n", - rec.end_timestamp, - rec.pid, - e.start_addr, - e.end_addr, - e.from_node, - e.to_node, - to_string(e.trigger)); + ROCP_TRACE << fmt::format("Page migrate end [ ts: {} pid: {} addr s: 0x{:X} addr e: " + "0x{:X} from node: {} to node: {} trigger: {} ] \n", + rec.timestamp, + rec.pid, + e.start_addr, + e.end_addr, + e.from_agent.handle, + e.to_agent.handle, + trigger); return rec; } template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.queue_suspend; - uint32_t kind{}; - uint32_t trigger{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.queue_eviction; + uint32_t kind{}; + uint32_t trigger{}; + uint32_t _node_id = 0; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.start_timestamp, + &rec.timestamp, &rec.pid, - &e.node_id, + &_node_id, &trigger); - rec.queue_suspend.trigger = static_cast(trigger); + e.trigger = static_cast(trigger); + e.agent_id = get_node_agent_id(_node_id); - ROCP_INFO << fmt::format("Queue evict [ ts: {} pid: {} node: {} trigger: {} ] \n", - rec.start_timestamp, - rec.pid, - e.node_id, - to_string(e.trigger)); + ROCP_TRACE << fmt::format("Queue evict [ ts: {} pid: {} node: {} trigger: {} ] \n", + rec.timestamp, + rec.pid, + e.agent_id.handle, + trigger); return rec; } template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.queue_suspend; - uint32_t kind{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.queue_restore; + uint32_t kind{}; + uint32_t _node_id = 0; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.end_timestamp, + &rec.timestamp, &rec.pid, - &e.node_id); + &_node_id); // check if we have a valid char at the end. -1 has \0 if(str[str.size() - 2] == 'R') e.rescheduled = true; else e.rescheduled = false; + e.agent_id = get_node_agent_id(_node_id); - ROCP_INFO << fmt::format( - "Queue restore [ ts: {} pid: {} node: {} ] \n", rec.end_timestamp, rec.pid, e.node_id); + ROCP_TRACE << fmt::format( + "Queue restore [ ts: {} pid: {} node: {} ] \n", rec.timestamp, rec.pid, e.agent_id.handle); return rec; } template <> page_migration_record_t -parse_uvm_event(std::string_view str) +parse_event(std::string_view str) { - page_migration_record_t rec{}; - auto& e = rec.unmap_from_gpu; - uint32_t kind{}; - uint32_t trigger{}; + auto rec = page_migration_record_t{}; + auto& e = rec.args.unmap_from_gpu; + uint32_t kind{}; + uint32_t trigger{}; + uint32_t _node_id = 0; std::sscanf(str.data(), - uvm_event_info::format_str.data(), + page_migration_info::format_str.data(), &kind, - &rec.start_timestamp, + &rec.timestamp, &rec.pid, &e.start_addr, &e.end_addr, - &e.node_id, + &_node_id, &trigger); e.end_addr += e.start_addr; - rec.end_timestamp = rec.start_timestamp; - rec.unmap_from_gpu.trigger = static_cast(trigger); - e.start_addr = page_to_bytes(e.start_addr); - e.end_addr = page_to_bytes(e.end_addr); + e.trigger = static_cast(trigger); + e.start_addr = page_to_bytes(e.start_addr); + e.end_addr = page_to_bytes(e.end_addr); + e.agent_id = get_node_agent_id(_node_id); - ROCP_INFO << fmt::format("Unmap from GPU [ ts: {} pid: {} start addr: 0x{:X} end addr: 0x{:X} " - "node: {} trigger {} ] \n", - rec.start_timestamp, - rec.pid, - e.start_addr, - e.end_addr, - e.node_id, - to_string(e.trigger)); + ROCP_TRACE << fmt::format( + "Unmap from GPU [ ts: {} pid: {} start addr: 0x{:X} end addr: 0x{:X} " + "node: {} trigger {} ] \n", + rec.timestamp, + rec.pid, + e.start_addr, + e.end_addr, + e.agent_id.handle, + trigger); return rec; } +template <> +page_migration_record_t parse_event(std::string_view) +{ + throw std::runtime_error( + "ROCPROFILER_PAGE_MIGRATION_NONE for parsing page migration events should not happen"); +} + template page_migration_record_t -parse_uvm_event(uvm_event_id_t event_id, - std::string_view strn, - std::index_sequence) +parse_event(size_t event_id, std::string_view strn, std::index_sequence) { if(OpInx == static_cast(event_id)) { - auto rec = parse_uvm_event(strn); + auto rec = parse_event(strn); rec.size = sizeof(page_migration_record_t); rec.kind = ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION; - rec.operation = to_rocprof_op(OpInx); + rec.operation = static_cast(OpInx); return rec; } - else if constexpr(sizeof...(OpInxs) > 0) - return parse_uvm_event(event_id, strn, std::index_sequence{}); - else - return page_migration_record_t{}; + + if constexpr(sizeof...(OpInxs) > 0) + return parse_event(event_id, strn, std::index_sequence{}); + + return page_migration_record_t{}; } /* -----------------------------------------------------------------------------------*/ -template -void -update_end(const page_migration_record_t& start, page_migration_record_t& end); - -template <> -void -update_end(const page_migration_record_t& start, - page_migration_record_t& end) -{ - CHECK(start.pid == end.pid); - CHECK(start.page_fault.address == end.page_fault.address); - CHECK(start.page_fault.node_id == end.page_fault.node_id); - COPY_FROM_START_1(start_timestamp); - COPY_FROM_START_2(page_fault, migrated); -} - -template <> -void -update_end(const page_migration_record_t& start, - page_migration_record_t& end) -{ - CHECK(start.pid == end.pid); - CHECK(start.page_migrate.start_addr == end.page_migrate.start_addr); - CHECK(start.page_migrate.end_addr == end.page_migrate.end_addr); - CHECK(start.page_migrate.from_node == end.page_migrate.from_node); - CHECK(start.page_migrate.to_node == end.page_migrate.to_node); - CHECK(start.page_migrate.trigger == end.page_migrate.trigger); - COPY_FROM_START_1(start_timestamp); - COPY_FROM_START_2(page_migrate, prefetch_node); - COPY_FROM_START_2(page_migrate, preferred_node); -} - -template <> -void -update_end(const page_migration_record_t& start, - page_migration_record_t& end) -{ - CHECK(start.pid == end.pid); - CHECK(start.queue_suspend.node_id == end.queue_suspend.node_id); - COPY_FROM_START_1(start_timestamp); - COPY_FROM_START_2(queue_suspend, trigger); -} - -/* -----------------------------------------------------------------------------------*/ - -template -uint64_t -get_key(const rocprofiler_buffer_tracing_page_migration_record_t& rec) = delete; - -template <> -uint64_t -get_key( - const rocprofiler_buffer_tracing_page_migration_record_t& rec) -{ - // page migrate, use address as identifier - return rec.page_migrate.start_addr; -} - -template <> -uint64_t -get_key( - const rocprofiler_buffer_tracing_page_migration_record_t& rec) -{ - // page fault, use address as identifier - return rec.page_fault.address; -} - -template <> -uint64_t -get_key( - const rocprofiler_buffer_tracing_page_migration_record_t& rec) -{ - // Queue suspend/evict. Node ID and pid are sufficient as in kfd, - // eviction is reference-counted per process-device. - uint64_t node_id = rec.queue_suspend.node_id; - return (node_id << 32) | rec.pid; -} - -/* -----------------------------------------------------------------------------------*/ - -template <> -page_migration_record_t parse_uvm_event<0>(std::string_view) -{ - throw std::runtime_error("None Op for parsing UVM events should not happen"); -} - -template <> -void -update_end(const page_migration_record_t&, page_migration_record_t&) -{ - throw std::runtime_error("None Op for parsing UVM events should not happen"); -} - -template <> -uint64_t -get_key(const page_migration_record_t&) -{ - throw std::runtime_error("None Op for parsing UVM events should not happen"); -} - -/* -----------------------------------------------------------------------------------*/ - -template -void -update_end(uvm_event_id_t event_id, - const page_migration_record_t& start, - page_migration_record_t& end, - std::index_sequence) -{ - if(OpInx == static_cast(event_id)) - update_end(start, end); - else if constexpr(sizeof...(OpInxs) > 0) - update_end(event_id, start, end, std::index_sequence{}); - else - return; -} - -template -uint64_t -get_key(uvm_event_id_t event_id, - const page_migration_record_t& record, - std::index_sequence) -{ - if constexpr(OpInx == ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU) - return {}; - else if(is_rocprof_uvm_map(event_id)) - return get_key::operation_idx>(record); - else if constexpr(sizeof...(OpInxs) > 0) - return get_key(event_id, record, std::index_sequence{}); - else - return {}; -} - -void -update_end(uvm_event_id_t event_id, - const page_migration_record_t& start, - page_migration_record_t& end) -{ - update_end(event_id, - start, - end, - std::index_sequence{}); -} } // namespace +size_t +get_rocprof_op(const std::string_view event_data) +{ + uint32_t kfd_event_id{}; + std::sscanf(event_data.data(), "%x ", &kfd_event_id); + + auto rocprof_id = + kfd_to_rocprof_op(static_cast(kfd_event_id), + std::make_index_sequence{}); + + ROCP_CI_LOG_IF(WARNING, rocprof_id == 0) + << fmt::format("Failed to parse KFD event ID {}. Parsed ID: {}, SDK ID: {}\n", + event_data[0], + kfd_event_id, + rocprof_id); + + return rocprof_id; +} + +void +kfd_readlines(const std::string_view str, void(handler)(std::string_view)) +{ + const auto find_newline = [&](auto b) { return std::find(b, str.cend(), '\n'); }; + const auto* cursor = str.cbegin(); + + for(const auto* pos = find_newline(cursor); pos != str.cend(); pos = find_newline(cursor)) + { + size_t char_count = pos - cursor; + assert(char_count > 0); + std::string_view event_str{cursor, char_count}; + + ROCP_INFO << fmt::format("KFD event: [{}]", event_str); + handler(event_str); + + cursor = pos + 1; + } +} + // Event capture and reporting namespace { -// Support seems to have been added in kfdv > 1.10 -static_assert(KFD_IOCTL_MAJOR_VERSION == 1, "KFD API major version changed"); -static_assert(KFD_IOCTL_MINOR_VERSION >= 10, "KFD SMI support missing in kfd_ioctl.h"); +constexpr auto kfd_ioctl_version = (1000 * KFD_IOCTL_MAJOR_VERSION) + KFD_IOCTL_MINOR_VERSION; +// Support has been added in kfdv >= 1.10+ +static_assert(kfd_ioctl_version >= 1010, "KFD SMI support missing in kfd_ioctl.h"); -// Convert from public events to KFD enum config - -template -constexpr size_t -kfd_bitmask_impl(size_t uvm_event_id, std::index_sequence) +auto +get_contexts(int operation) { - if(uvm_event_id == OpInx) return page_migration_info::kfd_bitmask; - if constexpr(sizeof...(OpInxs) > 0) - return kfd_bitmask_impl(uvm_event_id, std::index_sequence{}); - else - return 0; -} + auto active_contexts = context::get_active_contexts([](const auto* ctx) { + return (ctx->buffered_tracer && + ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)); + }); + auto operation_ctxs = context::context_array_t{}; -template -constexpr auto -kfd_bitmask(const small_vector& rocprof_event_ids, std::index_sequence) -{ - uint64_t m{}; - for(const size_t& event_id : rocprof_event_ids) + for(const auto* itr : active_contexts) { - m |= kfd_bitmask_impl(event_id, std::index_sequence{}); - } - return m; -} - -template -constexpr size_t -to_uvm_op_impl(size_t kfd_id, std::index_sequence) -{ - // if(kfd_id == uvm_event_info::kfd_event) return uvm_event_info::uvm_event; - if(kfd_id == uvm_event_info::kfd_event) return OpInx; - if constexpr(sizeof...(OpInxs) > 0) - return to_uvm_op_impl(kfd_id, std::index_sequence{}); - else - return 0; -} - -constexpr uvm_event_id_t -kfd_to_uvm_op(kfd_event_id_t kfd_id) -{ - return static_cast( - to_uvm_op_impl(kfd_id, std::make_index_sequence{})); -} - -struct buffered_context_data -{ - const context::context* ctx = nullptr; -}; - -void -populate_contexts(int operation_idx, std::vector& buffered_contexts) -{ - buffered_contexts.clear(); - - auto active_contexts = context::context_array_t{}; - for(const auto* itr : context::get_active_contexts(active_contexts)) - { - if(itr->buffered_tracer) + // if the given domain + op is not enabled, skip this context + if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, operation)) { - // if the given domain + op is not enabled, skip this context - if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, - operation_idx)) - buffered_contexts.emplace_back(buffered_context_data{itr}); + operation_ctxs.emplace_back(itr); } } -} -void -remove_events(events_cache_t& events, size_t timestamp) -{ - for(auto map : events) - { - for(auto i = map.begin(); i != map.end(); ++i) - { - if(i->second.start_timestamp < timestamp) map.erase(i); - } - } -} - -bool -report_event(uvm_event_id_t event_id, - rocprofiler_buffer_tracing_page_migration_record_t& end_record) -{ - using rocprofiler_page_migr_seq = std::make_index_sequence; - static thread_local events_cache_t EVENTS_CACHE{}; - - auto& events_map = EVENTS_CACHE[to_rocprof_op(event_id)]; - - switch(static_cast(event_id)) - { - case ROCPROFILER_UVM_EVENT_MIGRATE_START: [[fallthrough]]; - case ROCPROFILER_UVM_EVENT_PAGE_FAULT_START: [[fallthrough]]; - case ROCPROFILER_UVM_EVENT_QUEUE_EVICTION: - { - // insert into map - auto key = get_key(event_id, end_record, rocprofiler_page_migr_seq{}); - events_map[key] = end_record; - return false; - } - // End events. Pair up and report - case ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU: - { - return true; - } - case ROCPROFILER_UVM_EVENT_MIGRATE_END: [[fallthrough]]; - case ROCPROFILER_UVM_EVENT_PAGE_FAULT_END: [[fallthrough]]; - case ROCPROFILER_UVM_EVENT_QUEUE_RESTORE: - { - auto key = get_key(event_id, end_record, rocprofiler_page_migr_seq{}); - if(auto start_rec = events_map.find(key); start_rec != events_map.end()) - { - update_end(event_id, start_rec->second, end_record); - } - else - { - // we got an end record and can't find the start record - // drop everything in the map before this timestamp - remove_events(EVENTS_CACHE, end_record.end_timestamp); - } - return true; - } - default: throw std::runtime_error("Invalid page migration event"); - } + return operation_ctxs; } void handle_reporting(std::string_view event_data) { - uint32_t kfd_event_id; - std::sscanf(event_data.data(), "%x ", &kfd_event_id); - std::vector buffered_contexts{}; - - auto uvm_event_op = kfd_to_uvm_op(static_cast(kfd_event_id)); - - populate_contexts(uvm_event_op, buffered_contexts); + const auto op_inx = get_rocprof_op(event_data); + auto buffered_contexts = get_contexts(op_inx); if(buffered_contexts.empty()) return; // Parse and process the event - auto record = parse_uvm_event( - uvm_event_op, event_data, std::make_index_sequence{}); + auto record = parse_event( + op_inx, event_data, std::make_index_sequence{}); - // pair up start and end and only then insert it into the buffer - if(report_event(uvm_event_op, record)) + for(const auto& itr : buffered_contexts) { - for(const auto& itr : buffered_contexts) - { - auto* _buffer = buffer::get_buffer(itr.ctx->buffered_tracer->buffer_data.at( - ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)); - CHECK_NOTNULL(_buffer)->emplace(ROCPROFILER_BUFFER_CATEGORY_TRACING, - ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, - record); - } + auto* buffer = buffer::get_buffer( + itr->buffered_tracer->buffer_data.at(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)); + CHECK_NOTNULL(buffer)->emplace( + ROCPROFILER_BUFFER_CATEGORY_TRACING, ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, record); } } @@ -751,8 +512,7 @@ handle_reporting(std::string_view event_data) // KFD utils namespace kfd { -void -poll_events(small_vector, bool); +void poll_events(small_vector); using fd_flags_t = decltype(EFD_NONBLOCK); using fd_t = decltype(pollfd::fd); @@ -824,26 +584,26 @@ struct poll_kfd_t struct gpu_fd_t { - unsigned int node_id{}; - fd_t fd{}; - const rocprofiler_agent_t* agent{}; + unsigned int node_id = 0; + fd_t fd = {}; + const rocprofiler_agent_t* agent = nullptr; }; - kfd_device_fd kfd_fd{}; - small_vector file_handles{}; - pollfd thread_notify{}; - std::thread bg_thread; - bool active{false}; + kfd_device_fd kfd_fd = {}; + small_vector file_handles = {}; + pollfd thread_notify = {}; + std::thread bg_thread = {}; + bool active = {false}; poll_kfd_t() = default; - poll_kfd_t(const small_vector& rprof_ev, bool non_blocking) + poll_kfd_t(const small_vector& rprof_ev) : kfd_fd{kfd_device_fd{}} { const auto kfd_flags = kfd_bitmask(rprof_ev, std::make_index_sequence{}); - ROCP_INFO << fmt::format("Setting KFD flags to [0b{:b}] \n", kfd_flags); + ROCP_TRACE << fmt::format("Setting KFD flags to [0b{:b}] \n", kfd_flags); // Create fd for notifying thread when we want to wake it up, and an eventfd for any events // to this thread @@ -876,7 +636,7 @@ struct poll_kfd_t { auto gpu_event_fd = get_node_fd(agent->gpu_id); file_handles.emplace_back(pollfd{gpu_event_fd, POLLIN, 0}); - ROCP_INFO << fmt::format( + ROCP_TRACE << fmt::format( "GPU node {} with fd {} added\n", agent->gpu_id, gpu_event_fd); } } @@ -886,14 +646,14 @@ struct poll_kfd_t { auto& fd = file_handles[i]; auto write_size = write(fd.fd, &kfd_flags, sizeof(kfd_flags)); - ROCP_INFO << fmt::format( + ROCP_TRACE << fmt::format( "Writing {} to GPU fd {} ({} bytes)\n", kfd_flags, fd.fd, write_size); CHECK(write_size == sizeof(kfd_flags)); } // start bg thread internal_threading::notify_pre_internal_thread_create(ROCPROFILER_LIBRARY); - bg_thread = std::thread{poll_events, file_handles, non_blocking}; + bg_thread = std::thread{poll_events, file_handles}; internal_threading::notify_post_internal_thread_create(ROCPROFILER_LIBRARY); active = true; @@ -949,7 +709,7 @@ get_config() kfd::poll_kfd_t::~poll_kfd_t() { - ROCP_INFO << fmt::format("Terminating poll_kfd\n"); + ROCP_TRACE << fmt::format("Terminating poll_kfd\n"); if(!active) return; // wake thread up @@ -961,7 +721,7 @@ kfd::poll_kfd_t::~poll_kfd_t() } while(bytes_written == -1 && (errno == EINTR || errno == EAGAIN)); if(bg_thread.joinable()) bg_thread.join(); - ROCP_INFO << fmt::format("Background thread terminated\n"); + ROCP_TRACE << fmt::format("Background thread terminated\n"); for(const auto& f : file_handles) close(f.fd); @@ -969,34 +729,28 @@ kfd::poll_kfd_t::~poll_kfd_t() } // namespace void -poll_events(small_vector file_handles, bool non_blocking) +poll_events(small_vector file_handles) { // storage to write records to, 1MB constexpr size_t PREALLOCATE_ELEMENT_COUNT{1024 * 128}; std::string scratch_buffer(PREALLOCATE_ELEMENT_COUNT, '\0'); - auto& exitfd = file_handles[1]; - const auto timeout_val = non_blocking == true ? 0 : -1; + auto& exitfd = file_handles[1]; // Wait or spin on events. // 0 -> return immediately even if no events // -1 -> wait indefinitely - ROCP_INFO << fmt::format("{} polling = {}, polling with timeout = {}", - non_blocking ? "Non-blocking" : "Blocking", - non_blocking, - timeout_val); - pthread_setname_np(pthread_self(), "bg:pagemigr"); for(auto& fd : file_handles) { - ROCP_INFO << fmt::format( + ROCP_TRACE << fmt::format( "Handle = {}, events = {}, revents = {}\n", fd.fd, fd.events, fd.revents); } while(!kfd::get_config().should_exit()) { - auto poll_ret = poll(file_handles.data(), file_handles.size(), timeout_val); + auto poll_ret = poll(file_handles.data(), file_handles.size(), -1); if(poll_ret == -1) throw std::runtime_error{"Background thread file descriptors are invalid"}; @@ -1009,6 +763,7 @@ poll_events(small_vector file_handles, bool non_blocking) using namespace std::chrono_literals; + // 0 and 1 are for generic and pipe-notify handles for(size_t i = 2; i < file_handles.size(); ++i) { auto& fd = file_handles[i]; @@ -1016,15 +771,9 @@ poll_events(small_vector file_handles, bool non_blocking) // We have data to read, perhaps multiple events if((fd.revents & POLLIN) != 0) { - size_t status_size = read(fd.fd, scratch_buffer.data(), scratch_buffer.size()); - - // ROCP_INFO << fmt::format( - // "status_size: {} size {}\n", status_size, scratch_buffer.size()); - std::string_view event_strings{scratch_buffer.data(), status_size}; - - // ROCP_INFO << fmt::format("Raw KFD string [({})]\n", - // event_strings.data()); - KFD_EVENT_PARSE_EVENTS(event_strings, handle_reporting); + size_t status_size = read(fd.fd, scratch_buffer.data(), scratch_buffer.size()); + auto event_strings = std::string_view{scratch_buffer.data(), status_size}; + kfd_readlines(event_strings, handle_reporting); } fd.revents = 0; } @@ -1051,7 +800,7 @@ get_ids(std::vector& _id_list, std::index_sequence) if(_v < static_cast(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST)) _vec.emplace_back(_v); }; - (_emplace(_id_list, page_migration_info::operation_idx), ...); + (_emplace(_id_list, page_migration_info::operation), ...); } bool @@ -1069,13 +818,13 @@ to_bitmask(small_vector& _id_list, std::index_sequence) if(_v < static_cast(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST)) _vec.emplace_back(_v); }; - (_emplace(_id_list, page_migration_info::operation_idx), ...); + (_emplace(_id_list, page_migration_info::operation), ...); } namespace { rocprofiler_status_t -init(const small_vector& event_ids, bool non_blocking) +init(const small_vector& event_ids) { // Check if version is more than 1.11 auto ver = kfd::get_version(); @@ -1084,7 +833,7 @@ init(const small_vector& event_ids, bool non_blocking) if(!context::get_registered_contexts(context_filter).empty()) { if(!kfd::get_config().kfd_handle) - kfd::get_config().kfd_handle = new kfd::poll_kfd_t{event_ids, non_blocking}; + kfd::get_config().kfd_handle = new kfd::poll_kfd_t{event_ids}; } return ROCPROFILER_STATUS_SUCCESS; } @@ -1104,12 +853,13 @@ rocprofiler_status_t init() { // Testing page migration - return init({ROCPROFILER_PAGE_MIGRATION_NONE, - ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, - ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, - ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, - ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU}, - rocprofiler::common::get_env("ROCPROF_PAGE_MIGRATION_NON_BLOCKING", false)); + return init({ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START, + ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END, + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START, + ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END, + ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION, + ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU}); } void diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/page_migration.def.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/page_migration.def.cpp index ace2dae896..ab726c4bae 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/page_migration.def.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/page_migration.def.cpp @@ -30,45 +30,30 @@ namespace rocprofiler { namespace page_migration { +using namespace rocprofiler::common; + +using kfd_event_id_t = decltype(KFD_SMI_EVENT_NONE); +using migrate_trigger_t = rocprofiler_page_migration_trigger_t; +using page_migration_record_t = rocprofiler_buffer_tracing_page_migration_record_t; +using queue_suspend_trigger_t = rocprofiler_page_migration_queue_suspend_trigger_t; +using unmap_from_gpu_trigger_t = rocprofiler_page_migration_unmap_from_gpu_trigger_t; + +using trigger_type_list_t = common::mpl::type_list; // clang-format off // Map ROCPROF UVM enums to KFD enums -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_NONE, KFD_SMI_EVENT_NONE, "Error: Invalid UVM event from KFD" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_MIGRATE_START, KFD_SMI_EVENT_MIGRATE_START, "%x %ld -%d @%lx(%lx) %x->%x %x:%x %d\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_MIGRATE_END, KFD_SMI_EVENT_MIGRATE_END, "%x %ld -%d @%lx(%lx) %x->%x %d\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, KFD_SMI_EVENT_PAGE_FAULT_START, "%x %ld -%d @%lx(%x) %c\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END, KFD_SMI_EVENT_PAGE_FAULT_END, "%x %ld -%d @%lx(%x) %c\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION, KFD_SMI_EVENT_QUEUE_EVICTION, "%x %ld -%d %x %d\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE, KFD_SMI_EVENT_QUEUE_RESTORE, "%x %ld -%d %x\n" ); -SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU, KFD_SMI_EVENT_UNMAP_FROM_GPU, "%x %ld -%d @%lx(%lx) %x %d\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(NONE, NONE, "Error: Invalid UVM event from KFD" ); +SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE_START, MIGRATE_START, "%x %ld -%d @%lx(%lx) %x->%x %x:%x %d\n"); +SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE_END, MIGRATE_END, "%x %ld -%d @%lx(%lx) %x->%x %d\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT_START, PAGE_FAULT_START, "%x %ld -%d @%lx(%x) %c\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT_END, PAGE_FAULT_END, "%x %ld -%d @%lx(%x) %c\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_EVICTION, QUEUE_EVICTION, "%x %ld -%d %x %d\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_RESTORE, QUEUE_RESTORE, "%x %ld -%d %x\n" ); +SPECIALIZE_PAGE_MIGRATION_INFO(UNMAP_FROM_GPU, UNMAP_FROM_GPU, "%x %ld -%d @%lx(%lx) %x %d\n" ); +#undef SPECIALIZE_PAGE_MIGRATION_INFO // clang-format on -# undef SPECIALIZE_UVM_KFD_EVENT -SPECIALIZE_PAGE_MIGRATION_INFO(NONE, NONE); -SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE, MIGRATE_START, MIGRATE_END); -SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT, PAGE_FAULT_START, PAGE_FAULT_END); -SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_SUSPEND, QUEUE_EVICTION, QUEUE_RESTORE); -SPECIALIZE_PAGE_MIGRATION_INFO(UNMAP_FROM_GPU, UNMAP_FROM_GPU); - -template -constexpr size_t to_rocprof_op_impl(std::index_sequence) -{ - return ((((bitmask(UvmOpInx) & page_migration_info::uvm_bitmask) != 0) * OpInxs) + ...); -} - -template -constexpr auto _to_rocprof_op_impl(std::index_sequence) -{ - return std::array{ - to_rocprof_op_impl(std::make_index_sequence{})...}; -} - -constexpr auto -to_rocprof_op(size_t pos) -{ - using rop = rocprofiler_page_migration_operation_t; - return static_cast( - _to_rocprof_op_impl(std::make_index_sequence{})[pos]); -} } // namespace page_migration } // namespace rocprofiler #endif diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/utils.hpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/utils.hpp index 65efda40ee..d519e255d7 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/utils.hpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/page_migration/utils.hpp @@ -22,12 +22,15 @@ #pragma once +#include "lib/common/container/small_vector.hpp" #include "lib/rocprofiler-sdk/details/kfd_ioctl.h" #include #include +#include #include +#include #include #include @@ -35,38 +38,32 @@ namespace rocprofiler { namespace page_migration { -// serves as an overview of what events we capture and report -enum fault_type_t -{ - NONE, - READ, - WRITE, -}; +/* serves as an overview of what events we capture and report -struct uvm_event_page_fault_start_t -{ - int kind; - uint64_t start_timestamp; - int pid; - int node_id; - uint64_t address; - fault_type_t fault; -}; - -struct uvm_event_page_fault_end_t +struct event_page_fault_start_t { int kind; - uint64_t end_timestamp; + uint64_t timestamp; + int pid; + int node_id; + uint64_t address; + fault_t fault; +}; + +struct event_page_fault_end_t +{ + int kind; + uint64_t timestamp; uint32_t pid; int node_id; uint64_t address; bool migrated; }; -struct uvm_event_migrate_start_t +struct event_migrate_start_t { int kind; - uint64_t start_timestamp; + uint64_t timestamp; uint32_t pid; uint64_t start; uint64_t end_offset; @@ -77,10 +74,10 @@ struct uvm_event_migrate_start_t uint32_t trigger; }; -struct uvm_event_migrate_end_t +struct event_migrate_end_t { int kind; - uint64_t end_timestamp; + uint64_t timestamp; uint32_t pid; uint64_t start; uint64_t end_offset; @@ -89,25 +86,25 @@ struct uvm_event_migrate_end_t uint32_t trigger; }; -struct uvm_event_queue_eviction_t +struct event_queue_eviction_t { int kind; - uint64_t start_timestamp; + uint64_t timestamp; uint32_t pid; int node_id; uint32_t trigger; }; -struct uvm_event_queue_restore_t +struct event_queue_restore_t { int kind; - uint64_t end_timestamp; + uint64_t timestamp; uint32_t pid; int node_id; bool rescheduled; }; -struct uvm_event_unmap_from_gpu_t +struct event_unmap_from_gpu_t { int kind; uint64_t timestamp; @@ -117,13 +114,13 @@ struct uvm_event_unmap_from_gpu_t int node_id; uint32_t trigger; }; - -template -struct uvm_event_info; +*/ template struct page_migration_info; +using namespace rocprofiler::common; + namespace kfd { template @@ -145,74 +142,56 @@ constexpr size_t bitmask(std::index_sequence) return (bitmask(Args) | ...); } -enum uvm_event_id_t +template +constexpr size_t kfd_bitmask(std::index_sequence) { - ROCPROFILER_UVM_EVENT_NONE, - ROCPROFILER_UVM_EVENT_MIGRATE_START, - ROCPROFILER_UVM_EVENT_MIGRATE_END, - ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, - ROCPROFILER_UVM_EVENT_PAGE_FAULT_END, - ROCPROFILER_UVM_EVENT_QUEUE_EVICTION, - ROCPROFILER_UVM_EVENT_QUEUE_RESTORE, - ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU, - ROCPROFILER_UVM_EVENT_LAST, -}; + return (page_migration_info::kfd_bitmask | ...); +} -static_assert(KFD_SMI_EVENT_NONE == 0); -static_assert(KFD_SMI_EVENT_MIGRATE_START == 5); -static_assert(KFD_SMI_EVENT_MIGRATE_END == 6); -static_assert(KFD_SMI_EVENT_PAGE_FAULT_START == 7); -static_assert(KFD_SMI_EVENT_PAGE_FAULT_END == 8); -static_assert(KFD_SMI_EVENT_QUEUE_EVICTION == 9); -static_assert(KFD_SMI_EVENT_QUEUE_RESTORE == 10); -static_assert(KFD_SMI_EVENT_UNMAP_FROM_GPU == 11); -static_assert(KFD_SMI_EVENT_ALL_PROCESS == 64); +template +constexpr size_t +kfd_bitmask_impl(size_t rocprof_op, std::index_sequence) +{ + if(rocprof_op == OpInx) return page_migration_info::kfd_bitmask; + if constexpr(sizeof...(OpInxs) > 0) + return kfd_bitmask_impl(rocprof_op, std::index_sequence{}); + else + return 0; +} + +template +constexpr auto +kfd_bitmask(const container::small_vector& rocprof_event_ids, + std::index_sequence) +{ + uint64_t m{}; + for(const size_t& event_id : rocprof_event_ids) + { + m |= kfd_bitmask_impl(event_id, std::index_sequence{}); + } + return m; +} + +template +constexpr size_t +kfd_to_rocprof_op(size_t kfd_id, std::index_sequence) +{ + if(kfd_id == page_migration_info::kfd_operation) return OpInx; + if constexpr(sizeof...(OpInxs) > 0) + return kfd_to_rocprof_op(kfd_id, std::index_sequence{}); + else + return 0; +} + +size_t +get_rocprof_op(const std::string_view event_data); + +void +kfd_readlines(const std::string_view str, void(handler)(std::string_view)); using rocprof_buffer_op_t = rocprofiler_page_migration_operation_t; using node_fd_t = int; -using event_map_t = - std::unordered_map; -using events_cache_t = std::array; - -template -constexpr size_t to_kfd_bitmask(std::index_sequence) -{ - return bitmask(std::index_sequence::kfd_event...>()); -} - -template -constexpr size_t to_uvm_bitmask(std::index_sequence) -{ - return bitmask(std::index_sequence(Ops)...>()); -} - -template -constexpr bool -is_rocprof_uvm_map() -{ - return page_migration_info::uvm_bitmask & bitmask(UvmOpIdx); -} - -template -constexpr bool -_is_rocprof_uvm_map(size_t uvm_event, std::index_sequence) -{ - if(OpInx == uvm_event) - return is_rocprof_uvm_map(); - else if constexpr(sizeof...(OpInxs) > 0) - return _is_rocprof_uvm_map(uvm_event, std::index_sequence{}); - else - return false; -} - -template -constexpr bool -is_rocprof_uvm_map(size_t uvm_event) -{ - return _is_rocprof_uvm_map( - uvm_event, std::make_index_sequence{}); -} } // namespace page_migration } // namespace rocprofiler diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/CMakeLists.txt b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/CMakeLists.txt index 3b43509f13..e08b02193b 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/CMakeLists.txt +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/CMakeLists.txt @@ -11,8 +11,16 @@ include(GoogleTest) # # -------------------------------------------------------------------------------------- # -set(rocprofiler_lib_sources agent.cpp buffer.cpp contexts.cpp hsa.cpp naming.cpp - timestamp.cpp version.cpp hsa_barrier.cpp) +set(rocprofiler_lib_sources + agent.cpp + buffer.cpp + contexts.cpp + hsa.cpp + naming.cpp + timestamp.cpp + version.cpp + hsa_barrier.cpp + page_migration.cpp) add_executable(rocprofiler-sdk-lib-tests) target_sources(rocprofiler-sdk-lib-tests PRIVATE ${rocprofiler_lib_sources} @@ -23,6 +31,7 @@ target_link_libraries( rocprofiler-sdk::rocprofiler-sdk-common-library rocprofiler-sdk::counter-test-constants rocprofiler-sdk::rocprofiler-sdk-hsa-runtime + rocprofiler-sdk::rocprofiler-sdk-drm GTest::gtest GTest::gtest_main) @@ -40,9 +49,8 @@ set_tests_properties(${lib_TESTS} PROPERTIES TIMEOUT 30 LABELS "unittests") # # -------------------------------------------------------------------------------------- # -set(rocprofiler_shared_lib_sources - external_correlation.cpp intercept_table.cpp page_migration.cpp registration.cpp - roctx.cpp status.cpp) +set(rocprofiler_shared_lib_sources external_correlation.cpp intercept_table.cpp + registration.cpp roctx.cpp status.cpp) add_executable(rocprofiler-sdk-lib-tests-shared) target_sources(rocprofiler-sdk-lib-tests-shared PRIVATE ${rocprofiler_shared_lib_sources}) diff --git a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/page_migration.cpp b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/page_migration.cpp index 696bf03078..e2f0ca31cb 100644 --- a/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/page_migration.cpp +++ b/projects/rocprofiler-sdk/source/lib/rocprofiler-sdk/tests/page_migration.cpp @@ -20,25 +20,26 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. +#include "lib/common/container/small_vector.hpp" #include "lib/common/defines.hpp" -#include "lib/rocprofiler-sdk/details/kfd_ioctl.h" +#include "lib/common/mpl.hpp" #include "lib/rocprofiler-sdk/page_migration/utils.hpp" +#include #include #include -#include +#include #include #include #include +#include #define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1 #include "lib/rocprofiler-sdk/page_migration/page_migration.def.cpp" #undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL -#define ASSERT_SAME(A, B) static_assert(static_cast(A) == static_cast(B)) - namespace { constexpr std::string_view MULTILINE_STRING = "This is 0 Line 0\n" @@ -61,7 +62,7 @@ return_line(const std::string_view line) auto parse_lines() { - KFD_EVENT_PARSE_EVENTS(MULTILINE_STRING, return_line); + rocprofiler::page_migration::kfd_readlines(MULTILINE_STRING, return_line); } TEST(page_migration, readlines) @@ -70,107 +71,35 @@ TEST(page_migration, readlines) parse_lines(); } -TEST(page_migration, parse_kvm_events) -{ - // Ensure all lines are read - parse_lines(); -} - TEST(page_migtation, rocprof_kfd_map) { - using namespace ::rocprofiler::page_migration; + using namespace rocprofiler::page_migration; + using namespace rocprofiler::common::container; + + using rocprofiler_page_migration_seq_t = + std::make_index_sequence; + + const small_vector vec{ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END, + ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION, + ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU}; + + EXPECT_EQ((page_migration_info::kfd_bitmask | + page_migration_info::kfd_bitmask | + page_migration_info::kfd_bitmask), + kfd_bitmask(vec, rocprofiler_page_migration_seq_t{})); + + const auto to_kfd_str = [](kfd_smi_event e) { + std::string str = fmt::format("{:x} ", static_cast(e)); + return rocprofiler::page_migration::get_rocprof_op({str}); + }; // clang-format off - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_MIGRATE_START >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_MIGRATE_END >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() ); - static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() ); - - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) ); - EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) ); - - - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() ); - - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_MIGRATE_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_MIGRATE_END >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() ); - - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_MIGRATE_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_MIGRATE_END >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() ); - - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_MIGRATE_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_MIGRATE_END >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() ); - static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() ); - - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) ); - - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) ); - - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) ); - - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) ); - EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) ); - - static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_MIGRATE_START) == ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE ); - static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END) == ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT ); - static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU) == ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU ); - static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION) == ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND ); - - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH, KFD_MIGRATE_TRIGGER_PREFETCH ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, KFD_MIGRATE_TRIGGER_TTM_EVICTION ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, KFD_QUEUE_EVICTION_TRIGGER_SVM ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, KFD_QUEUE_EVICTION_TRIGGER_USERPTR ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, KFD_QUEUE_EVICTION_TRIGGER_TTM ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, KFD_QUEUE_EVICTION_CRIU_RESTORE ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY ); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE); - ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU ); - - static_assert(to_kfd_bitmask(std::index_sequence< - ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU>()) == - (KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_PAGE_FAULT_START) - | KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU))); - + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_MIGRATE_START), ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_MIGRATE_END), ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_PAGE_FAULT_START), ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_PAGE_FAULT_END), ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_QUEUE_EVICTION), ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_QUEUE_RESTORE), ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE); + EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_UNMAP_FROM_GPU), ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU); // clang-format on } diff --git a/projects/rocprofiler-sdk/tests/bin/page-migration/CMakeLists.txt b/projects/rocprofiler-sdk/tests/bin/page-migration/CMakeLists.txt index 1406a9e580..e2f1b9f503 100644 --- a/projects/rocprofiler-sdk/tests/bin/page-migration/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/bin/page-migration/CMakeLists.txt @@ -40,8 +40,3 @@ target_compile_options(page-migration PRIVATE -W -Wall -Wextra -Wpedantic -Wshad find_package(Threads REQUIRED) target_link_libraries(page-migration PRIVATE Threads::Threads) - -install( - TARGETS page-migration - DESTINATION bin - COMPONENT tests) diff --git a/projects/rocprofiler-sdk/tests/bin/page-migration/page-migration.cpp b/projects/rocprofiler-sdk/tests/bin/page-migration/page-migration.cpp index 5cb3859cb3..5a38389dba 100644 --- a/projects/rocprofiler-sdk/tests/bin/page-migration/page-migration.cpp +++ b/projects/rocprofiler-sdk/tests/bin/page-migration/page-migration.cpp @@ -27,13 +27,14 @@ #include #include #include -#include #include #include #include #include #include #include +#include +#include #include #include @@ -117,14 +118,14 @@ private: }; int -main() +run_test(int num_iter) { using namespace std::chrono_literals; - static constexpr auto NUM_PAGES = 16; - const auto PAGE_SIZE_BYTES = ::sysconf(_SC_PAGE_SIZE); + constexpr size_t NUM_PAGES = 512; + const size_t PAGE_SIZE_BYTES = ::sysconf(_SC_PAGE_SIZE); - size_t elem_count = (NUM_PAGES * PAGE_SIZE_BYTES) / sizeof(size_t); // one page? + const size_t elem_count = (NUM_PAGES * PAGE_SIZE_BYTES) / sizeof(size_t); auto alloc = mmap_allocator(NUM_PAGES); void* data_v = alloc.get(); @@ -142,8 +143,10 @@ main() HIP_API_CALL(hipHostRegister(data, elem_count * sizeof(size_t), hipHostRegisterDefault)); - char maps[1024 * 1024]; - std::memset(maps, '\0', 1024 * 1024); + constexpr size_t MAPS_BUFFER_SIZE = 1024 * 1024; + + char maps[MAPS_BUFFER_SIZE]; + std::memset(maps, '\0', MAPS_BUFFER_SIZE); auto fd = open("/proc/self/maps", O_RDONLY | O_CLOEXEC); if(fd == -1) @@ -153,7 +156,7 @@ main() exit(-1); } - auto bytes = read(fd, maps, 1024 * 1024 - 1); + auto bytes = read(fd, maps, MAPS_BUFFER_SIZE - 1); if(bytes == -1) { auto ecode = errno; @@ -190,33 +193,96 @@ main() } } - for(int iter = 0; iter < 1000; ++iter) + hipStream_t stream{}; + HIP_API_CALL(hipStreamCreate(&stream)); + + for(int iter = 0; iter < num_iter; ++iter) { for(size_t i = 0; i < elem_count; ++i) + { data[i] = i; + } // std::cout << "launching..." << std::endl; - hipLaunchKernelGGL(kernel, 128, 64, 0, 0, data, elem_count); + hipLaunchKernelGGL(kernel, 1024, 1024, 0, stream, data, elem_count); - // std::cout << "syncing..." << std::endl; - HIP_API_CALL(hipDeviceSynchronize()); + HIP_API_CALL(hipStreamSynchronize(stream)); - // std::cout << "checking..." << std::endl; for(size_t i = 0; i < elem_count; ++i) { - if(data[i] != (i * 2)) + const auto data_i = data[i]; + if(data_i != (i * 2)) { auto msg = std::stringstream{}; msg << "GPU computed value at " << i << " in iteration " << iter - << " is incorrect. Expected " << (i * 2) << ", found " << data[i]; + << " is incorrect. Expected " << (i * 2) << ", found " << data_i; throw std::runtime_error{msg.str()}; } } - - std::cout << "Iteration " << std::setw(2) << iter << ": correct\n" << std::flush; } + HIP_API_CALL(hipStreamDestroy(stream)); HIP_API_CALL(hipDeviceSynchronize()); return 0; } + +int +main(int argc, const char** argv) +{ + using namespace std::chrono_literals; + + const auto usage_msg = [](const char** _argv) { + fprintf(stderr, "usage: %s \n", _argv[0]); + }; + + if(argc != 3) + { + usage_msg(argv); + exit(EXIT_FAILURE); + } + + for(int i = 1; i < argc; ++i) + { + auto _arg = std::string{argv[i]}; + if(_arg == "?" || _arg == "-h" || _arg == "--help") + { + usage_msg(argv); + exit(EXIT_SUCCESS); + } + } + + const auto num_threads = std::atoi(argv[1]); + if(num_threads < 1) + { + fprintf(stderr, "Error: Invalid value %d for num_threads (min 1)\n", num_threads); + exit(EXIT_FAILURE); + } + + const auto num_iter = std::atoi(argv[2]); + if(num_iter < 1) + { + fprintf(stderr, "Error: Invalid value %d for num_iter (min 1)\n", num_iter); + exit(EXIT_FAILURE); + } + + run_test(num_iter); + + std::vector threads; + threads.reserve(num_threads); + + std::cerr << "Running " << num_iter << " iterations/thread on " << num_threads << " threads\n"; + + for(auto i = 0; i < num_threads; ++i) + { + threads.emplace_back([_num_iter = num_iter]() { run_test(_num_iter); }); + } + + std::cerr << "Waiting for threads\n"; + for(auto& t : threads) + { + t.join(); + } + + return 0; +} diff --git a/projects/rocprofiler-sdk/tests/page-migration/CMakeLists.txt b/projects/rocprofiler-sdk/tests/page-migration/CMakeLists.txt index d7c0650841..8f0fd6d9f7 100644 --- a/projects/rocprofiler-sdk/tests/page-migration/CMakeLists.txt +++ b/projects/rocprofiler-sdk/tests/page-migration/CMakeLists.txt @@ -17,7 +17,7 @@ else() set(PRELOAD_ENV "LD_PRELOAD=$") endif() -add_test(NAME test-page-migration-execute COMMAND $) +add_test(NAME test-page-migration-execute COMMAND $ 4 1024) set(page-migration-env "${PRELOAD_ENV}" diff --git a/projects/rocprofiler-sdk/tests/page-migration/conftest.py b/projects/rocprofiler-sdk/tests/page-migration/conftest.py index 15b059dc42..a65124ec3b 100644 --- a/projects/rocprofiler-sdk/tests/page-migration/conftest.py +++ b/projects/rocprofiler-sdk/tests/page-migration/conftest.py @@ -2,6 +2,7 @@ import json import pytest +from rocprofiler_sdk.pytest_utils.dotdict import dotdict def pytest_addoption(parser): @@ -24,4 +25,4 @@ def input_data(request): return pytest.skip( "Skipping test because KFD does not support SVM event reporting" ) - return data + return dotdict(data) diff --git a/projects/rocprofiler-sdk/tests/page-migration/validate.py b/projects/rocprofiler-sdk/tests/page-migration/validate.py index ddf799c4b2..15619d96b2 100644 --- a/projects/rocprofiler-sdk/tests/page-migration/validate.py +++ b/projects/rocprofiler-sdk/tests/page-migration/validate.py @@ -271,71 +271,113 @@ def get_allocated_pages(callback_records): assert "hostPtr" in itr["args"].keys(), f"{itr}" host_register_record.append(itr) - assert len(host_register_record) == 1 - alloc_size = int(host_register_record[0]["args"]["sizeBytes"], 10) - start_addr = int(host_register_record[0]["args"]["hostPtr"], 16) - end_addr = start_addr + alloc_size + num_host_register_calls = len(host_register_record) + assert num_host_register_calls == 5, "Expected 5 hipHostRegister calls in test" - return start_addr, end_addr, alloc_size + ret = [] + for i in range(num_host_register_calls): + alloc_size = int(host_register_record[0]["args"]["sizeBytes"], 10) + start_addr = int(host_register_record[0]["args"]["hostPtr"], 16) + end_addr = start_addr + alloc_size + ret.append((start_addr, end_addr)) + return ret + + +def validate_node(id, nodes): + assert id.handle in nodes def test_page_migration_data(input_data): data = input_data sdk_data = data["rocprofiler-sdk-json-tool"] - buffer_records = sdk_data["buffer_records"] - callback_records = sdk_data["callback_records"] - page_migration_buffers = buffer_records["page_migration"] + buffer_records = sdk_data.buffer_records + callback_records = sdk_data.callback_records + page_migtation_buffers = buffer_records.page_migration _, bf_op_names = get_operation(buffer_records, "PAGE_MIGRATION") assert bf_op_names[0] == "PAGE_MIGRATION_NONE" - assert "PAGE_MIGRATION_PAGE_MIGRATE" in bf_op_names - assert len(bf_op_names) == 5 - node_ids = set(x["gpu_id"] for x in sdk_data["agents"]) - start_addr, end_addr, alloc_size = get_allocated_pages(callback_records) + for op_name in bf_op_names: + assert "PAGE_MIGRATION" in op_name - assert start_addr < end_addr and start_addr + alloc_size == end_addr - assert int(alloc_size) == 16 * 4096 # We allocated 16 pages in the test + assert len(bf_op_names) == 8 - # PID must be same - assert len(set(r["pid"] for r in page_migration_buffers)) == 1 + nodes = set(x.id.handle for x in sdk_data.agents) + allocations = get_allocated_pages(callback_records) - for r in page_migration_buffers: - op = r["operation"] + for start_addr, end_addr in allocations: - assert r["size"] == 136 - assert op != 0 and bf_op_names[op] != "PAGE_MIGRATION_NONE" - assert bf_op_names[op].lower().replace("page_migration_", "") in r.keys() + assert ( + start_addr < end_addr + ), "Expected start address less than end address for mmap range" + alloc_size = end_addr - start_addr + assert int(alloc_size) == 512 * 4096 # We allocated 512 pages in the test - if "page_migrate" in r: - assert r["page_migrate"]["from_node"] in node_ids - assert r["page_migrate"]["to_node"] in node_ids - assert r["page_migrate"]["prefetch_node"] in node_ids - assert r["page_migrate"]["preferred_node"] in node_ids - assert r["page_migrate"]["trigger"] >= 0 + # PID must be same + assert len(set(r.pid for r in page_migtation_buffers)) == 1 - if "queue_suspend" in r: - assert r["queue_suspend"]["trigger"] >= 0 - assert r["queue_suspend"]["node_id"] in node_ids + for r in page_migtation_buffers: + op = r.operation - if "unmap_from_gpu" in r: - assert r["unmap_from_gpu"]["trigger"] >= 0 - # unmap is "instantaneous" - assert 0 < r["start_timestamp"] == r["end_timestamp"] - else: - assert 0 < r["start_timestamp"] < r["end_timestamp"] + assert r.size == 160 + assert op != 0 and bf_op_names[op] != "PAGE_MIGRATION_NONE" + assert bf_op_names[op].lower().replace("page_migration_", "") in r.keys() - # Check for events with our page - for r in page_migration_buffers: + if "page_fault_start" in r: + arg = r.page_fault_start + assert arg.read_fault < 2 + validate_node(arg.agent_id, nodes) + assert arg.address > 0 - if "page_migrate" in r and r["page_migrate"]["start_addr"] == start_addr: - assert end_addr == r["page_migrate"]["end_addr"] + if "page_fault_end" in r: + arg = r.page_fault_end + assert arg.migrated < 2 + validate_node(arg.agent_id, nodes) + assert arg.address > 0 - if "unmap_from_gpu" in r and r["unmap_from_gpu"]["start_addr"] == start_addr: - assert end_addr == r["unmap_from_gpu"]["end_addr"] + if "page_migrate_start" in r: + arg = r.page_migrate_start + assert ( + 0 < arg.start_addr < arg.end_addr + ), "Expected start addr to be less than end addr" + if arg.start_addr == start_addr: + assert arg.end_addr == end_addr + validate_node(arg.from_agent, nodes) + validate_node(arg.to_agent, nodes) + validate_node(arg.prefetch_agent, nodes) + validate_node(arg.preferred_agent, nodes) + assert 0 <= arg.trigger < 4 - # TODO: Check if a migrate a->b is paired up with b->a - # It may not always be reported towards app finalization + if "page_migrate_end" in r: + arg = r.page_migrate_end + assert ( + 0 < arg.start_addr < arg.end_addr + ), "Expected start addr to be less than end addr" + if arg.start_addr == start_addr: + assert arg.end_addr == end_addr + validate_node(arg.from_agent, nodes) + validate_node(arg.to_agent, nodes) + assert 0 <= arg.trigger < 4 + + if "queue_eviction" in r: + arg = r.queue_eviction + validate_node(arg.agent_id, nodes) + assert 0 <= arg.trigger < 6 + + if "queue_restore" in r: + arg = r.queue_restore + assert arg.rescheduled < 2 + validate_node(arg.agent_id, nodes) + + if "unmap_from_gpu" in r: + arg = r.unmap_from_gpu + assert ( + 0 < arg.start_addr < arg.end_addr + ), "Expected start addr to be less than end addr" + if arg.start_addr == start_addr: + assert arg.end_addr == end_addr + validate_node(arg.agent_id, nodes) + assert 0 <= arg.trigger < 3 if __name__ == "__main__": diff --git a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp index b698de2886..597bf20844 100644 --- a/projects/rocprofiler-sdk/tests/tools/json-tool.cpp +++ b/projects/rocprofiler-sdk/tests/tools/json-tool.cpp @@ -1277,7 +1277,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data) 0, page_migration_buffer); - constexpr auto message = "page migration service for memory copy configure"; + constexpr auto message = "buffer tracing service for page migration configure"; if(page_migration_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL) std::cerr << message << " failed: " << rocprofiler_get_status_string(page_migration_status)