[SDK] KFD new events API (#321)

* Remove page-migration

* Add KFD events API

* Address review comments

* Move assert checks

* Update enum-string utils

* Update codeowners

* Update KFD header

* Add perfetto category

[ROCm/rocprofiler-sdk commit: 8a461afe20]
This commit is contained in:
Kuricheti, Mythreya
2025-06-26 11:28:45 -07:00
committed by GitHub
vanhempi 58532974bc
commit bde07e7baa
41 muutettua tiedostoa jossa 3132 lisäystä ja 2760 poistoa
+2 -2
Näytä tiedosto
@@ -8,8 +8,8 @@ source/docs @bgopesh
source/include @jrmadsen @bwelton @ammarwa
source/include/rocprofiler-sdk/rccl @MythreyaK
source/include/rocprofiler-sdk/kfd @MythreyaK
source/include/rocprofiler-sdk/cxx/codeobj @ApoKalipse-V
source/bin @SrirakshaNag @bgopesh
source/libexec @SrirakshaNag
@@ -17,8 +17,8 @@ source/libexec @SrirakshaNag
source/lib/rocprofiler-sdk/counters @bwelton
source/lib/rocprofiler-sdk/pc_sampling @vlaindic
source/lib/rocprofiler-sdk/thread_trace @ApoKalipse-V
source/lib/rocprofiler-sdk/page_migration @MythreyaK
source/lib/rocprofiler-sdk/rccl @MythreyaK
source/lib/rocprofiler-sdk/kfd @MythreyaK
source/lib/rocprofiler-sdk-tool @SrirakshaNag
@@ -311,99 +311,6 @@ tool_tracing_callback(rocprofiler_context_id_t context,
static_cast<call_stack_t*>(user_data)->emplace_back(
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
}
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
header->kind == ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_page_migration_record_t*>(header->payload);
auto info = std::stringstream{};
info << "kind=" << record->kind << ", operation=" << record->operation
<< ", pid=" << record->pid << ", timestamp=" << record->timestamp
<< ", name=" << get_name(record) << std::boolalpha;
switch(record->operation)
{
case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START:
{
const auto& arg = record->args;
info << ", page_migrate_start=(" << as_hex(arg.page_migrate_start.start_addr)
<< ", " << as_hex(arg.page_migrate_start.end_addr) << ", "
<< arg.page_migrate_start.from_agent.handle << ", "
<< arg.page_migrate_start.to_agent.handle << ", "
<< arg.page_migrate_start.prefetch_agent.handle << ", "
<< arg.page_migrate_start.preferred_agent.handle << ", "
<< arg.page_migrate_start.trigger << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END:
{
const auto& arg = record->args;
info << ", page_migrate_end=(" << as_hex(arg.page_migrate_end.start_addr)
<< ", " << as_hex(arg.page_migrate_end.end_addr) << ", "
<< arg.page_migrate_end.from_agent.handle << ", "
<< arg.page_migrate_end.to_agent.handle << ", "
<< arg.page_migrate_end.trigger << ", " << arg.page_migrate_end.error_code
<< ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START:
{
const auto& arg = record->args;
info << ", page_fault_start=(" << arg.page_fault_start.read_fault << ", "
<< arg.page_fault_start.agent_id.handle << ", "
<< as_hex(arg.page_fault_start.address) << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END:
{
const auto& arg = record->args;
info << ", page_fault_end=(" << arg.page_fault_end.migrated << ", "
<< arg.page_fault_end.agent_id.handle << ", "
<< as_hex(arg.page_fault_end.address) << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION:
{
const auto& arg = record->args;
info << ", queue_eviction=(" << arg.queue_eviction.agent_id.handle << ", "
<< arg.queue_eviction.trigger << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE:
{
const auto& arg = record->args;
info << ", queue_restore=(" << arg.queue_restore.rescheduled << ", "
<< arg.queue_restore.agent_id.handle << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU:
{
const auto& arg = record->args;
info << ", unmap_from_gpu=(" << as_hex(arg.unmap_from_gpu.start_addr) << ", "
<< as_hex(arg.unmap_from_gpu.end_addr) << ", "
<< arg.unmap_from_gpu.agent_id.handle << ", " << arg.unmap_from_gpu.trigger
<< ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_DROPPED_EVENT:
{
const auto& arg = record->args;
info << ", dropped_event=(" << arg.dropped_event.dropped_events_count << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_NONE:
case ROCPROFILER_PAGE_MIGRATION_LAST:
{
throw std::runtime_error{"unexpected page migration value"};
break;
}
}
static_cast<call_stack_t*>(user_data)->emplace_back(
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
}
else if(header->category == ROCPROFILER_BUFFER_CATEGORY_TRACING &&
header->kind == ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY)
{
@@ -546,14 +453,10 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
client_ctx, ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, nullptr, 0, client_buffer),
"buffer tracing service for memory copy configure");
// May have incompatible kernel so only emit a warning here
ROCPROFILER_WARN(rocprofiler_configure_buffer_tracing_service(
client_ctx, ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, nullptr, 0, client_buffer));
ROCPROFILER_CALL(
rocprofiler_configure_buffer_tracing_service(
client_ctx, ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, nullptr, 0, client_buffer),
"buffer tracing service for page migration configure");
"buffer tracing service for scratch memory configure");
auto client_thread = rocprofiler_callback_thread_t{};
ROCPROFILER_CALL(rocprofiler_create_callback_thread(&client_thread),
@@ -26,7 +26,7 @@
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hip/api_args.h>
#include <rocprofiler-sdk/kfd/page_migration_args.h>
#include <rocprofiler-sdk/kfd/kfd_id.h>
#include <rocprofiler-sdk/rocdecode/api_args.h>
#include <rocprofiler-sdk/rocdecode/api_id.h>
@@ -339,17 +339,160 @@ typedef struct rocprofiler_buffer_tracing_kernel_dispatch_record_t
} rocprofiler_buffer_tracing_kernel_dispatch_record_t;
/**
* @brief ROCProfiler Buffer Page Migration Tracer Record
* @brief ROCProfiler Buffer Page Migration event record from KFD.
*/
typedef struct rocprofiler_buffer_tracing_page_migration_record_t
typedef struct rocprofiler_buffer_tracing_kfd_event_page_migrate_record_t
{
uint64_t size; ///< size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION
rocprofiler_page_migration_operation_t operation;
rocprofiler_timestamp_t timestamp; ///< start time in nanoseconds
uint32_t pid;
rocprofiler_page_migration_args_t args;
} rocprofiler_buffer_tracing_page_migration_record_t;
uint64_t size; ///< Size of this struct
rocprofiler_buffer_tracing_kind_t kind;
rocprofiler_kfd_event_page_migrate_operation_t operation;
rocprofiler_timestamp_t timestamp; ///< Timestamp of the event as reported by KFD
uint32_t pid; ///< PID of the process as reported by KFD
rocprofiler_address_t start_address; ///< Start address of the memory range being migrated
rocprofiler_address_t end_address; ///< End address of the memory range being migrated
rocprofiler_agent_id_t src_agent; ///< Source agent from which pages were migrated
rocprofiler_agent_id_t dst_agent; ///< Destination agent to which pages were migrated
rocprofiler_agent_id_t prefetch_agent;
rocprofiler_agent_id_t preferred_agent;
int32_t error_code; ///< Non-zero if there was an error at migrate-end
///< @var kind
///< @brief ::ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE
///< @var operation
///< @brief @see rocprofiler_kfd_page_migrate_operation_t
///< @var prefetch_agent
///< @brief Agent to which memory is to be prefetched.
/// This field should be ignored on a migrate-end event
///< @var preferred_agent
///< @brief Preferred location for this memory
/// This field should be ignored on a migrate-end event
} rocprofiler_buffer_tracing_kfd_event_page_migrate_record_t;
/**
* @brief ROCProfiler Buffer Page Fault event record from KFD.
*/
typedef struct rocprofiler_buffer_tracing_kfd_event_page_fault_record_t
{
uint64_t size; ///< Size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT
rocprofiler_kfd_event_page_fault_operation_t operation;
rocprofiler_timestamp_t timestamp; ///< Timestamp of the event as reported by KFD
uint32_t pid; ///< PID of the process as reported by KFD
rocprofiler_agent_id_t agent_id; ///< Agent ID which generated the fault
rocprofiler_address_t address; ///< Memory access that generated the fault event
///< @var operation
///< @brief @see rocprofiler_kfd_page_fault_operation_t
} rocprofiler_buffer_tracing_kfd_event_page_fault_record_t;
/**
* @brief ROCProfiler Buffer Queue event record from KFD.
*/
typedef struct rocprofiler_buffer_tracing_kfd_event_queue_record_t
{
uint64_t size; ///< Size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE
rocprofiler_kfd_event_queue_operation_t operation; ///< @see rocprofiler_kfd_queue_operation_t
rocprofiler_timestamp_t timestamp; ///< Timestamp of the event as reported by KFD
uint32_t pid; ///< PID of the process as reported by KFD
rocprofiler_agent_id_t agent_id; ///< Agent ID on which this event occurred
} rocprofiler_buffer_tracing_kfd_event_queue_record_t;
/**
* @brief ROCProfiler Buffer Unmap of memory from GPU event record from KFD.
*/
typedef struct rocprofiler_buffer_tracing_kfd_event_unmap_from_gpu_record_t
{
uint64_t size; ///< Size of this struct
rocprofiler_buffer_tracing_kind_t kind;
rocprofiler_kfd_event_unmap_from_gpu_operation_t operation;
rocprofiler_timestamp_t timestamp; ///< Timestamp of the event as reported by KFD
uint32_t pid; ///< PID of the process as reported by KFD
rocprofiler_agent_id_t agent_id; ///< Agent ID on which memory ranges were unmapped
rocprofiler_address_t start_address; ///< Start address of the memory range being unmapped
rocprofiler_address_t end_address; ///< End address of the memory range being unmapped
///< @var kind
///< @brief ::ROCPROFILER_BUFFER_TRACING_KFD_EVENT_UNMAP_FROM_GPU
///< @var operation
///< @brief @see rocprofiler_kfd_unmap_from_gpu_operation_t
} rocprofiler_buffer_tracing_kfd_event_unmap_from_gpu_record_t;
/**
* @brief ROCProfiler Buffer Dropped events event record, for when KFD reports
* that it has dropped some events.
*/
typedef struct rocprofiler_buffer_tracing_kfd_event_dropped_events_record_t
{
uint64_t size; ///< Size of this struct
rocprofiler_buffer_tracing_kind_t kind;
rocprofiler_kfd_event_dropped_events_operation_t operation;
rocprofiler_timestamp_t timestamp; ///< Timestamp of the event as reported by KFD
uint32_t pid; ///< PID of the process as reported by KFD
uint32_t count; ///< Number of records that KFD dropped
///< @var kind
///< @brief ::ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS
///< @var operation
///< @brief @see rocprofiler_kfd_event_dropped_events_operation_t
} rocprofiler_buffer_tracing_kfd_event_dropped_events_record_t;
/**
* @brief ROCProfiler Buffer Page Migration (paired) record from KFD.
*/
typedef struct rocprofiler_buffer_tracing_kfd_page_migrate_record_t
{
uint64_t size; ///< Size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE
rocprofiler_kfd_page_migrate_operation_t operation;
rocprofiler_timestamp_t start_timestamp; ///< Start timestamp as reported by KFD
rocprofiler_timestamp_t end_timestamp; ///< End timestamp as reported by KFD
uint32_t pid; ///< PID of the process as reported by KFD
rocprofiler_address_t start_address; ///< Start address of the memory range being migrated
rocprofiler_address_t end_address; ///< End address of the memory range being migrated
rocprofiler_agent_id_t src_agent; ///< Source agent from which pages were migrated
rocprofiler_agent_id_t dst_agent; ///< Destination agent to which pages were migrated
rocprofiler_agent_id_t prefetch_agent;
rocprofiler_agent_id_t preferred_agent;
int32_t error_code; ///< Non-zero codes are errors, as reported by KFD
///< @var operation
///< @brief @see rocprofiler_kfd_page_migrate_operation_t
///< @var prefetch_agent
///< @brief Agent to which memory is to be prefetched.
///< @var preferred_agent
///< @brief Preferred location for this memory
} rocprofiler_buffer_tracing_kfd_page_migrate_record_t;
/**
* @brief ROCProfiler Buffer Page Fault (paired) record from KFD.
*/
typedef struct rocprofiler_buffer_tracing_kfd_page_fault_record_t
{
uint64_t size; ///< Size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT
rocprofiler_kfd_page_fault_operation_t operation;
rocprofiler_timestamp_t start_timestamp; ///< Start timestamp as reported by KFD
rocprofiler_timestamp_t end_timestamp; ///< End timestamp as reported by KFD
uint32_t pid; ///< PID of the process as reported by KFD
rocprofiler_agent_id_t agent_id; ///< Agent ID which generated the fault
rocprofiler_address_t address; ///< Memory access that generated the page fault
///< @var operation
///< @brief @see rocprofiler_kfd_page_fault_operation_t
} rocprofiler_buffer_tracing_kfd_page_fault_record_t;
/**
* @brief ROCProfiler Buffer Queue suspend (paired) record from KFD.
*/
typedef struct rocprofiler_buffer_tracing_kfd_queue_record_t
{
uint64_t size; ///< Size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ::ROCPROFILER_BUFFER_TRACING_KFD_QUEUE
rocprofiler_kfd_queue_operation_t operation; ///< @see rocprofiler_kfd_queue_operation_t
rocprofiler_timestamp_t start_timestamp; ///< Start timestamp as reported by KFD
rocprofiler_timestamp_t end_timestamp; ///< End timestamp as reported by KFD
uint32_t pid; ///< PID of the process as reported by KFD
rocprofiler_agent_id_t agent_id; ///< Agent ID on which this event occurred
} rocprofiler_buffer_tracing_kfd_queue_record_t;
/**
* @brief ROCProfiler Buffer Scratch Memory Tracer Record
@@ -1195,10 +1195,6 @@ ROCPROFILER_ENUM_INFO(rocprofiler_buffer_policy_t,
ROCPROFILER_BUFFER_POLICY_NONE,
ROCPROFILER_BUFFER_POLICY_LAST,
false);
ROCPROFILER_ENUM_INFO(rocprofiler_page_migration_operation_t,
ROCPROFILER_PAGE_MIGRATION_NONE,
ROCPROFILER_PAGE_MIGRATION_LAST,
false);
ROCPROFILER_ENUM_INFO(rocprofiler_scratch_memory_operation_t,
ROCPROFILER_SCRATCH_MEMORY_NONE,
ROCPROFILER_SCRATCH_MEMORY_LAST,
@@ -1243,19 +1239,40 @@ ROCPROFILER_ENUM_INFO(rocprofiler_pc_sampling_record_kind_t,
ROCPROFILER_PC_SAMPLING_RECORD_LAST,
false);
// page-migration
ROCPROFILER_ENUM_INFO(rocprofiler_page_migration_trigger_t,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_NONE,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_LAST,
// kfd/kfd_id.h
ROCPROFILER_ENUM_INFO(rocprofiler_kfd_event_page_migrate_operation_t,
ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_NONE,
ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_LAST,
false);
ROCPROFILER_ENUM_INFO(rocprofiler_page_migration_queue_suspend_trigger_t,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_NONE,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_LAST,
ROCPROFILER_ENUM_INFO(rocprofiler_kfd_event_page_fault_operation_t,
ROCPROFILER_KFD_EVENT_PAGE_FAULT_NONE,
ROCPROFILER_KFD_EVENT_PAGE_FAULT_LAST,
false);
ROCPROFILER_ENUM_INFO(rocprofiler_page_migration_unmap_from_gpu_trigger_t,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_NONE,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST,
ROCPROFILER_ENUM_INFO(rocprofiler_kfd_event_queue_operation_t,
ROCPROFILER_KFD_EVENT_QUEUE_NONE,
ROCPROFILER_KFD_EVENT_QUEUE_LAST,
false);
ROCPROFILER_ENUM_INFO(rocprofiler_kfd_event_unmap_from_gpu_operation_t,
ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_NONE,
ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_LAST,
false);
ROCPROFILER_ENUM_INFO(rocprofiler_kfd_event_dropped_events_operation_t,
ROCPROFILER_KFD_EVENT_DROPPED_EVENTS_NONE,
ROCPROFILER_KFD_EVENT_DROPPED_EVENTS_LAST,
false);
ROCPROFILER_ENUM_INFO(rocprofiler_kfd_page_migrate_operation_t,
ROCPROFILER_KFD_PAGE_MIGRATE_NONE,
ROCPROFILER_KFD_PAGE_MIGRATE_LAST,
false);
ROCPROFILER_ENUM_INFO(rocprofiler_kfd_page_fault_operation_t,
ROCPROFILER_KFD_PAGE_FAULT_NONE,
ROCPROFILER_KFD_PAGE_FAULT_LAST,
false);
ROCPROFILER_ENUM_INFO(rocprofiler_kfd_queue_operation_t,
ROCPROFILER_KFD_QUEUE_NONE,
ROCPROFILER_KFD_QUEUE_LAST,
false);
ROCPROFILER_ENUM_INFO(rocprofiler_external_correlation_id_request_kind_t,
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE,
ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_LAST,
@@ -1404,7 +1421,6 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MARKER_CONTROL_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_MEMORY_COPY);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_RCCL_API);
@@ -1417,7 +1433,15 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_HIP_STREAM);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_ROCDECODE_API_EXT);
static_assert(ROCPROFILER_BUFFER_TRACING_LAST == 25);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_EVENT_UNMAP_FROM_GPU);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_TRACING_KFD_QUEUE);
static_assert(ROCPROFILER_BUFFER_TRACING_LAST == 32);
// rocprofiler_code_object_operation_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_CODE_OBJECT_NONE);
@@ -1467,18 +1491,6 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_POLICY_DISCARD);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_BUFFER_POLICY_LOSSLESS);
static_assert(ROCPROFILER_BUFFER_POLICY_LAST == 3);
// rocprofiler_page_migration_operation_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_NONE);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_DROPPED_EVENT);
static_assert(ROCPROFILER_PAGE_MIGRATION_LAST == 9);
// rocprofiler_scratch_memory_operation_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_SCRATCH_MEMORY_NONE);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_SCRATCH_MEMORY_ALLOC);
@@ -1546,23 +1558,65 @@ ROCPROFILER_ENUM_LABEL(ROCPROFILER_PC_SAMPLING_RECORD_HOST_TRAP_V0_SAMPLE);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PC_SAMPLING_RECORD_STOCHASTIC_V0_SAMPLE);
static_assert(ROCPROFILER_PC_SAMPLING_RECORD_LAST == 4);
// page-migration
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION);
static_assert(ROCPROFILER_PAGE_MIGRATION_TRIGGER_LAST == 4);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE);
static_assert(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_LAST == 6);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU);
static_assert(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST == 3);
// kfd events
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PREFETCH);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PAGEFAULT_GPU);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PAGEFAULT_CPU);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_TTM_EVICTION);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_END);
static_assert(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_LAST == 5);
// rocprofiler_kfd_event_page_fault_operation_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_PAGE_FAULT_START);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_PAGE_FAULT_START_READ_FAULT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_PAGE_FAULT_START_WRITE_FAULT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_PAGE_FAULT_END_PAGE_MIGRATED);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_PAGE_FAULT_END_PAGE_UPDATED);
static_assert(ROCPROFILER_KFD_EVENT_PAGE_FAULT_LAST == 5);
// rocprofiler_kfd_event_queue_operation_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_SVM);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_USERPTR);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_TTM);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_SUSPEND);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_CRIU_CHECKPOINT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_CRIU_RESTORE);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_QUEUE_RESTORE_RESCHEDULED);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_QUEUE_RESTORE);
static_assert(ROCPROFILER_KFD_EVENT_QUEUE_LAST == 8);
// rocprofiler_kfd_event_unmap_from_gpu_operation_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_MMU_NOTIFY);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_MMU_NOTIFY_MIGRATE);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_UNMAP_FROM_CPU);
static_assert(ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_LAST == 3);
// rocprofiler_kfd_event_dropped_events_operation_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_EVENT_DROPPED_EVENTS);
static_assert(ROCPROFILER_KFD_EVENT_DROPPED_EVENTS_LAST == 1);
// rocprofiler_kfd_page_migrate_operation_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_PAGE_MIGRATE_PREFETCH);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_PAGE_MIGRATE_PAGEFAULT_GPU);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_PAGE_MIGRATE_PAGEFAULT_CPU);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_PAGE_MIGRATE_TTM_EVICTION);
static_assert(ROCPROFILER_KFD_PAGE_MIGRATE_LAST == 4);
// rocprofiler_kfd_page_fault_operation_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_PAGE_FAULT_READ_FAULT_MIGRATED);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_PAGE_FAULT_READ_FAULT_UPDATED);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_PAGE_FAULT_WRITE_FAULT_MIGRATED);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_PAGE_FAULT_WRITE_FAULT_UPDATED);
static_assert(ROCPROFILER_KFD_PAGE_FAULT_LAST == 4);
// rocprofiler_kfd_queue_operation_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_QUEUE_EVICT_SVM);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_QUEUE_EVICT_USERPTR);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_QUEUE_EVICT_TTM);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_QUEUE_EVICT_SUSPEND);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_QUEUE_EVICT_CRIU_CHECKPOINT);
ROCPROFILER_ENUM_LABEL(ROCPROFILER_KFD_QUEUE_EVICT_CRIU_RESTORE);
static_assert(ROCPROFILER_KFD_QUEUE_LAST == 6);
// rocprofiler_external_correlation_id_request_kind_t
ROCPROFILER_ENUM_LABEL(ROCPROFILER_EXTERNAL_CORRELATION_REQUEST_NONE);
@@ -101,6 +101,7 @@ ROCPROFILER_DEFINE_CATEGORY(category, memory_allocation, "Memory Allocation")
ROCPROFILER_DEFINE_CATEGORY(category, rocdecode_api, "rocDecode API function")
ROCPROFILER_DEFINE_CATEGORY(category, rocjpeg_api, "rocJPEG API function")
ROCPROFILER_DEFINE_CATEGORY(category, counter_collection, "Counter Collection")
ROCPROFILER_DEFINE_CATEGORY(category, kfd_events, "KFD events collection")
ROCPROFILER_DEFINE_CATEGORY(category, none, "Unknown category")
#define ROCPROFILER_PERFETTO_CATEGORIES \
@@ -215,7 +216,6 @@ ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MARKER_NAME_API, marker_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MEMORY_COPY, memory_copy)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(MEMORY_ALLOCATION, memory_allocation)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(KERNEL_DISPATCH, kernel_dispatch)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(PAGE_MIGRATION, none)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(SCRATCH_MEMORY, memory_allocation)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(CORRELATION_ID_RETIREMENT, none)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(RCCL_API, rccl_api)
@@ -227,6 +227,14 @@ ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HIP_STREAM, hip_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HIP_RUNTIME_API_EXT, hip_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(HIP_COMPILER_API_EXT, hip_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(ROCDECODE_API_EXT, rocdecode_api)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(KFD_EVENT_PAGE_MIGRATE, kfd_events)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(KFD_EVENT_PAGE_FAULT, kfd_events)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(KFD_EVENT_QUEUE, kfd_events)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(KFD_EVENT_UNMAP_FROM_GPU, kfd_events)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(KFD_EVENT_DROPPED_EVENTS, kfd_events)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(KFD_PAGE_MIGRATE, kfd_events)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(KFD_PAGE_FAULT, kfd_events)
ROCPROFILER_PERFETTO_BUFFER_TRACING_CATEGORY(KFD_QUEUE, kfd_events)
ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(NONE, none)
ROCPROFILER_PERFETTO_CALLBACK_TRACING_CATEGORY(HSA_CORE_API, hsa_api)
@@ -729,140 +729,109 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_allocation_record_t data)
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_page_fault_start_t& data)
save(ArchiveT& ar, rocprofiler_buffer_tracing_kfd_event_page_migrate_record_t data)
{
ROCP_SDK_SAVE_DATA_BITFIELD("read_fault", read_fault);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(address);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_page_fault_end_t& data)
{
ROCP_SDK_SAVE_DATA_BITFIELD("migrated", migrated);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(address);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_page_migrate_start_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(start_addr);
ROCP_SDK_SAVE_DATA_FIELD(end_addr);
ROCP_SDK_SAVE_DATA_FIELD(from_agent);
ROCP_SDK_SAVE_DATA_FIELD(to_agent);
ROCP_SDK_SAVE_DATA_FIELD(size);
ROCP_SDK_SAVE_DATA_FIELD(operation);
ROCP_SDK_SAVE_DATA_FIELD(timestamp);
ROCP_SDK_SAVE_DATA_FIELD(pid);
ROCP_SDK_SAVE_DATA_FIELD(start_address);
ROCP_SDK_SAVE_DATA_FIELD(end_address);
ROCP_SDK_SAVE_DATA_FIELD(src_agent);
ROCP_SDK_SAVE_DATA_FIELD(dst_agent);
ROCP_SDK_SAVE_DATA_FIELD(prefetch_agent);
ROCP_SDK_SAVE_DATA_FIELD(preferred_agent);
ROCP_SDK_SAVE_DATA_FIELD(trigger);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_page_migrate_end_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(start_addr);
ROCP_SDK_SAVE_DATA_FIELD(end_addr);
ROCP_SDK_SAVE_DATA_FIELD(from_agent);
ROCP_SDK_SAVE_DATA_FIELD(to_agent);
ROCP_SDK_SAVE_DATA_FIELD(trigger);
ROCP_SDK_SAVE_DATA_FIELD(error_code);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_queue_eviction_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(trigger);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_queue_restore_t& data)
{
ROCP_SDK_SAVE_DATA_BITFIELD("rescheduled", rescheduled);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_unmap_from_gpu_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(start_addr);
ROCP_SDK_SAVE_DATA_FIELD(end_addr);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(trigger);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_dropped_event_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(dropped_events_count);
}
namespace details
{
template <size_t Idx>
struct save_page_migration_arg;
#define ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(ENUM_VALUE, UNION_ARG) \
template <> \
struct save_page_migration_arg<ROCPROFILER_PAGE_MIGRATION_##ENUM_VALUE> \
{ \
static constexpr auto value = ROCPROFILER_PAGE_MIGRATION_##ENUM_VALUE; \
template <typename ArchiveT> \
void operator()(ArchiveT& ar, rocprofiler_page_migration_args_t args) \
{ \
ar(make_nvp(#UNION_ARG, args.UNION_ARG)); \
} \
};
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(NONE, none)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_MIGRATE_START, page_migrate_start)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_MIGRATE_END, page_migrate_end)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_FAULT_START, page_fault_start)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_FAULT_END, page_fault_end)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(QUEUE_EVICTION, queue_eviction)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(QUEUE_RESTORE, queue_restore)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(UNMAP_FROM_GPU, unmap_from_gpu)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(DROPPED_EVENT, dropped_event)
#undef ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG
template <typename ArchiveT, size_t Idx, size_t... IdxTail>
void
save_page_migration_args(ArchiveT& ar,
rocprofiler_page_migration_operation_t op,
rocprofiler_page_migration_args_t args,
std::index_sequence<Idx, IdxTail...>)
{
using save_page_migration_type = save_page_migration_arg<Idx>;
if(op == save_page_migration_type::value)
{
if constexpr(save_page_migration_type::value != ROCPROFILER_PAGE_MIGRATION_NONE)
save_page_migration_type{}(ar, args);
}
else if constexpr(sizeof...(IdxTail) > 0)
{
save_page_migration_args(ar, op, args, std::index_sequence<IdxTail...>{});
}
}
} // namespace details
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_record_t& data)
save(ArchiveT& ar, rocprofiler_buffer_tracing_kfd_event_page_fault_record_t data)
{
ROCP_SDK_SAVE_DATA_FIELD(size);
ROCP_SDK_SAVE_DATA_FIELD(kind);
ROCP_SDK_SAVE_DATA_FIELD(operation);
ROCP_SDK_SAVE_DATA_FIELD(timestamp);
ROCP_SDK_SAVE_DATA_FIELD(pid);
details::save_page_migration_args(
ar, data.operation, data.args, std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{});
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(address);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_kfd_event_queue_record_t data)
{
ROCP_SDK_SAVE_DATA_FIELD(size);
ROCP_SDK_SAVE_DATA_FIELD(operation);
ROCP_SDK_SAVE_DATA_FIELD(timestamp);
ROCP_SDK_SAVE_DATA_FIELD(pid);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_kfd_event_unmap_from_gpu_record_t data)
{
ROCP_SDK_SAVE_DATA_FIELD(size);
ROCP_SDK_SAVE_DATA_FIELD(operation);
ROCP_SDK_SAVE_DATA_FIELD(timestamp);
ROCP_SDK_SAVE_DATA_FIELD(pid);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(start_address);
ROCP_SDK_SAVE_DATA_FIELD(end_address);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_kfd_event_dropped_events_record_t data)
{
ROCP_SDK_SAVE_DATA_FIELD(size);
ROCP_SDK_SAVE_DATA_FIELD(operation);
ROCP_SDK_SAVE_DATA_FIELD(timestamp);
ROCP_SDK_SAVE_DATA_FIELD(pid);
ROCP_SDK_SAVE_DATA_FIELD(count);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_kfd_page_migrate_record_t data)
{
ROCP_SDK_SAVE_DATA_FIELD(size);
ROCP_SDK_SAVE_DATA_FIELD(operation);
ROCP_SDK_SAVE_DATA_FIELD(start_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(end_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(pid);
ROCP_SDK_SAVE_DATA_FIELD(start_address);
ROCP_SDK_SAVE_DATA_FIELD(end_address);
ROCP_SDK_SAVE_DATA_FIELD(src_agent);
ROCP_SDK_SAVE_DATA_FIELD(dst_agent);
ROCP_SDK_SAVE_DATA_FIELD(prefetch_agent);
ROCP_SDK_SAVE_DATA_FIELD(preferred_agent);
ROCP_SDK_SAVE_DATA_FIELD(error_code);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_kfd_page_fault_record_t data)
{
ROCP_SDK_SAVE_DATA_FIELD(size);
ROCP_SDK_SAVE_DATA_FIELD(operation);
ROCP_SDK_SAVE_DATA_FIELD(start_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(end_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(pid);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(address);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_kfd_queue_record_t data)
{
ROCP_SDK_SAVE_DATA_FIELD(size);
ROCP_SDK_SAVE_DATA_FIELD(operation);
ROCP_SDK_SAVE_DATA_FIELD(start_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(end_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(pid);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
}
template <typename ArchiveT>
@@ -196,7 +196,6 @@ typedef enum rocprofiler_buffer_tracing_kind_t // NOLINT(performance-enum-size)
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API, ///< @see ::rocprofiler_marker_name_api_id_t
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY, ///< @see ::rocprofiler_memory_copy_operation_t
ROCPROFILER_BUFFER_TRACING_KERNEL_DISPATCH, ///< Buffer kernel dispatch info
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, ///< Buffer page migration info
ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY, ///< Buffer scratch memory reclaimation info
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT, ///< Correlation ID in no longer in use
ROCPROFILER_BUFFER_TRACING_RCCL_API, ///< RCCL tracing
@@ -212,6 +211,20 @@ typedef enum rocprofiler_buffer_tracing_kind_t // NOLINT(performance-enum-size)
ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT,
ROCPROFILER_BUFFER_TRACING_HIP_COMPILER_API_EXT,
ROCPROFILER_BUFFER_TRACING_ROCDECODE_API_EXT,
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE, ///< @see
///< rocprofiler_kfd_event_page_migrate_operation_t
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT, ///< @see
///< rocprofiler_kfd_event_page_fault_operation_t
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE, ///< @see rocprofiler_kfd_event_queue_operation_t
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_UNMAP_FROM_GPU, ///< @see
///< rocprofiler_kfd_event_unmap_from_gpu_operation_t
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS, ///< @see
///< rocprofiler_kfd_event_dropped_events_operation_t
ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE, ///< @see rocprofiler_kfd_page_migrate_operation_t
ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT, ///< @see rocprofiler_kfd_page_fault_operation_t
ROCPROFILER_BUFFER_TRACING_KFD_QUEUE, ///< @see rocprofiler_kfd_queue_operation_t
ROCPROFILER_BUFFER_TRACING_LAST,
/// @var ROCPROFILER_BUFFER_TRACING_HIP_RUNTIME_API_EXT
@@ -361,23 +374,6 @@ typedef enum rocprofiler_buffer_policy_t // NOLINT(performance-enum-size)
ROCPROFILER_BUFFER_POLICY_LAST,
} rocprofiler_buffer_policy_t;
/**
* @brief Page migration event.
*/
typedef enum rocprofiler_page_migration_operation_t // NOLINT(performance-enum-size)
{
ROCPROFILER_PAGE_MIGRATION_NONE = 0, ///< Unknown event
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START,
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END,
ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START,
ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END,
ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION,
ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU,
ROCPROFILER_PAGE_MIGRATION_DROPPED_EVENT,
ROCPROFILER_PAGE_MIGRATION_LAST,
} rocprofiler_page_migration_operation_t;
/**
* @brief Scratch event kind
*/
@@ -3,7 +3,7 @@
# Installation of public KFD headers
#
#
set(ROCPROFILER_KFD_HEADER_FILES page_migration_args.h page_migration_id.h)
set(ROCPROFILER_KFD_HEADER_FILES kfd_id.h)
install(
FILES ${ROCPROFILER_KFD_HEADER_FILES}
@@ -0,0 +1,159 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/hsa.h>
#include <rocprofiler-sdk/hsa/api_trace_version.h>
#include <rocprofiler-sdk/version.h>
#include <stdint.h>
ROCPROFILER_EXTERN_C_INIT
/**
* @brief Page migration event operations. @see
* rocprofiler_buffer_tracing_kfd_event_page_migrate_record_t
*/
typedef enum rocprofiler_kfd_event_page_migrate_operation_t
{
ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_NONE = -1,
ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PREFETCH, ///< Migration triggered by a prefetch
ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PAGEFAULT_GPU, ///< Migration triggered by a page fault on
///< the GPU
ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PAGEFAULT_CPU, ///< Migration triggered by a page fault on
///< the CPU
ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_TTM_EVICTION, ///< Page evicted by linux TTM (Translation
///< Table Manager)
ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_END,
ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_LAST,
} rocprofiler_kfd_event_page_migrate_operation_t;
/**
* @brief Page fault event operations. @see rocprofiler_buffer_tracing_kfd_event_page_fault_record_t
*/
typedef enum rocprofiler_kfd_event_page_fault_operation_t
{
ROCPROFILER_KFD_EVENT_PAGE_FAULT_NONE = -1,
ROCPROFILER_KFD_EVENT_PAGE_FAULT_START,
ROCPROFILER_KFD_EVENT_PAGE_FAULT_START_READ_FAULT, ///< Page fault was due to a read operation
ROCPROFILER_KFD_EVENT_PAGE_FAULT_START_WRITE_FAULT, ///< Page fault was due to a write
///< operation
ROCPROFILER_KFD_EVENT_PAGE_FAULT_END_PAGE_MIGRATED, ///< Fault resolved through a migration
ROCPROFILER_KFD_EVENT_PAGE_FAULT_END_PAGE_UPDATED, ///< Fault resolved through an update
ROCPROFILER_KFD_EVENT_PAGE_FAULT_LAST,
} rocprofiler_kfd_event_page_fault_operation_t;
/**
* @brief Queue evict/restore event operations. @see
* rocprofiler_buffer_tracing_kfd_event_queue_record_t
*/
typedef enum rocprofiler_kfd_event_queue_operation_t
{
ROCPROFILER_KFD_EVENT_QUEUE_NONE = -1,
ROCPROFILER_KFD_EVENT_QUEUE_EVICT_SVM, ///< SVM Buffer migration
ROCPROFILER_KFD_EVENT_QUEUE_EVICT_USERPTR, ///< userptr movement
ROCPROFILER_KFD_EVENT_QUEUE_EVICT_TTM, ///< TTM move buffer
ROCPROFILER_KFD_EVENT_QUEUE_EVICT_SUSPEND, ///< GPU suspend
ROCPROFILER_KFD_EVENT_QUEUE_EVICT_CRIU_CHECKPOINT, ///< Queues evicted due to process
///< checkpoint by CRIU
ROCPROFILER_KFD_EVENT_QUEUE_EVICT_CRIU_RESTORE, ///< Queues restored during process restore by
///< CRIU
ROCPROFILER_KFD_EVENT_QUEUE_RESTORE_RESCHEDULED, ///< Queue was not restored; will be restored
///< later
ROCPROFILER_KFD_EVENT_QUEUE_RESTORE, ///< Queue was restored
ROCPROFILER_KFD_EVENT_QUEUE_LAST,
} rocprofiler_kfd_event_queue_operation_t;
/**
* @brief Memory unmap from GPU event operations. @see
* rocprofiler_buffer_tracing_kfd_event_unmap_from_gpu_record_t
*/
typedef enum rocprofiler_kfd_event_unmap_from_gpu_operation_t
{
ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_NONE = -1,
ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_MMU_NOTIFY, ///< MMU notifier CPU buffer movement
ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_MMU_NOTIFY_MIGRATE, ///< MMU notifier page migration
ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_UNMAP_FROM_CPU, ///< Unmap to free the buffer
ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_LAST,
} rocprofiler_kfd_event_unmap_from_gpu_operation_t;
/**
* @brief KFD dropped event operations. @see
* rocprofiler_buffer_tracing_kfd_event_dropped_events_record_t
*/
typedef enum rocprofiler_kfd_event_dropped_events_operation_t
{
ROCPROFILER_KFD_EVENT_DROPPED_EVENTS_NONE = -1,
ROCPROFILER_KFD_EVENT_DROPPED_EVENTS,
ROCPROFILER_KFD_EVENT_DROPPED_EVENTS_LAST,
} rocprofiler_kfd_event_dropped_events_operation_t;
/**
* @brief Operation kinds for @see rocprofiler_buffer_tracing_kfd_page_migrate_record_t
*/
typedef enum rocprofiler_kfd_page_migrate_operation_t
{
ROCPROFILER_KFD_PAGE_MIGRATE_NONE = -1,
ROCPROFILER_KFD_PAGE_MIGRATE_PREFETCH, ///< Migration triggered by a prefetch
ROCPROFILER_KFD_PAGE_MIGRATE_PAGEFAULT_GPU, ///< Migration triggered by a page fault on the
///< GPU
ROCPROFILER_KFD_PAGE_MIGRATE_PAGEFAULT_CPU, ///< Migration triggered by a page fault on the
///< CPU
ROCPROFILER_KFD_PAGE_MIGRATE_TTM_EVICTION, ///< Page evicted by linux TTM (Translation Table
///< Manager)
ROCPROFILER_KFD_PAGE_MIGRATE_LAST,
} rocprofiler_kfd_page_migrate_operation_t;
/**
* @brief Operation kinds for @see rocprofiler_buffer_tracing_kfd_page_fault_record_t
*/
typedef enum rocprofiler_kfd_page_fault_operation_t
{
ROCPROFILER_KFD_PAGE_FAULT_NONE = -1,
ROCPROFILER_KFD_PAGE_FAULT_READ_FAULT_MIGRATED, ///< read fault resolved with a migrate
ROCPROFILER_KFD_PAGE_FAULT_READ_FAULT_UPDATED, ///< read fault resolved with an update
ROCPROFILER_KFD_PAGE_FAULT_WRITE_FAULT_MIGRATED, ///< write fault resolved with an migrate
ROCPROFILER_KFD_PAGE_FAULT_WRITE_FAULT_UPDATED, ///< write fault resolved with an update
ROCPROFILER_KFD_PAGE_FAULT_LAST,
} rocprofiler_kfd_page_fault_operation_t;
/**
* @brief Operation kinds for @see rocprofiler_buffer_tracing_kfd_queue_record_t
*/
typedef enum rocprofiler_kfd_queue_operation_t
{
ROCPROFILER_KFD_QUEUE_NONE = -1,
ROCPROFILER_KFD_QUEUE_EVICT_SVM, ///< SVM Buffer migration
ROCPROFILER_KFD_QUEUE_EVICT_USERPTR, ///< userptr movement
ROCPROFILER_KFD_QUEUE_EVICT_TTM, ///< TTM move buffer
ROCPROFILER_KFD_QUEUE_EVICT_SUSPEND, ///< GPU suspend
ROCPROFILER_KFD_QUEUE_EVICT_CRIU_CHECKPOINT, ///< Queues evicted due to process checkpoint by
///< CRIU
ROCPROFILER_KFD_QUEUE_EVICT_CRIU_RESTORE, ///< Queues restored during process restore by CRIU
ROCPROFILER_KFD_QUEUE_LAST,
} rocprofiler_kfd_queue_operation_t;
ROCPROFILER_EXTERN_C_FINI
@@ -1,111 +0,0 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/kfd/page_migration_id.h>
#include <stdint.h>
ROCPROFILER_EXTERN_C_INIT
typedef struct rocprofiler_page_migration_none_t
{
char empty;
} rocprofiler_page_migration_none_t;
typedef struct rocprofiler_page_migration_page_migrate_start_t
{
uint64_t start_addr;
uint64_t end_addr;
rocprofiler_agent_id_t from_agent;
rocprofiler_agent_id_t to_agent;
rocprofiler_agent_id_t prefetch_agent;
rocprofiler_agent_id_t preferred_agent;
rocprofiler_page_migration_trigger_t trigger;
} rocprofiler_page_migration_page_migrate_start_t;
typedef struct rocprofiler_page_migration_page_migrate_end_t
{
uint64_t start_addr;
uint64_t end_addr;
rocprofiler_agent_id_t from_agent;
rocprofiler_agent_id_t to_agent;
rocprofiler_page_migration_trigger_t trigger;
int32_t error_code;
} rocprofiler_page_migration_page_migrate_end_t;
typedef struct rocprofiler_page_migration_page_fault_start_t
{
uint32_t read_fault : 1;
rocprofiler_agent_id_t agent_id;
uint64_t address;
} rocprofiler_page_migration_page_fault_start_t;
typedef struct rocprofiler_page_migration_page_fault_end_t
{
uint32_t migrated : 1;
rocprofiler_agent_id_t agent_id;
uint64_t address;
} rocprofiler_page_migration_page_fault_end_t;
typedef struct rocprofiler_page_migration_queue_eviction_t
{
rocprofiler_agent_id_t agent_id;
rocprofiler_page_migration_queue_suspend_trigger_t trigger;
} rocprofiler_page_migration_queue_eviction_t;
typedef struct rocprofiler_page_migration_queue_restore_t
{
uint32_t rescheduled : 1;
rocprofiler_agent_id_t agent_id;
} rocprofiler_page_migration_queue_restore_t;
typedef struct rocprofiler_page_migration_unmap_from_gpu_t
{
uint64_t start_addr;
uint64_t end_addr;
rocprofiler_agent_id_t agent_id;
rocprofiler_page_migration_unmap_from_gpu_trigger_t trigger;
} rocprofiler_page_migration_unmap_from_gpu_t;
typedef struct rocprofiler_page_migration_dropped_event_t
{
uint32_t dropped_events_count;
} rocprofiler_page_migration_dropped_event_t;
typedef union rocprofiler_page_migration_args_t
{
rocprofiler_page_migration_none_t none;
rocprofiler_page_migration_page_migrate_start_t page_migrate_start;
rocprofiler_page_migration_page_migrate_end_t page_migrate_end;
rocprofiler_page_migration_page_fault_start_t page_fault_start;
rocprofiler_page_migration_page_fault_end_t page_fault_end;
rocprofiler_page_migration_queue_eviction_t queue_eviction;
rocprofiler_page_migration_queue_restore_t queue_restore;
rocprofiler_page_migration_unmap_from_gpu_t unmap_from_gpu;
rocprofiler_page_migration_dropped_event_t dropped_event;
uint64_t reserved[16];
} rocprofiler_page_migration_args_t;
ROCPROFILER_EXTERN_C_FINI
@@ -1,82 +0,0 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hsa.h>
#include <rocprofiler-sdk/hsa/api_trace_version.h>
#include <stdint.h>
ROCPROFILER_EXTERN_C_INIT
/**
* @brief Page migration triggers
*
*/
typedef enum rocprofiler_page_migration_trigger_t
{
ROCPROFILER_PAGE_MIGRATION_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH, ///< Migration triggered by a prefetch
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, ///< Triggered by a page fault on the GPU
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, ///< Triggered by a page fault on the CPU
ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, ///< Page evicted by linux TTM (Translation
///< Table Manager)
ROCPROFILER_PAGE_MIGRATION_TRIGGER_LAST,
} rocprofiler_page_migration_trigger_t;
/**
* @brief Page migration triggers causing the queue to suspend
*
*/
typedef enum rocprofiler_page_migration_queue_suspend_trigger_t
{
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, ///< Queue suspended by TTM (Translation
///< Table Manager) operation
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, ///< Queues evicted due to
///< process save
///< (checkpoint) by CRIU
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, ///< Queues restored during
///< process restore by CRIU
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_LAST,
} rocprofiler_page_migration_queue_suspend_trigger_t;
/**
* @brief Page migration triggers causing an unmap from the GPU
*
*/
typedef enum rocprofiler_page_migration_unmap_from_gpu_trigger_t
{
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST,
} rocprofiler_page_migration_unmap_from_gpu_trigger_t;
ROCPROFILER_EXTERN_C_FINI
@@ -49,7 +49,7 @@ add_subdirectory(marker)
add_subdirectory(thread_trace)
add_subdirectory(tracing)
add_subdirectory(kernel_dispatch)
add_subdirectory(page_migration)
add_subdirectory(kfd)
add_subdirectory(rccl)
add_subdirectory(rocdecode)
add_subdirectory(rocjpeg)
@@ -30,9 +30,9 @@
#include "lib/rocprofiler-sdk/hsa/memory_allocation.hpp"
#include "lib/rocprofiler-sdk/hsa/scratch_memory.hpp"
#include "lib/rocprofiler-sdk/kernel_dispatch/kernel_dispatch.hpp"
#include "lib/rocprofiler-sdk/kfd/kfd.hpp"
#include "lib/rocprofiler-sdk/marker/marker.hpp"
#include "lib/rocprofiler-sdk/ompt/ompt.hpp"
#include "lib/rocprofiler-sdk/page_migration/page_migration.hpp"
#include "lib/rocprofiler-sdk/rccl/rccl.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include "lib/rocprofiler-sdk/rocdecode/rocdecode.hpp"
@@ -52,6 +52,7 @@
#include <limits>
#include <stdexcept>
#include <string_view>
#include <unordered_set>
#include <vector>
#define RETURN_STATUS_ON_FAIL(...) \
@@ -90,7 +91,6 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(MARKER_NAME_API)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(MEMORY_COPY)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(MEMORY_ALLOCATION)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KERNEL_DISPATCH)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(PAGE_MIGRATION)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(SCRATCH_MEMORY)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(CORRELATION_ID_RETIREMENT)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(RCCL_API)
@@ -102,6 +102,14 @@ ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_STREAM)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_RUNTIME_API_EXT)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(HIP_COMPILER_API_EXT)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(ROCDECODE_API_EXT)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_EVENT_PAGE_MIGRATE)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_EVENT_PAGE_FAULT)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_EVENT_QUEUE)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_EVENT_UNMAP_FROM_GPU)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_EVENT_DROPPED_EVENTS)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_PAGE_MIGRATE)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_PAGE_FAULT)
ROCPROFILER_BUFFER_TRACING_KIND_STRING(KFD_QUEUE)
template <size_t Idx, size_t... Tail>
std::pair<const char*, size_t>
@@ -172,8 +180,21 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t
ctx->buffered_tracer->domains, kind, operations[i]));
}
if(kind == ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)
RETURN_STATUS_ON_FAIL(rocprofiler::page_migration::init());
{
static constexpr auto kfd_events =
std::array{ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE,
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT,
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE,
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_UNMAP_FROM_GPU,
ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE,
ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT,
ROCPROFILER_BUFFER_TRACING_KFD_QUEUE};
if(std::find(kfd_events.begin(), kfd_events.end(), kind) != kfd_events.end())
{
RETURN_STATUS_ON_FAIL(rocprofiler::kfd::init());
}
}
return ROCPROFILER_STATUS_SUCCESS;
}
@@ -282,11 +303,6 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_
val = rocprofiler::kernel_dispatch::name_by_id(operation);
break;
}
case ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION:
{
val = rocprofiler::page_migration::name_by_id(operation);
break;
}
case ROCPROFILER_BUFFER_TRACING_OMPT:
{
val = rocprofiler::ompt::name_by_id(operation);
@@ -318,6 +334,18 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_
val = rocprofiler::hip::stream::name_by_id(operation);
break;
}
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_UNMAP_FROM_GPU:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS:
case ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE:
case ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT:
case ROCPROFILER_BUFFER_TRACING_KFD_QUEUE:
{
val = rocprofiler::kfd::name_by_id(kind, operation);
break;
}
};
if(!val)
@@ -432,11 +460,6 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
ops = rocprofiler::kernel_dispatch::get_ids();
break;
}
case ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION:
{
ops = rocprofiler::page_migration::get_ids();
break;
}
case ROCPROFILER_BUFFER_TRACING_OMPT:
{
ops = rocprofiler::ompt::get_ids();
@@ -467,6 +490,18 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
ops = rocprofiler::hip::stream::get_ids();
break;
}
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_UNMAP_FROM_GPU:
case ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS:
case ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE:
case ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT:
case ROCPROFILER_BUFFER_TRACING_KFD_QUEUE:
{
ops = rocprofiler::kfd::get_ids(kind);
break;
}
}
for(const auto& itr : ops)
@@ -59,7 +59,7 @@ template <typename DomainT>
rocprofiler_status_t
add_domain(domain_context<DomainT>& _cfg, DomainT _domain)
{
static_assert((1 << domain_info<DomainT>::last) < std::numeric_limits<uint64_t>::max(),
static_assert((1UL << domain_info<DomainT>::last) < std::numeric_limits<uint64_t>::max(),
"uint64_t cannot handle all the domains");
if(_domain <= domain_info<DomainT>::none) return ROCPROFILER_STATUS_ERROR_KIND_NOT_FOUND;
@@ -42,9 +42,11 @@
* - 1.14 - Update kfd_event_data
* - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl
* - 1.16 - Add contiguous VRAM allocation flag
* - 1.17 - Add SDMA queue creation with target SDMA engine ID
* - 1.18 - Rename pad in set_memory_policy_args to misc_process_flag
*/
#define KFD_IOCTL_MAJOR_VERSION 1
#define KFD_IOCTL_MINOR_VERSION 16
#define KFD_IOCTL_MINOR_VERSION 18
struct kfd_ioctl_get_version_args
{
@@ -53,14 +55,17 @@ struct kfd_ioctl_get_version_args
};
/* For kfd_ioctl_create_queue_args.queue_type. */
#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0
#define KFD_IOC_QUEUE_TYPE_SDMA 0x1
#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2
#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3
#define KFD_IOC_QUEUE_TYPE_COMPUTE 0x0
#define KFD_IOC_QUEUE_TYPE_SDMA 0x1
#define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2
#define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3
#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID 0x4
#define KFD_MAX_QUEUE_PERCENTAGE 100
#define KFD_MAX_QUEUE_PRIORITY 15
#define KFD_MIN_QUEUE_RING_SIZE 1024
struct kfd_ioctl_create_queue_args
{
__u64 ring_base_address; /* to KFD */
@@ -80,6 +85,8 @@ struct kfd_ioctl_create_queue_args
__u64 ctx_save_restore_address; /* to KFD */
__u32 ctx_save_restore_size; /* to KFD */
__u32 ctl_stack_size; /* to KFD */
__u32 sdma_engine_id; /* to KFD */
__u32 pad;
};
struct kfd_ioctl_destroy_queue_args
@@ -152,15 +159,18 @@ struct kfd_dbg_device_info_entry
#define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
/* Misc. per process flags */
#define KFD_PROC_FLAG_MFMA_HIGH_PRECISION (1 << 0)
struct kfd_ioctl_set_memory_policy_args
{
__u64 alternate_aperture_base; /* to KFD */
__u64 alternate_aperture_size; /* to KFD */
__u32 gpu_id; /* to KFD */
__u32 default_policy; /* to KFD */
__u32 alternate_policy; /* to KFD */
__u32 pad;
__u32 gpu_id; /* to KFD */
__u32 default_policy; /* to KFD */
__u32 alternate_policy; /* to KFD */
__u32 misc_process_flag; /* to KFD */
};
/*
@@ -578,7 +588,9 @@ enum kfd_smi_event
KFD_SMI_EVENT_QUEUE_EVICTION = 9,
KFD_SMI_EVENT_QUEUE_RESTORE = 10,
KFD_SMI_EVENT_UNMAP_FROM_GPU = 11,
KFD_SMI_EVENT_DROPPED_EVENT = 12,
KFD_SMI_EVENT_PROCESS_START = 12,
KFD_SMI_EVENT_PROCESS_END = 13,
KFD_SMI_EVENT_DROPPED_EVENT = 14,
/*
* max event number, as a flag bit to get events from all processes,
@@ -589,14 +601,16 @@ enum kfd_smi_event
KFD_SMI_EVENT_ALL_PROCESS = 64
};
/* The reason of the page migration event */
enum KFD_MIGRATE_TRIGGERS
{
KFD_MIGRATE_TRIGGER_PREFETCH, /* Prefetch to GPU */
KFD_MIGRATE_TRIGGER_PREFETCH, /* Prefetch to GPU VRAM or system memory */
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, /* GPU page fault recover */
KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, /* CPU page fault recover */
KFD_MIGRATE_TRIGGER_TTM_EVICTION /* TTM eviction */
};
/* The reason of user queue evition event */
enum KFD_QUEUE_EVICTION_TRIGGERS
{
KFD_QUEUE_EVICTION_TRIGGER_SVM, /* SVM buffer migration */
@@ -607,6 +621,7 @@ enum KFD_QUEUE_EVICTION_TRIGGERS
KFD_QUEUE_EVICTION_CRIU_RESTORE /* CRIU restore */
};
/* The reason of unmap buffer from GPU event */
enum KFD_SVM_UNMAP_TRIGGERS
{
KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, /* MMU notifier CPU buffer movement */
@@ -647,9 +662,8 @@ enum kfd_ioctl_spm_op
* @buf_size[in]: size of the destination buffer
* @timeout[in/out]: [in]: timeout in milliseconds, [out]: amount of time left
* `in the timeout window
* @bytes_copied[out]: amount of data that was copied to the previous dest_buf
* @has_data_loss: boolean indicating whether data was lost
* (e.g. due to a ring-buffer overflow)
* @bytes_copied[out]: total amount of data that was copied to the previous dest_buf
* @has_data_loss: total count for sub-block which has data loss
*
* This ioctl performs different functions depending on the @op parameter.
*
@@ -700,6 +714,22 @@ struct kfd_ioctl_spm_args
__u32 has_data_loss;
};
/**
* kfd_ioctl_spm_buffer_header - SPM Buffer header for kfd_ioctl_spm_args->dest_buf
*
* @version [out]: spm versiom
* @bytes_copied [out]: amount of data for each sub-block
* @has_data_loss: [out]: boolean indicating whether data was lost for each sub-block
* (e.g. due to a ring-buffer overflow)
*/
struct kfd_ioctl_spm_buffer_header
{
__u32 version; /* 0-23: minor 24-31: major */
__u32 bytes_copied;
__u32 has_data_loss;
__u32 reserved[5];
};
/*
* SVM event tracing via SMI system management interface
*
@@ -760,6 +790,7 @@ struct kfd_ioctl_spm_args
"%lld -%d %x %c\n", (ns), (pid), (node), (rescheduled)
#define KFD_EVENT_FMT_UNMAP_FROM_GPU(ns, pid, addr, size, node, unmap_trigger) \
"%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size), (node), (unmap_trigger)
#define KFD_EVENT_FMT_PROCESS(pid, task_name) "%x %s\n", (pid), (task_name)
#define KFD_EVENT_FMT_DROPPED_EVENT(ns, pid, drop_count) "%lld -%d %d\n", (ns), (pid), (drop_count)
/**************************************************************************************************
@@ -0,0 +1,12 @@
#
#
set(ROCPROFILER_LIB_KFD_EVENT_SOURCES abi.cpp kfd.cpp)
set(ROCPROFILER_LIB_KFD_EVENT_HEADERS defines.hpp kfd.hpp utils.hpp)
target_sources(
rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_KFD_EVENT_SOURCES}
${ROCPROFILER_LIB_KFD_EVENT_HEADERS})
if(ROCPROFILER_BUILD_TESTS)
add_subdirectory(tests)
endif()
@@ -0,0 +1,88 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "lib/common/container/small_vector.hpp"
#include "lib/common/defines.hpp"
#include "lib/common/mpl.hpp"
#include "lib/rocprofiler-sdk/details/kfd_ioctl.h"
#include "lib/rocprofiler-sdk/kfd/defines.hpp"
#include "lib/rocprofiler-sdk/kfd/utils.hpp"
#include <fmt/core.h>
#define ASSERT_SAME(A, B) static_assert(static_cast<size_t>(A) == static_cast<size_t>(B))
namespace rocprofiler
{
namespace kfd
{
static_assert(KFD_SMI_EVENT_NONE == 0);
static_assert(KFD_SMI_EVENT_MIGRATE_START == 5);
static_assert(KFD_SMI_EVENT_MIGRATE_END == 6);
static_assert(KFD_SMI_EVENT_PAGE_FAULT_START == 7);
static_assert(KFD_SMI_EVENT_PAGE_FAULT_END == 8);
static_assert(KFD_SMI_EVENT_QUEUE_EVICTION == 9);
static_assert(KFD_SMI_EVENT_QUEUE_RESTORE == 10);
static_assert(KFD_SMI_EVENT_UNMAP_FROM_GPU == 11);
static_assert(KFD_SMI_EVENT_DROPPED_EVENT == 14);
static_assert(KFD_SMI_EVENT_ALL_PROCESS == 64);
// If any of these changes, we can no longer static_cast the new triggers
// from the KFD trigger to the record operation type
static_assert(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_LAST == 5);
static_assert(ROCPROFILER_KFD_PAGE_MIGRATE_LAST == 4);
static_assert(ROCPROFILER_KFD_EVENT_PAGE_FAULT_LAST == 5);
static_assert(ROCPROFILER_KFD_PAGE_FAULT_LAST == 4);
static_assert(ROCPROFILER_KFD_EVENT_QUEUE_LAST == 8);
static_assert(ROCPROFILER_KFD_QUEUE_LAST == 6);
static_assert(ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_LAST == 3);
static_assert(ROCPROFILER_KFD_EVENT_QUEUE_RESTORE_RESCHEDULED == 6);
static_assert(ROCPROFILER_KFD_EVENT_QUEUE_RESTORE == 7);
// clang-format off
ASSERT_SAME(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PREFETCH, KFD_MIGRATE_TRIGGER_PREFETCH );
ASSERT_SAME(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PAGEFAULT_GPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU );
ASSERT_SAME(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PAGEFAULT_CPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU );
ASSERT_SAME(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_TTM_EVICTION, KFD_MIGRATE_TRIGGER_TTM_EVICTION );
ASSERT_SAME(ROCPROFILER_KFD_PAGE_MIGRATE_PREFETCH, KFD_MIGRATE_TRIGGER_PREFETCH );
ASSERT_SAME(ROCPROFILER_KFD_PAGE_MIGRATE_PAGEFAULT_GPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU );
ASSERT_SAME(ROCPROFILER_KFD_PAGE_MIGRATE_PAGEFAULT_CPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU );
ASSERT_SAME(ROCPROFILER_KFD_PAGE_MIGRATE_TTM_EVICTION, KFD_MIGRATE_TRIGGER_TTM_EVICTION );
ASSERT_SAME(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_SVM, KFD_QUEUE_EVICTION_TRIGGER_SVM );
ASSERT_SAME(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_USERPTR, KFD_QUEUE_EVICTION_TRIGGER_USERPTR );
ASSERT_SAME(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_TTM, KFD_QUEUE_EVICTION_TRIGGER_TTM );
ASSERT_SAME(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_SUSPEND, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND );
ASSERT_SAME(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_CRIU_CHECKPOINT, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT );
ASSERT_SAME(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_CRIU_RESTORE, KFD_QUEUE_EVICTION_CRIU_RESTORE );
ASSERT_SAME(ROCPROFILER_KFD_QUEUE_EVICT_SVM, KFD_QUEUE_EVICTION_TRIGGER_SVM );
ASSERT_SAME(ROCPROFILER_KFD_QUEUE_EVICT_USERPTR, KFD_QUEUE_EVICTION_TRIGGER_USERPTR );
ASSERT_SAME(ROCPROFILER_KFD_QUEUE_EVICT_TTM, KFD_QUEUE_EVICTION_TRIGGER_TTM );
ASSERT_SAME(ROCPROFILER_KFD_QUEUE_EVICT_SUSPEND, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND );
ASSERT_SAME(ROCPROFILER_KFD_QUEUE_EVICT_CRIU_CHECKPOINT, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT );
ASSERT_SAME(ROCPROFILER_KFD_QUEUE_EVICT_CRIU_RESTORE, KFD_QUEUE_EVICTION_CRIU_RESTORE );
ASSERT_SAME(ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_MMU_NOTIFY, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY );
ASSERT_SAME(ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_MMU_NOTIFY_MIGRATE, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE);
ASSERT_SAME(ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_UNMAP_FROM_CPU, KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU );
// clang-format on
} // namespace kfd
} // namespace rocprofiler
@@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
@@ -44,20 +44,47 @@
} \
} while(0)
#define SPECIALIZE_PAGE_MIGRATION_INFO(ROCPROF_NAME, KFD_NAME, FORMAT_STRING) \
#define SPECIALIZE_KFD_EVENT_INFO(EVENT_NAME, KFD_NAME, BUFFER_KIND, FORMAT_STRING) \
template <> \
struct page_migration_info<ROCPROFILER_PAGE_MIGRATION_##ROCPROF_NAME> \
struct kfd_event_info<KFD_EVENT_##EVENT_NAME> \
{ \
static constexpr auto name = "PAGE_MIGRATION_" #ROCPROF_NAME; \
static constexpr size_t operation = ROCPROFILER_PAGE_MIGRATION_##ROCPROF_NAME; \
static constexpr size_t kfd_operation = KFD_SMI_EVENT_##KFD_NAME; \
static constexpr size_t kfd_bitmask = bitmask(KFD_SMI_EVENT_##KFD_NAME); \
static constexpr std::string_view format_str = FORMAT_STRING; \
static constexpr size_t kind = KFD_EVENT_##EVENT_NAME; \
static constexpr size_t buffer_kind = ROCPROFILER_BUFFER_TRACING_KFD_##BUFFER_KIND; \
static constexpr size_t kfd_id = KFD_SMI_EVENT_##KFD_NAME; \
static constexpr size_t kfd_bitmask = bitmask(KFD_SMI_EVENT_##KFD_NAME); \
static constexpr std::string_view format_str = FORMAT_STRING; \
};
#define SPECIALIZE_KFD_KIND_INFO(KIND_NAME, LAST_OP) \
template <> \
struct kfd_kind_info<ROCPROFILER_BUFFER_TRACING_KFD_##KIND_NAME> \
{ \
static constexpr auto kind = ROCPROFILER_BUFFER_TRACING_KFD_##KIND_NAME; \
static constexpr auto last = ::LAST_OP; \
}
#define SPECIALIZE_KFD_KIND_NAME(KIND_NAME_SUFFIX, OPERATION) \
template <> \
struct kfd_operation_info<ROCPROFILER_BUFFER_TRACING_KFD_##KIND_NAME_SUFFIX, OPERATION> \
{ \
static constexpr auto kind = ROCPROFILER_BUFFER_TRACING_KFD_##KIND_NAME_SUFFIX; \
static constexpr auto operation = ::OPERATION; \
static constexpr auto name = #OPERATION; \
}
#define SPECIALIZE_KFD_IOC_IOCTL(STRUCT, ARG_IOC) \
template <> \
struct IOC_event<STRUCT> \
{ \
static constexpr auto value = ARG_IOC; \
}
#define ASSERT_SAME_AND_COPY(member) \
if(end.member == start.member) \
{ \
ret.member = end.member; \
} \
else \
ROCP_ERROR << fmt::format("Expected member " #member " to be same in events. {} vs {}", \
end.member, \
start.member);
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,87 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#if defined(ROCPROFILER_LIB_ROCPROFILER_SDK_KFD_CPP_IMPL) && \
ROCPROFILER_LIB_ROCPROFILER_SDK_KFD_CPP_IMPL == 1
// clang-format off
// Mapping for kfd_event_id (utils.h) | ID from kfd_ioctl.h | buffer_tracing_kind_t | format string
SPECIALIZE_KFD_EVENT_INFO(PAGE_MIGRATE_START, MIGRATE_START, EVENT_PAGE_MIGRATE, "%x %ld -%d @%lx(%lx) %x->%x %x:%x %d\n");
SPECIALIZE_KFD_EVENT_INFO(PAGE_MIGRATE_END, MIGRATE_END, EVENT_PAGE_MIGRATE, "%x %ld -%d @%lx(%lx) %x->%x %d %d\n" );
SPECIALIZE_KFD_EVENT_INFO(PAGE_FAULT_START, PAGE_FAULT_START, EVENT_PAGE_FAULT, "%x %ld -%d @%lx(%x) %c\n" );
SPECIALIZE_KFD_EVENT_INFO(PAGE_FAULT_END, PAGE_FAULT_END, EVENT_PAGE_FAULT, "%x %ld -%d @%lx(%x) %c\n" );
SPECIALIZE_KFD_EVENT_INFO(QUEUE_EVICTION, QUEUE_EVICTION, EVENT_QUEUE, "%x %ld -%d %x %d\n" );
SPECIALIZE_KFD_EVENT_INFO(QUEUE_RESTORE, QUEUE_RESTORE, EVENT_QUEUE, "%x %ld -%d %x %c\n" );
SPECIALIZE_KFD_EVENT_INFO(UNMAP_FROM_GPU, UNMAP_FROM_GPU, EVENT_UNMAP_FROM_GPU, "%x %ld -%d @%lx(%lx) %x %d\n" );
SPECIALIZE_KFD_EVENT_INFO(DROPPED_EVENT, DROPPED_EVENT, EVENT_DROPPED_EVENTS, "%x %ld -%d %d\n" );
SPECIALIZE_KFD_KIND_INFO(EVENT_PAGE_MIGRATE, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_LAST);
SPECIALIZE_KFD_KIND_INFO(EVENT_PAGE_FAULT, ROCPROFILER_KFD_EVENT_PAGE_FAULT_LAST);
SPECIALIZE_KFD_KIND_INFO(EVENT_QUEUE, ROCPROFILER_KFD_EVENT_QUEUE_LAST);
SPECIALIZE_KFD_KIND_INFO(EVENT_UNMAP_FROM_GPU, ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_LAST);
SPECIALIZE_KFD_KIND_INFO(EVENT_DROPPED_EVENTS, ROCPROFILER_KFD_EVENT_DROPPED_EVENTS_LAST);
SPECIALIZE_KFD_KIND_INFO(PAGE_MIGRATE, ROCPROFILER_KFD_PAGE_MIGRATE_LAST);
SPECIALIZE_KFD_KIND_INFO(PAGE_FAULT, ROCPROFILER_KFD_PAGE_FAULT_LAST);
SPECIALIZE_KFD_KIND_INFO(QUEUE, ROCPROFILER_KFD_QUEUE_LAST);
// Events
SPECIALIZE_KFD_KIND_NAME(EVENT_PAGE_MIGRATE, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PREFETCH);
SPECIALIZE_KFD_KIND_NAME(EVENT_PAGE_MIGRATE, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PAGEFAULT_GPU);
SPECIALIZE_KFD_KIND_NAME(EVENT_PAGE_MIGRATE, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PAGEFAULT_CPU);
SPECIALIZE_KFD_KIND_NAME(EVENT_PAGE_MIGRATE, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_TTM_EVICTION);
SPECIALIZE_KFD_KIND_NAME(EVENT_PAGE_MIGRATE, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_END);
SPECIALIZE_KFD_KIND_NAME(EVENT_PAGE_FAULT, ROCPROFILER_KFD_EVENT_PAGE_FAULT_START);
SPECIALIZE_KFD_KIND_NAME(EVENT_PAGE_FAULT, ROCPROFILER_KFD_EVENT_PAGE_FAULT_START_READ_FAULT);
SPECIALIZE_KFD_KIND_NAME(EVENT_PAGE_FAULT, ROCPROFILER_KFD_EVENT_PAGE_FAULT_START_WRITE_FAULT);
SPECIALIZE_KFD_KIND_NAME(EVENT_PAGE_FAULT, ROCPROFILER_KFD_EVENT_PAGE_FAULT_END_PAGE_MIGRATED);
SPECIALIZE_KFD_KIND_NAME(EVENT_PAGE_FAULT, ROCPROFILER_KFD_EVENT_PAGE_FAULT_END_PAGE_UPDATED);
SPECIALIZE_KFD_KIND_NAME(EVENT_QUEUE, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_SVM);
SPECIALIZE_KFD_KIND_NAME(EVENT_QUEUE, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_USERPTR);
SPECIALIZE_KFD_KIND_NAME(EVENT_QUEUE, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_TTM);
SPECIALIZE_KFD_KIND_NAME(EVENT_QUEUE, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_SUSPEND);
SPECIALIZE_KFD_KIND_NAME(EVENT_QUEUE, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_CRIU_CHECKPOINT);
SPECIALIZE_KFD_KIND_NAME(EVENT_QUEUE, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_CRIU_RESTORE);
SPECIALIZE_KFD_KIND_NAME(EVENT_QUEUE, ROCPROFILER_KFD_EVENT_QUEUE_RESTORE_RESCHEDULED);
SPECIALIZE_KFD_KIND_NAME(EVENT_QUEUE, ROCPROFILER_KFD_EVENT_QUEUE_RESTORE);
SPECIALIZE_KFD_KIND_NAME(EVENT_UNMAP_FROM_GPU, ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_MMU_NOTIFY);
SPECIALIZE_KFD_KIND_NAME(EVENT_UNMAP_FROM_GPU, ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_MMU_NOTIFY_MIGRATE);
SPECIALIZE_KFD_KIND_NAME(EVENT_UNMAP_FROM_GPU, ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_UNMAP_FROM_CPU);
SPECIALIZE_KFD_KIND_NAME(EVENT_DROPPED_EVENTS, ROCPROFILER_KFD_EVENT_DROPPED_EVENTS);
// Paired records
SPECIALIZE_KFD_KIND_NAME(PAGE_MIGRATE, ROCPROFILER_KFD_PAGE_MIGRATE_PREFETCH);
SPECIALIZE_KFD_KIND_NAME(PAGE_MIGRATE, ROCPROFILER_KFD_PAGE_MIGRATE_PAGEFAULT_GPU);
SPECIALIZE_KFD_KIND_NAME(PAGE_MIGRATE, ROCPROFILER_KFD_PAGE_MIGRATE_PAGEFAULT_CPU);
SPECIALIZE_KFD_KIND_NAME(PAGE_MIGRATE, ROCPROFILER_KFD_PAGE_MIGRATE_TTM_EVICTION);
SPECIALIZE_KFD_KIND_NAME(PAGE_FAULT, ROCPROFILER_KFD_PAGE_FAULT_READ_FAULT_MIGRATED);
SPECIALIZE_KFD_KIND_NAME(PAGE_FAULT, ROCPROFILER_KFD_PAGE_FAULT_READ_FAULT_UPDATED);
SPECIALIZE_KFD_KIND_NAME(PAGE_FAULT, ROCPROFILER_KFD_PAGE_FAULT_WRITE_FAULT_MIGRATED);
SPECIALIZE_KFD_KIND_NAME(PAGE_FAULT, ROCPROFILER_KFD_PAGE_FAULT_WRITE_FAULT_UPDATED);
SPECIALIZE_KFD_KIND_NAME(QUEUE, ROCPROFILER_KFD_QUEUE_EVICT_SVM);
SPECIALIZE_KFD_KIND_NAME(QUEUE, ROCPROFILER_KFD_QUEUE_EVICT_USERPTR);
SPECIALIZE_KFD_KIND_NAME(QUEUE, ROCPROFILER_KFD_QUEUE_EVICT_TTM);
SPECIALIZE_KFD_KIND_NAME(QUEUE, ROCPROFILER_KFD_QUEUE_EVICT_SUSPEND);
SPECIALIZE_KFD_KIND_NAME(QUEUE, ROCPROFILER_KFD_QUEUE_EVICT_CRIU_CHECKPOINT);
SPECIALIZE_KFD_KIND_NAME(QUEUE, ROCPROFILER_KFD_QUEUE_EVICT_CRIU_RESTORE);
// clang-format on
#endif
@@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
@@ -24,22 +24,22 @@
#include "lib/common/container/small_vector.hpp"
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/rocprofiler.h>
namespace rocprofiler
{
namespace page_migration
namespace kfd
{
const char*
name_by_id(uint32_t id);
name_by_id(uint32_t kind, uint32_t id);
std::vector<uint32_t>
get_ids();
get_ids(uint32_t kind);
rocprofiler_status_t
init();
void
finalize();
} // namespace page_migration
} // namespace kfd
} // namespace rocprofiler
@@ -0,0 +1,48 @@
# MIT License
#
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
rocprofiler_deactivate_clang_tidy()
include(GoogleTest)
set(ROCPROFILER_LIB_KFD_EVENTS_TEST_SOURCES parser.cpp)
add_executable(kfd-events-parser)
target_sources(kfd-events-parser PRIVATE ${ROCPROFILER_LIB_KFD_EVENTS_TEST_SOURCES})
target_link_libraries(
kfd-events-parser
PRIVATE rocprofiler-sdk::rocprofiler-sdk-common-library
rocprofiler-sdk::rocprofiler-sdk-static-library GTest::gtest
GTest::gtest_main)
gtest_add_tests(
TARGET kfd-events-parser
SOURCES ${ROCPROFILER_LIB_COUNTER_TEST_SOURCES}
TEST_LIST kfd-events-parsers_TESTS
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
set_tests_properties(
${kfd-events-parsers_TESTS}
PROPERTIES TIMEOUT 45 LABELS "unittests;kfd-events" FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}")
@@ -0,0 +1,274 @@
// MIT License
//
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <chrono>
#include <cstddef>
#include <limits>
#include <random>
#include <string>
#include <string_view>
#include "lib/rocprofiler-sdk/kfd/defines.hpp"
#include "lib/rocprofiler-sdk/kfd/utils.hpp"
#include "rocprofiler-sdk/kfd/kfd_id.h"
#include <fmt/core.h>
#include <gtest/gtest.h>
#include <unistd.h>
namespace rocprofiler
{
namespace kfd
{
#define ROCPROFILER_LIB_ROCPROFILER_SDK_KFD_CPP_IMPL 1
#include "lib/rocprofiler-sdk/kfd/kfd.def.cpp"
#undef ROCPROFILER_LIB_ROCPROFILER_SDK_KFD_CPP_IMPL
} // namespace kfd
} // namespace rocprofiler
namespace
{
using namespace rocprofiler::kfd;
// Internally, get_node_map is called for a given gpu ID
// We are testing the parser, so use an identity mapping
const agent_id_map_t agent_map = {
{0, {0}},
{0x1ea0, {0x1ea0}},
{0xfc7d, {0xfc7d}},
{0xad5c, {0xad5c}},
};
kfd_event_record
parse(std::string_view arg)
{
size_t kfd_id = std::numeric_limits<size_t>::max();
const auto scan_count = std::sscanf(arg.data(), "%lx ", &kfd_id);
EXPECT_EQ(scan_count, 1);
EXPECT_GE(kfd_id, KFD_SMI_EVENT_MIGRATE_START);
EXPECT_LE(kfd_id, KFD_SMI_EVENT_DROPPED_EVENT);
auto event_id = to_rocprofiler_kfd_event_id_func(static_cast<rocprofiler_kfd_event_id>(kfd_id),
std::make_index_sequence<KFD_EVENT_LAST>{});
EXPECT_GE(event_id, KFD_EVENT_PAGE_MIGRATE_START);
EXPECT_LE(event_id, KFD_EVENT_DROPPED_EVENT);
return parse_event(event_id, agent_map, arg);
}
} // namespace
/* The following tests ensure that we can correctly parse and convert an event to the structured
equivalent
*/
TEST(rocprofiler_lib, parse_kfd_event_page_migrate_start)
{
using namespace rocprofiler::kfd;
const auto event = parse("5 14601738586508 -152990 @7f0f15200(2c02) 0->1ea0 1ea0:0 1");
const auto migrate_start = event.data.page_migrate_event;
EXPECT_EQ(event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE);
EXPECT_EQ(event.operation, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PAGEFAULT_GPU);
EXPECT_EQ(migrate_start.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE);
EXPECT_EQ(migrate_start.operation, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PAGEFAULT_GPU);
EXPECT_EQ(migrate_start.timestamp, 14601738586508);
EXPECT_EQ(migrate_start.pid, 152990);
EXPECT_EQ(migrate_start.start_address.handle, 0x7f0f15200 << 12);
EXPECT_EQ(migrate_start.end_address.handle, (0x7f0f15200 + 0x2c02) << 12);
EXPECT_EQ(migrate_start.src_agent.handle, 0);
EXPECT_EQ(migrate_start.dst_agent.handle, 0x1ea0);
EXPECT_EQ(migrate_start.prefetch_agent.handle, 0x1ea0);
EXPECT_EQ(migrate_start.preferred_agent.handle, 0);
EXPECT_EQ(migrate_start.error_code, 0);
}
TEST(rocprofiler_lib, parse_kfd_event_page_migrate_end)
{
using namespace rocprofiler::kfd;
const auto event = parse("6 15202910515905 -152990 @7f0f15200(2c02) 0->1ea0 1 75");
const auto migrate_end = event.data.page_migrate_event;
EXPECT_EQ(event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE);
EXPECT_EQ(event.operation, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_END);
EXPECT_EQ(migrate_end.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE);
EXPECT_EQ(migrate_end.operation, ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_END);
EXPECT_EQ(migrate_end.timestamp, 15202910515905);
EXPECT_EQ(migrate_end.pid, 152990);
EXPECT_EQ(migrate_end.start_address.handle, 0x7f0f15200 << 12);
EXPECT_EQ(migrate_end.end_address.handle, (0x7f0f15200 + 0x2c02) << 12);
EXPECT_EQ(migrate_end.src_agent.handle, 0);
EXPECT_EQ(migrate_end.dst_agent.handle, 0x1ea0);
EXPECT_EQ(migrate_end.prefetch_agent.handle, 0); // Not generated for end event
EXPECT_EQ(migrate_end.preferred_agent.handle, 0); // Not generated for end event
EXPECT_EQ(migrate_end.error_code, 75);
}
TEST(rocprofiler_lib, parse_kfd_event_page_fault_start)
{
using namespace rocprofiler::kfd;
const auto event = parse("7 40507688414784 -156893 @7fa608127(ad5c) R");
const auto page_fault = event.data.page_fault_event;
EXPECT_EQ(event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT);
EXPECT_EQ(event.operation, ROCPROFILER_KFD_EVENT_PAGE_FAULT_START_READ_FAULT);
EXPECT_EQ(page_fault.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT);
EXPECT_EQ(page_fault.operation, ROCPROFILER_KFD_EVENT_PAGE_FAULT_START_READ_FAULT);
EXPECT_EQ(page_fault.timestamp, 40507688414784);
EXPECT_EQ(page_fault.pid, 156893);
EXPECT_EQ(page_fault.agent_id.handle, 0xad5c);
EXPECT_EQ(page_fault.address.handle, 0x7fa608127 << 12);
}
TEST(rocprofiler_lib, parse_kfd_event_page_fault_end)
{
using namespace rocprofiler::kfd;
{
const auto event = parse("8 40507688514376 -156893 @7fa608127(ad5c) U");
const auto page_fault = event.data.page_fault_event;
EXPECT_EQ(event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT);
EXPECT_EQ(event.operation, ROCPROFILER_KFD_EVENT_PAGE_FAULT_END_PAGE_UPDATED);
EXPECT_EQ(page_fault.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT);
EXPECT_EQ(page_fault.operation, ROCPROFILER_KFD_EVENT_PAGE_FAULT_END_PAGE_UPDATED);
EXPECT_EQ(page_fault.timestamp, 40507688514376);
EXPECT_EQ(page_fault.pid, 156893);
EXPECT_EQ(page_fault.agent_id.handle, 0xad5c);
EXPECT_EQ(page_fault.address.handle, 0x7fa608127 << 12);
}
{
const auto event = parse("8 40507688516386 -156898 @7fa698127(ad5c) M");
const auto page_fault = event.data.page_fault_event;
EXPECT_EQ(event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT);
EXPECT_EQ(event.operation, ROCPROFILER_KFD_EVENT_PAGE_FAULT_END_PAGE_MIGRATED);
EXPECT_EQ(page_fault.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT);
EXPECT_EQ(page_fault.operation, ROCPROFILER_KFD_EVENT_PAGE_FAULT_END_PAGE_MIGRATED);
EXPECT_EQ(page_fault.timestamp, 40507688516386);
EXPECT_EQ(page_fault.pid, 156898);
EXPECT_EQ(page_fault.agent_id.handle, 0xad5c);
EXPECT_EQ(page_fault.address.handle, 0x7fa698127 << 12);
}
}
TEST(rocprofiler_lib, parse_kfd_event_queue_eviction)
{
using namespace rocprofiler::kfd;
{
const auto event = parse("9 38086928279363 -125752 1ea0 1");
const auto queue_event = event.data.queue_event;
EXPECT_EQ(event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE);
EXPECT_EQ(event.operation, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_USERPTR);
EXPECT_EQ(queue_event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE);
EXPECT_EQ(queue_event.operation, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_USERPTR);
EXPECT_EQ(queue_event.timestamp, 38086928279363);
EXPECT_EQ(queue_event.pid, 125752);
EXPECT_EQ(queue_event.agent_id.handle, 0x1ea0);
}
{
const auto event = parse("9 38086928279363 -125752 1ea0 3");
const auto queue_event = event.data.queue_event;
EXPECT_EQ(event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE);
EXPECT_EQ(event.operation, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_SUSPEND);
EXPECT_EQ(queue_event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE);
EXPECT_EQ(queue_event.operation, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_SUSPEND);
EXPECT_EQ(queue_event.timestamp, 38086928279363);
EXPECT_EQ(queue_event.pid, 125752);
EXPECT_EQ(queue_event.agent_id.handle, 0x1ea0);
}
}
TEST(rocprofiler_lib, parse_kfd_event_queue_restore)
{
using namespace rocprofiler::kfd;
{
const auto event = parse("a 38652512365099 -131057 fc7d");
const auto queue_event = event.data.queue_event;
EXPECT_EQ(event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE);
EXPECT_EQ(event.operation, ROCPROFILER_KFD_EVENT_QUEUE_RESTORE);
EXPECT_EQ(queue_event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE);
EXPECT_EQ(queue_event.operation, ROCPROFILER_KFD_EVENT_QUEUE_RESTORE);
EXPECT_EQ(queue_event.timestamp, 38652512365099);
EXPECT_EQ(queue_event.pid, 131057);
EXPECT_EQ(queue_event.agent_id.handle, 0xfc7d);
}
{
const auto event = parse("a 40082605896929 -148516 fc7d R");
const auto queue_event = event.data.queue_event;
EXPECT_EQ(event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE);
EXPECT_EQ(event.operation, ROCPROFILER_KFD_EVENT_QUEUE_RESTORE_RESCHEDULED);
EXPECT_EQ(queue_event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE);
EXPECT_EQ(queue_event.operation, ROCPROFILER_KFD_EVENT_QUEUE_RESTORE_RESCHEDULED);
EXPECT_EQ(queue_event.timestamp, 40082605896929);
EXPECT_EQ(queue_event.pid, 148516);
EXPECT_EQ(queue_event.agent_id.handle, 0xfc7d);
}
}
TEST(rocprofiler_lib, parse_kfd_event_unmap_from_gpu)
{
using namespace rocprofiler::kfd;
const auto event = parse("b 15203214457186 -152990 @7f93ac200(200) 1ea0 2");
const auto unmap = event.data.unmap_event;
EXPECT_EQ(event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_UNMAP_FROM_GPU);
EXPECT_EQ(event.operation, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_TTM);
EXPECT_EQ(unmap.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_UNMAP_FROM_GPU);
EXPECT_EQ(unmap.operation, ROCPROFILER_KFD_EVENT_QUEUE_EVICT_TTM);
EXPECT_EQ(unmap.timestamp, 15203214457186);
EXPECT_EQ(unmap.pid, 152990);
EXPECT_EQ(unmap.start_address.handle, 0x7f93ac200 << 12);
EXPECT_EQ(unmap.end_address.handle, (0x7f93ac200 + 0x200) << 12);
EXPECT_EQ(unmap.agent_id.handle, 0x1ea0);
}
TEST(rocprofiler_lib, parse_kfd_event_dropped_event)
{
using namespace rocprofiler::kfd;
const auto event = parse("e 18203445217186 -323990 5");
const auto dropped = event.data.dropped_event;
EXPECT_EQ(event.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS);
EXPECT_EQ(event.operation, ROCPROFILER_KFD_EVENT_DROPPED_EVENTS);
EXPECT_EQ(dropped.kind, ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS);
EXPECT_EQ(dropped.operation, ROCPROFILER_KFD_EVENT_DROPPED_EVENTS);
EXPECT_EQ(dropped.timestamp, 18203445217186);
EXPECT_EQ(dropped.pid, 323990);
EXPECT_EQ(dropped.count, 5);
}
@@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
@@ -30,94 +30,57 @@
#include <algorithm>
#include <cstdint>
#include <limits>
#include <string_view>
#include <unordered_map>
#include <utility>
namespace rocprofiler
{
namespace page_migration
namespace kfd
{
/* serves as an overview of what events we capture and report
struct event_page_fault_start_t
enum rocprofiler_kfd_event_id
{
int kind;
uint64_t timestamp;
int pid;
int node_id;
uint64_t address;
fault_t fault;
KFD_EVENT_NONE = -1,
KFD_EVENT_PAGE_MIGRATE_START,
KFD_EVENT_PAGE_MIGRATE_END,
KFD_EVENT_PAGE_FAULT_START,
KFD_EVENT_PAGE_FAULT_END,
KFD_EVENT_QUEUE_EVICTION,
KFD_EVENT_QUEUE_RESTORE,
KFD_EVENT_UNMAP_FROM_GPU,
KFD_EVENT_DROPPED_EVENT,
KFD_EVENT_LAST,
};
struct event_page_fault_end_t
struct kfd_event_record
{
int kind;
uint64_t timestamp;
uint32_t pid;
int node_id;
uint64_t address;
bool migrated;
rocprofiler_buffer_tracing_kind_t kind{ROCPROFILER_BUFFER_TRACING_NONE};
int operation{-1};
union
{
rocprofiler_buffer_tracing_kfd_event_page_migrate_record_t page_migrate_event;
rocprofiler_buffer_tracing_kfd_event_page_fault_record_t page_fault_event;
rocprofiler_buffer_tracing_kfd_event_queue_record_t queue_event;
rocprofiler_buffer_tracing_kfd_event_unmap_from_gpu_record_t unmap_event;
rocprofiler_buffer_tracing_kfd_event_dropped_events_record_t dropped_event;
rocprofiler_buffer_tracing_kfd_page_migrate_record_t page_migrate_record;
rocprofiler_buffer_tracing_kfd_page_fault_record_t page_fault_record;
rocprofiler_buffer_tracing_kfd_queue_record_t queue_record;
} data;
};
struct event_migrate_start_t
{
int kind;
uint64_t timestamp;
uint32_t pid;
uint64_t start;
uint64_t end_offset;
uint32_t from;
uint32_t to;
uint32_t prefetch_node; // last prefetch location, 0 for CPU, or GPU id
uint32_t preferred_node; // perferred location, 0 for CPU, or GPU id
uint32_t trigger;
};
using agent_id_map_t = std::unordered_map<uint64_t, rocprofiler_agent_id_t>;
struct event_migrate_end_t
{
int kind;
uint64_t timestamp;
uint32_t pid;
uint64_t start;
uint64_t end_offset;
uint32_t from;
uint32_t to;
uint32_t trigger;
};
template <uint32_t>
struct kfd_event_info;
struct event_queue_eviction_t
{
int kind;
uint64_t timestamp;
uint32_t pid;
int node_id;
uint32_t trigger;
};
template <uint32_t>
struct kfd_kind_info;
struct event_queue_restore_t
{
int kind;
uint64_t timestamp;
uint32_t pid;
int node_id;
bool rescheduled;
};
struct event_unmap_from_gpu_t
{
int kind;
uint64_t timestamp;
uint32_t pid;
uint64_t address;
uint64_t size;
int node_id;
uint32_t trigger;
};
*/
template <size_t>
struct page_migration_info;
template <uint32_t, uint32_t>
struct kfd_operation_info;
using namespace rocprofiler::common;
@@ -145,14 +108,14 @@ constexpr size_t bitmask(std::index_sequence<Args...>)
template <size_t... Ints>
constexpr size_t kfd_bitmask(std::index_sequence<Ints...>)
{
return (page_migration_info<Ints>::kfd_bitmask | ...);
return (kfd_event_info<Ints>::kfd_bitmask | ...);
}
template <size_t OpInx, size_t... OpInxs>
constexpr size_t
kfd_bitmask_impl(size_t rocprof_op, std::index_sequence<OpInx, OpInxs...>)
{
if(rocprof_op == OpInx) return page_migration_info<OpInx>::kfd_bitmask;
if(rocprof_op == OpInx) return kfd_event_info<OpInx>::kfd_bitmask;
if constexpr(sizeof...(OpInxs) > 0)
return kfd_bitmask_impl(rocprof_op, std::index_sequence<OpInxs...>{});
else
@@ -174,24 +137,25 @@ kfd_bitmask(const container::small_vector<size_t>& rocprof_event_ids,
template <size_t OpInx, size_t... OpInxs>
constexpr size_t
kfd_to_rocprof_op(size_t kfd_id, std::index_sequence<OpInx, OpInxs...>)
to_rocprofiler_kfd_event_id_func(size_t kfd_id, std::index_sequence<OpInx, OpInxs...>)
{
if(kfd_id == page_migration_info<OpInx>::kfd_operation) return OpInx;
if(kfd_id == kfd_event_info<OpInx>::kfd_id) return OpInx;
if constexpr(sizeof...(OpInxs) > 0)
return kfd_to_rocprof_op(kfd_id, std::index_sequence<OpInxs...>{});
return to_rocprofiler_kfd_event_id_func(kfd_id, std::index_sequence<OpInxs...>{});
else
return 0;
return std::numeric_limits<size_t>::max();
}
kfd_event_record
parse_event(size_t event_id, const agent_id_map_t& agents, std::string_view strn);
size_t
get_rocprof_op(const std::string_view event_data);
void
kfd_readlines(const std::string_view str, void(handler)(std::string_view));
using rocprof_buffer_op_t = rocprofiler_page_migration_operation_t;
using node_fd_t = int;
} // namespace page_migration
} // namespace kfd
} // namespace rocprofiler
@@ -1,7 +0,0 @@
#
#
set(ROCPROFILER_LIB_UVM_SOURCES abi.cpp page_migration.cpp)
set(ROCPROFILER_LIB_UVM_HEADERS defines.hpp page_migration.hpp utils.hpp)
target_sources(rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_UVM_SOURCES}
${ROCPROFILER_LIB_UVM_HEADERS})
@@ -1,107 +0,0 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <fmt/core.h>
#include "lib/common/container/small_vector.hpp"
#include "lib/common/defines.hpp"
#include "lib/common/mpl.hpp"
#include "lib/rocprofiler-sdk/details/kfd_ioctl.h"
#include "lib/rocprofiler-sdk/page_migration/utils.hpp"
#define ASSERT_SAME(A, B) static_assert(static_cast<size_t>(A) == static_cast<size_t>(B))
#define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1
#include "lib/rocprofiler-sdk/page_migration/page_migration.def.cpp"
#undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL
namespace rocprofiler
{
namespace page_migration
{
using namespace rocprofiler::page_migration;
using namespace rocprofiler::common::container;
using rocprofiler_page_migration_seq_t = std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>;
static_assert(KFD_SMI_EVENT_NONE == 0);
static_assert(KFD_SMI_EVENT_MIGRATE_START == 5);
static_assert(KFD_SMI_EVENT_MIGRATE_END == 6);
static_assert(KFD_SMI_EVENT_PAGE_FAULT_START == 7);
static_assert(KFD_SMI_EVENT_PAGE_FAULT_END == 8);
static_assert(KFD_SMI_EVENT_QUEUE_EVICTION == 9);
static_assert(KFD_SMI_EVENT_QUEUE_RESTORE == 10);
static_assert(KFD_SMI_EVENT_UNMAP_FROM_GPU == 11);
static_assert(KFD_SMI_EVENT_DROPPED_EVENT == 12);
static_assert(KFD_SMI_EVENT_ALL_PROCESS == 64);
// Update page_migration.def.cpp with event mappings
// Update page_migration.cpp to parse and report new event
static_assert(ROCPROFILER_PAGE_MIGRATION_LAST == 9,
"New event added, update KFD to ROCPROFILER mappings");
// clang-format off
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, KFD_MIGRATE_TRIGGER_TTM_EVICTION );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, KFD_QUEUE_EVICTION_TRIGGER_SVM );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, KFD_QUEUE_EVICTION_TRIGGER_USERPTR );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, KFD_QUEUE_EVICTION_TRIGGER_TTM );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, KFD_QUEUE_EVICTION_CRIU_RESTORE );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE);
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU );
// clang-format on
static_assert(kfd_bitmask(std::index_sequence<ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START,
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU>()) ==
(KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_PAGE_FAULT_START) |
KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_END) |
KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU)));
static_assert((page_migration_info<ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END>::kfd_bitmask |
page_migration_info<ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION>::kfd_bitmask |
page_migration_info<ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU>::kfd_bitmask) ==
(KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_END) |
KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_QUEUE_EVICTION) |
KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU)));
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_MIGRATE_START, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_MIGRATE_END, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_PAGE_FAULT_START,
rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_PAGE_FAULT_END, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_QUEUE_EVICTION, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_QUEUE_RESTORE, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_UNMAP_FROM_GPU, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU);
} // namespace page_migration
} // namespace rocprofiler
@@ -1,937 +0,0 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lib/rocprofiler-sdk/page_migration/page_migration.hpp"
#include "lib/common/logging.hpp"
#include "lib/common/mpl.hpp"
#include "lib/common/static_object.hpp"
#include "lib/common/utility.hpp"
#include "lib/rocprofiler-sdk/agent.hpp"
#include "lib/rocprofiler-sdk/buffer.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/details/kfd_ioctl.h"
#include "lib/rocprofiler-sdk/internal_threading.hpp"
#include "lib/rocprofiler-sdk/page_migration/utils.hpp"
#include <rocprofiler-sdk/agent.h>
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hsa/api_id.h>
#include <rocprofiler-sdk/hsa/table_id.h>
#include <fmt/core.h>
#include <hsa/amd_hsa_signal.h>
#include <hsa/hsa.h>
#include <sys/poll.h>
#include <unistd.h>
#include <atomic>
#include <cerrno>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <memory>
#include <ratio>
#include <stdexcept>
#include <string>
#include <string_view>
#include <thread>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <fcntl.h>
#include <poll.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1
#include "page_migration.def.cpp"
#undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL
namespace rocprofiler
{
namespace page_migration
{
template <typename T>
using small_vector = common::container::small_vector<T>;
using context_t = context::context;
using context_array_t = common::container::small_vector<const context_t*>;
template <size_t>
struct page_migration_info;
template <size_t>
struct kfd_event_info;
template <typename EnumT, int ValueE>
struct page_migration_enum_info;
template <typename EnumT>
struct page_migration_bounds;
// Parsing and utilities
namespace
{
constexpr auto
page_to_bytes(size_t val)
{
// each page is 4KB = 4096 bytes
return val << 12;
}
template <size_t>
page_migration_record_t parse_event(std::string_view)
{
ROCP_FATAL_IF(false) << page_migration_info<ROCPROFILER_PAGE_MIGRATION_NONE>::format_str;
return {};
}
auto
get_node_agent_id(uint32_t _node_id)
{
using agent_id_map_t = std::unordered_map<uint64_t, rocprofiler_agent_id_t>;
static auto*& _data = static_object<agent_id_map_t>::construct([]() {
auto _v = std::unordered_map<uint64_t, rocprofiler_agent_id_t>{};
for(const auto* agent : agent::get_agents())
_v.emplace(agent->gpu_id, agent->id);
return _v;
}());
CHECK(_data != nullptr);
ROCP_FATAL_IF(_data->count(_node_id) == 0) << "page_migration: unknown node id: " << _node_id;
return _data->at(_node_id);
}
template <>
page_migration_record_t
parse_event<ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START>(std::string_view str)
{
auto rec = page_migration_record_t{};
auto& e = rec.args.page_fault_start;
uint32_t kind{};
uint32_t _node_id = 0;
char fault;
std::sscanf(str.data(),
page_migration_info<ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START>::format_str.data(),
&kind,
&rec.timestamp,
&rec.pid,
&e.address,
&_node_id,
&fault);
e.read_fault = (fault == 'R');
e.address = page_to_bytes(e.address);
e.agent_id = get_node_agent_id(_node_id);
ROCP_TRACE << fmt::format("Page fault start [ ts: {} pid: {} addr: 0x{:X} node: {} ] \n",
rec.timestamp,
rec.pid,
e.address,
e.agent_id.handle);
return rec;
}
template <>
page_migration_record_t
parse_event<ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END>(std::string_view str)
{
auto rec = page_migration_record_t{};
auto& e = rec.args.page_fault_end;
uint32_t kind{};
uint32_t _node_id = 0;
char migrated;
std::sscanf(str.data(),
page_migration_info<ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END>::format_str.data(),
&kind,
&rec.timestamp,
&rec.pid,
&e.address,
&_node_id,
&migrated);
// M or U -> migrated / unmigrated?
if(migrated == 'M')
e.migrated = true;
else if(migrated == 'U')
e.migrated = false;
else
ROCP_WARNING << "Unknown PAGE_FAULT_END migrated/unmigrated state";
e.address = page_to_bytes(e.address);
e.agent_id = get_node_agent_id(_node_id);
ROCP_TRACE << fmt::format(
"Page fault end [ ts: {} pid: {} addr: 0x{:X} node: {} migrated: {} ] \n",
rec.timestamp,
rec.pid,
e.address,
e.agent_id.handle,
migrated);
return rec;
}
template <>
page_migration_record_t
parse_event<ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START>(std::string_view str)
{
auto rec = page_migration_record_t{};
auto& e = rec.args.page_migrate_start;
uint32_t kind{};
uint32_t trigger{};
uint32_t _from_node = 0;
uint32_t _to_node = 0;
uint32_t _prefetch_node = 0;
uint32_t _preferred_node = 0;
std::sscanf(
str.data(),
page_migration_info<ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START>::format_str.data(),
&kind,
&rec.timestamp,
&rec.pid,
&e.start_addr,
&e.end_addr,
&_from_node,
&_to_node,
&_prefetch_node,
&_preferred_node,
&trigger);
e.end_addr += e.start_addr;
e.trigger = static_cast<migrate_trigger_t>(trigger);
e.start_addr = page_to_bytes(e.start_addr);
e.end_addr = page_to_bytes(e.end_addr) - 1;
e.from_agent = get_node_agent_id(_from_node);
e.to_agent = get_node_agent_id(_to_node);
e.prefetch_agent = get_node_agent_id(_prefetch_node);
e.preferred_agent = get_node_agent_id(_preferred_node);
ROCP_TRACE << fmt::format(
"Page migrate start [ ts: {} pid: {} addr s: 0x{:X} addr "
"e: 0x{:X} size: {}B from node: {} to node: {} prefetch node: {} preferred node: {} "
"trigger: {} ] \n",
rec.timestamp,
rec.pid,
e.start_addr,
e.end_addr,
(e.end_addr - e.start_addr),
e.from_agent.handle,
e.to_agent.handle,
e.prefetch_agent.handle,
e.preferred_agent.handle,
trigger);
return rec;
}
template <>
page_migration_record_t
parse_event<ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END>(std::string_view str)
{
auto rec = page_migration_record_t{};
auto& e = rec.args.page_migrate_end;
uint32_t kind{};
uint32_t trigger{};
uint32_t _from_node = 0;
uint32_t _to_node = 0;
int32_t _error_code = 0;
std::sscanf(str.data(),
page_migration_info<ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END>::format_str.data(),
&kind,
&rec.timestamp,
&rec.pid,
&e.start_addr,
&e.end_addr,
&_from_node,
&_to_node,
&trigger,
&_error_code);
e.end_addr += e.start_addr;
e.trigger = static_cast<migrate_trigger_t>(trigger);
e.start_addr = page_to_bytes(e.start_addr);
e.end_addr = page_to_bytes(e.end_addr) - 1;
e.from_agent = get_node_agent_id(_from_node);
e.to_agent = get_node_agent_id(_to_node);
// For older kernel versions, no event is generated in the error case,
// so the default value of 0 is correct for any given end event.
e.error_code = _error_code;
ROCP_TRACE << fmt::format("Page migrate end [ ts: {} pid: {} addr s: 0x{:X} addr e: "
"0x{:X} from node: {} to node: {} trigger: {} error code: {}] \n",
rec.timestamp,
rec.pid,
e.start_addr,
e.end_addr,
e.from_agent.handle,
e.to_agent.handle,
trigger,
_error_code);
return rec;
}
template <>
page_migration_record_t
parse_event<ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION>(std::string_view str)
{
auto rec = page_migration_record_t{};
auto& e = rec.args.queue_eviction;
uint32_t kind{};
uint32_t trigger{};
uint32_t _node_id = 0;
std::sscanf(str.data(),
page_migration_info<ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION>::format_str.data(),
&kind,
&rec.timestamp,
&rec.pid,
&_node_id,
&trigger);
e.trigger = static_cast<queue_suspend_trigger_t>(trigger);
e.agent_id = get_node_agent_id(_node_id);
ROCP_TRACE << fmt::format("Queue evict [ ts: {} pid: {} node: {} trigger: {} ] \n",
rec.timestamp,
rec.pid,
e.agent_id.handle,
trigger);
return rec;
}
template <>
page_migration_record_t
parse_event<ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE>(std::string_view str)
{
auto rec = page_migration_record_t{};
auto& e = rec.args.queue_restore;
uint32_t kind{};
uint32_t _node_id = 0;
std::sscanf(str.data(),
page_migration_info<ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE>::format_str.data(),
&kind,
&rec.timestamp,
&rec.pid,
&_node_id);
// check if we have a valid char at the end. -1 has \0
if(str[str.size() - 2] == 'R')
e.rescheduled = true;
else
e.rescheduled = false;
e.agent_id = get_node_agent_id(_node_id);
ROCP_TRACE << fmt::format(
"Queue restore [ ts: {} pid: {} node: {} ] \n", rec.timestamp, rec.pid, e.agent_id.handle);
return rec;
}
template <>
page_migration_record_t
parse_event<ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU>(std::string_view str)
{
auto rec = page_migration_record_t{};
auto& e = rec.args.unmap_from_gpu;
uint32_t kind{};
uint32_t trigger{};
uint32_t _node_id = 0;
std::sscanf(str.data(),
page_migration_info<ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU>::format_str.data(),
&kind,
&rec.timestamp,
&rec.pid,
&e.start_addr,
&e.end_addr,
&_node_id,
&trigger);
e.end_addr += e.start_addr;
e.trigger = static_cast<unmap_from_gpu_trigger_t>(trigger);
e.start_addr = page_to_bytes(e.start_addr);
e.end_addr = page_to_bytes(e.end_addr);
e.agent_id = get_node_agent_id(_node_id);
ROCP_TRACE << fmt::format(
"Unmap from GPU [ ts: {} pid: {} start addr: 0x{:X} end addr: 0x{:X} "
"node: {} trigger {} ] \n",
rec.timestamp,
rec.pid,
e.start_addr,
e.end_addr,
e.agent_id.handle,
trigger);
return rec;
}
template <>
page_migration_record_t
parse_event<ROCPROFILER_PAGE_MIGRATION_DROPPED_EVENT>(std::string_view str)
{
auto rec = page_migration_record_t{};
auto& e = rec.args.dropped_event;
uint32_t kind{};
std::sscanf(str.data(),
page_migration_info<ROCPROFILER_PAGE_MIGRATION_DROPPED_EVENT>::format_str.data(),
&kind,
&rec.timestamp,
&rec.pid,
&e.dropped_events_count);
ROCP_TRACE << fmt::format("Dropped events [ ts: {} pid: {} dropped count: {} ] \n",
rec.timestamp,
rec.pid,
e.dropped_events_count);
return rec;
}
template <>
page_migration_record_t parse_event<ROCPROFILER_PAGE_MIGRATION_NONE>(std::string_view)
{
ROCP_CI_LOG(WARNING)
<< "ROCPROFILER_PAGE_MIGRATION_NONE for parsing page migration events should not happen";
return common::init_public_api_struct(page_migration_record_t{});
}
template <size_t OpInx, size_t... OpInxs>
page_migration_record_t
parse_event(size_t event_id, std::string_view strn, std::index_sequence<OpInx, OpInxs...>)
{
if(OpInx == static_cast<uint32_t>(event_id))
{
auto rec = parse_event<OpInx>(strn);
rec.size = sizeof(page_migration_record_t);
rec.kind = ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION;
rec.operation = static_cast<rocprofiler_page_migration_operation_t>(OpInx);
return rec;
}
if constexpr(sizeof...(OpInxs) > 0)
return parse_event(event_id, strn, std::index_sequence<OpInxs...>{});
return page_migration_record_t{};
}
/* -----------------------------------------------------------------------------------*/
} // namespace
size_t
get_rocprof_op(const std::string_view event_data)
{
uint32_t kfd_event_id{};
std::sscanf(event_data.data(), "%x ", &kfd_event_id);
auto rocprof_id =
kfd_to_rocprof_op(static_cast<kfd_event_id_t>(kfd_event_id),
std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{});
ROCP_CI_LOG_IF(WARNING, rocprof_id == 0)
<< fmt::format("Failed to parse KFD event ID {}. Parsed ID: {}, SDK ID: {}\n",
event_data[0],
kfd_event_id,
rocprof_id);
return rocprof_id;
}
void
kfd_readlines(const std::string_view str, void(handler)(std::string_view))
{
const auto find_newline = [&](auto b) { return std::find(b, str.cend(), '\n'); };
const auto* cursor = str.cbegin();
for(const auto* pos = find_newline(cursor); pos != str.cend(); pos = find_newline(cursor))
{
size_t char_count = pos - cursor;
assert(char_count > 0);
std::string_view event_str{cursor, char_count};
ROCP_INFO << fmt::format("KFD event: [{}]", event_str);
handler(event_str);
cursor = pos + 1;
}
}
// Event capture and reporting
namespace
{
constexpr auto kfd_ioctl_version = (1000 * KFD_IOCTL_MAJOR_VERSION) + KFD_IOCTL_MINOR_VERSION;
// Support has been added in kfdv >= 1.10+
static_assert(kfd_ioctl_version >= 1010, "KFD SMI support missing in kfd_ioctl.h");
auto
get_contexts(int operation)
{
auto active_contexts = context::get_active_contexts([](const auto* ctx) {
return (ctx->buffered_tracer &&
ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION));
});
auto operation_ctxs = context::context_array_t{};
for(const auto* itr : active_contexts)
{
// if the given domain + op is not enabled, skip this context
if(itr->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, operation))
{
operation_ctxs.emplace_back(itr);
}
}
return operation_ctxs;
}
void
handle_reporting(std::string_view event_data)
{
const auto op_inx = get_rocprof_op(event_data);
auto buffered_contexts = get_contexts(op_inx);
if(buffered_contexts.empty()) return;
// Parse and process the event
auto record = parse_event(
op_inx, event_data, std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{});
for(const auto& itr : buffered_contexts)
{
auto* buffer = buffer::get_buffer(
itr->buffered_tracer->buffer_data.at(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION));
CHECK_NOTNULL(buffer)->emplace(
ROCPROFILER_BUFFER_CATEGORY_TRACING, ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION, record);
}
}
} // namespace
namespace
{
void poll_events(small_vector<pollfd>);
}
// KFD utils
namespace kfd
{
using fd_flags_t = decltype(EFD_NONBLOCK);
using fd_t = decltype(pollfd::fd);
constexpr auto KFD_DEVICE_PATH{"/dev/kfd"};
SPECIALIZE_KFD_IOC_IOCTL(kfd_ioctl_get_version_args, AMDKFD_IOC_GET_VERSION);
SPECIALIZE_KFD_IOC_IOCTL(kfd_ioctl_smi_events_args, AMDKFD_IOC_SMI_EVENTS);
namespace
{
template <typename T>
auto
ioctl(int kfd_fd, T& args)
{
// from hsaKmt library (hsakmt/src/libhsakmt.c)
int exit_code{};
do
{
exit_code = ::ioctl(kfd_fd, IOC_event<T>::value, static_cast<void*>(&args));
} while(exit_code == -1 && (errno == EINTR || errno == EAGAIN));
if(exit_code == -1 && errno == EBADF)
{
/* In case pthread_atfork didn't catch it, this will
* make any subsequent hsaKmt calls fail in CHECK_KFD_OPEN.
*/
CHECK(true && "KFD file descriptor not valid in this process\n");
}
return exit_code;
}
struct kfd_device_fd
{
fd_t fd{-1};
kfd_device_fd()
{
fd = ::open(KFD_DEVICE_PATH, O_RDWR | O_CLOEXEC);
ROCP_FATAL_IF(fd == -1) << "Error opening KFD handle @ " << KFD_DEVICE_PATH;
}
~kfd_device_fd()
{
if(fd >= 0) close(fd);
}
};
const kfd_ioctl_get_version_args
get_version()
{
static kfd_ioctl_get_version_args version = [&]() {
auto args = kfd_ioctl_get_version_args{0, 0};
kfd_device_fd kfd_fd{};
if(ioctl(kfd_fd.fd, args) != -1)
ROCP_INFO << fmt::format("KFD v{}.{}", args.major_version, args.minor_version);
else
ROCP_ERROR << fmt::format("Could not determine KFD version");
return args;
}();
return version;
}
struct poll_kfd_t
{
static constexpr auto DEFAULT_FLAGS{EFD_CLOEXEC};
struct gpu_fd_t
{
unsigned int node_id = 0;
fd_t fd = {};
const rocprofiler_agent_t* agent = nullptr;
};
kfd_device_fd kfd_fd = {};
pollfd thread_notify = {};
std::thread bg_thread = {};
bool active = {false};
poll_kfd_t() = default;
poll_kfd_t(const small_vector<size_t>& rprof_ev)
: kfd_fd{kfd_device_fd{}}
{
small_vector<pollfd> file_handles = {};
const auto kfd_flags =
kfd_bitmask(rprof_ev, std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{});
ROCP_TRACE << fmt::format("Setting KFD flags to [0b{:b}] \n", kfd_flags);
// Create fd for notifying thread when we want to wake it up, and an eventfd for any events
// to this thread
file_handles.emplace_back(
pollfd{.fd = eventfd(0, DEFAULT_FLAGS), .events = 0, .revents = 0});
fd_t thread_pipes[2]{};
[&]() {
const auto retcode = pipe2(&thread_pipes[0], DEFAULT_FLAGS);
const auto _err = errno;
ROCP_FATAL_IF(retcode != 0)
<< fmt::format("Pipe creation for page-migration thread notify returned {} :: {}\n",
retcode,
strerror(_err));
}();
thread_notify = pollfd{
.fd = thread_pipes[1],
.events = POLLIN,
.revents = 0,
};
// add pipe listening end to fds to watch
file_handles.emplace_back(pollfd{thread_pipes[0], POLLIN, 0});
// get FD, start thread, and then enable events
for(const auto& agent : agent::get_agents())
{
if(agent->type == ROCPROFILER_AGENT_TYPE_GPU)
{
auto gpu_event_fd = get_node_fd(agent->gpu_id);
file_handles.emplace_back(pollfd{gpu_event_fd, POLLIN, 0});
ROCP_TRACE << fmt::format(
"GPU node {} with fd {} added\n", agent->gpu_id, gpu_event_fd);
}
}
// Enable KFD masked events by writing flags to kfd fd
for(size_t i = 2; i < file_handles.size(); ++i)
{
auto& fd = file_handles[i];
auto write_size = write(fd.fd, &kfd_flags, sizeof(kfd_flags));
ROCP_TRACE << fmt::format(
"Writing {} to GPU fd {} ({} bytes)\n", kfd_flags, fd.fd, write_size);
CHECK(write_size == sizeof(kfd_flags));
}
// start bg thread
internal_threading::notify_pre_internal_thread_create(ROCPROFILER_LIBRARY);
bg_thread = std::thread{poll_events, file_handles};
internal_threading::notify_post_internal_thread_create(ROCPROFILER_LIBRARY);
active = true;
}
static auto get_event_id(const std::string_view& strn)
{
uint32_t event_id{std::numeric_limits<uint32_t>::max()};
std::sscanf(strn.data(), "%x ", &event_id);
CHECK(event_id <= KFD_SMI_EVENT_ALL_PROCESS);
}
poll_kfd_t(const poll_kfd_t&) = delete;
poll_kfd_t& operator=(const poll_kfd_t&) = delete;
poll_kfd_t(poll_kfd_t&&) noexcept = default;
poll_kfd_t& operator=(poll_kfd_t&&) noexcept = default;
~poll_kfd_t()
{
ROCP_TRACE << fmt::format("Terminating poll_kfd\n");
if(!active) return;
// wake thread up
auto bytes_written{-1};
do
{
bytes_written = write(thread_notify.fd, "E", 1);
} while(bytes_written == -1 && (errno == EINTR || errno == EAGAIN));
bg_thread.join();
close(thread_notify.fd);
ROCP_TRACE << fmt::format("Background thread signalled\n");
}
node_fd_t get_node_fd(int gpu_node_id) const
{
kfd_ioctl_smi_events_args args{};
args.gpuid = gpu_node_id;
if(auto ret = ioctl(kfd_fd.fd, args); ret == -1)
ROCP_ERROR << fmt::format(
"Could not get GPU node {} file descriptor (exit code: {})", gpu_node_id, ret);
return args.anon_fd;
}
};
} // namespace
} // namespace kfd
// for all contexts
struct config
{
private:
kfd::poll_kfd_t kfd_handle{};
static inline config* _config{nullptr};
config(const small_vector<size_t>& _event_ids)
: kfd_handle{_event_ids}
{}
public:
static void init(const small_vector<size_t>& event_ids) { _config = new config{event_ids}; }
static void reset()
{
config* ptr = nullptr;
std::swap(ptr, _config);
delete ptr;
}
static void reset_on_fork() { _config = nullptr; }
};
namespace
{
void
poll_events(small_vector<pollfd> file_handles)
{
// storage to write records to, 1MB
constexpr size_t PREALLOCATE_ELEMENT_COUNT{1024 * 128};
std::string scratch_buffer(PREALLOCATE_ELEMENT_COUNT, '\0');
auto& exitfd = file_handles[1];
// Wait or spin on events.
// 0 -> return immediately even if no events
// -1 -> wait indefinitely
pthread_setname_np(pthread_self(), "bg:pagemigr");
for(auto& fd : file_handles)
{
ROCP_TRACE << fmt::format(
"Handle = {}, events = {}, revents = {}\n", fd.fd, fd.events, fd.revents);
}
while(true)
{
auto poll_ret = poll(file_handles.data(), file_handles.size(), -1);
if(poll_ret == -1)
{
ROCP_CI_LOG(WARNING)
<< "Background thread file descriptors for page-migration are invalid";
return;
}
if((exitfd.revents & POLLIN) != 0)
{
for(const auto& f : file_handles)
{
close(f.fd);
}
ROCP_INFO << "Terminating background thread\n";
return;
}
using namespace std::chrono_literals;
// 0 and 1 are for generic and pipe-notify handles
for(size_t i = 2; i < file_handles.size(); ++i)
{
auto& fd = file_handles[i];
// We have data to read, perhaps multiple events
if((fd.revents & POLLIN) != 0)
{
size_t status_size = read(fd.fd, scratch_buffer.data(), scratch_buffer.size());
auto event_strings = std::string_view{scratch_buffer.data(), status_size};
kfd_readlines(event_strings, handle_reporting);
}
fd.revents = 0;
}
}
}
template <size_t Idx, size_t... IdxTail>
const char*
name_by_id(const uint32_t id, std::index_sequence<Idx, IdxTail...>)
{
if(Idx == id) return page_migration_info<Idx>::name;
if constexpr(sizeof...(IdxTail) > 0)
return name_by_id(id, std::index_sequence<IdxTail...>{});
else
return nullptr;
}
template <size_t... Idx>
void
get_ids(std::vector<uint32_t>& _id_list, std::index_sequence<Idx...>)
{
auto _emplace = [](auto& _vec, uint32_t _v) {
if(_v < static_cast<uint32_t>(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST)) _vec.emplace_back(_v);
};
(_emplace(_id_list, page_migration_info<Idx>::operation), ...);
}
bool
context_filter(const context::context* ctx)
{
return (ctx->buffered_tracer &&
(ctx->buffered_tracer->domains(ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)));
}
template <size_t... Idx>
void
to_bitmask(small_vector<size_t>& _id_list, std::index_sequence<Idx...>)
{
auto _emplace = [](auto& _vec, uint32_t _v) {
if(_v < static_cast<uint32_t>(ROCPROFILER_HSA_AMD_EXT_API_ID_LAST)) _vec.emplace_back(_v);
};
(_emplace(_id_list, page_migration_info<Idx>::operation), ...);
}
template <size_t... Inxs>
rocprofiler_status_t init(std::index_sequence<Inxs...>)
{
static const small_vector<size_t> event_ids{Inxs...};
// Check if version is more than 1.11
auto ver = kfd::get_version();
if(ver.major_version * 1000 + ver.minor_version > 1011)
{
if(!context::get_registered_contexts(context_filter).empty())
{
config::init(event_ids);
}
return ROCPROFILER_STATUS_SUCCESS;
}
else
{
// Add a buffer record with this info
ROCP_ERROR << fmt::format(
"KFD does not support SVM event reporting in v{}.{} (requires v1.11)",
ver.major_version,
ver.minor_version);
return ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL;
}
}
} // namespace
} // namespace page_migration
} // namespace rocprofiler
namespace rocprofiler::page_migration
{
rocprofiler_status_t
init()
{
pthread_atfork(nullptr, nullptr, []() {
// null out child's copy on fork and reinitialize
// otherwise all children wait on the same thread to join
config::reset_on_fork();
init(std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{});
});
return init(std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{});
}
void
finalize()
{
config::reset();
}
const char*
name_by_id(uint32_t id)
{
return name_by_id(id, std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{});
}
std::vector<uint32_t>
get_ids()
{
auto _data = std::vector<uint32_t>{};
_data.reserve(ROCPROFILER_PAGE_MIGRATION_LAST);
get_ids(_data, std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{});
return _data;
}
} // namespace rocprofiler::page_migration
@@ -1,60 +0,0 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lib/rocprofiler-sdk/page_migration/defines.hpp"
#include "lib/rocprofiler-sdk/page_migration/page_migration.hpp"
#if defined(ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL) && \
ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL == 1
namespace rocprofiler
{
namespace page_migration
{
using namespace rocprofiler::common;
using kfd_event_id_t = decltype(KFD_SMI_EVENT_NONE);
using migrate_trigger_t = rocprofiler_page_migration_trigger_t;
using page_migration_record_t = rocprofiler_buffer_tracing_page_migration_record_t;
using queue_suspend_trigger_t = rocprofiler_page_migration_queue_suspend_trigger_t;
using unmap_from_gpu_trigger_t = rocprofiler_page_migration_unmap_from_gpu_trigger_t;
using trigger_type_list_t = common::mpl::type_list<rocprofiler_page_migration_trigger_t,
queue_suspend_trigger_t,
unmap_from_gpu_trigger_t>;
// clang-format off
// Map ROCPROF UVM enums to KFD enums
SPECIALIZE_PAGE_MIGRATION_INFO(NONE, NONE, "Error: Invalid UVM event from KFD" );
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE_START, MIGRATE_START, "%x %ld -%d @%lx(%lx) %x->%x %x:%x %d\n");
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE_END, MIGRATE_END, "%x %ld -%d @%lx(%lx) %x->%x %d %d\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT_START, PAGE_FAULT_START, "%x %ld -%d @%lx(%x) %c\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT_END, PAGE_FAULT_END, "%x %ld -%d @%lx(%x) %c\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_EVICTION, QUEUE_EVICTION, "%x %ld -%d %x %d\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_RESTORE, QUEUE_RESTORE, "%x %ld -%d %x\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(UNMAP_FROM_GPU, UNMAP_FROM_GPU, "%x %ld -%d @%lx(%lx) %x %d\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(DROPPED_EVENT, DROPPED_EVENT, "%x %ld -%d %d\n" );
#undef SPECIALIZE_PAGE_MIGRATION_INFO
// clang-format on
} // namespace page_migration
} // namespace rocprofiler
#endif
@@ -43,9 +43,9 @@
#include "lib/rocprofiler-sdk/hsa/scratch_memory.hpp"
#include "lib/rocprofiler-sdk/intercept_table.hpp"
#include "lib/rocprofiler-sdk/internal_threading.hpp"
#include "lib/rocprofiler-sdk/kfd/kfd.hpp"
#include "lib/rocprofiler-sdk/marker/marker.hpp"
#include "lib/rocprofiler-sdk/ompt.hpp"
#include "lib/rocprofiler-sdk/page_migration/page_migration.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/code_object.hpp"
#include "lib/rocprofiler-sdk/pc_sampling/service.hpp"
#include "lib/rocprofiler-sdk/rccl/rccl.hpp"
@@ -757,7 +757,7 @@ finalize()
hsa::queue_controller_fini();
thread_trace::finalize();
ompt::finalize_ompt();
page_migration::finalize();
kfd::finalize();
#if ROCPROFILER_SDK_HSA_PC_SAMPLING > 0
// WARNING: this must precede `code_object::finalize()`
pc_sampling::code_object::finalize();
@@ -20,8 +20,7 @@ set(rocprofiler_lib_sources
naming.cpp
timestamp.cpp
version.cpp
hsa_barrier.cpp
page_migration.cpp)
hsa_barrier.cpp)
add_executable(rocprofiler-sdk-lib-tests)
target_sources(rocprofiler-sdk-lib-tests PRIVATE ${rocprofiler_lib_sources}
@@ -91,9 +91,6 @@ TEST(enum_string, fwd_h)
TEST_STR(ROCPROFILER_STATUS_ERROR_BUFFER_BUSY);
TEST_STR(ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED);
TEST_STR(ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START);
TEST_STR(ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION);
// rocprofiler_buffer_category_t
TEST_STR(ROCPROFILER_BUFFER_CATEGORY_TRACING);
TEST_STR(ROCPROFILER_BUFFER_CATEGORY_COUNTERS);
@@ -116,6 +113,7 @@ TEST(enum_string, fwd_h)
TEST_STR(ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API);
TEST_STR(ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY);
TEST_STR(ROCPROFILER_BUFFER_TRACING_ROCDECODE_API);
TEST_STR(ROCPROFILER_BUFFER_TRACING_KFD_QUEUE);
// rocprofiler_code_object_operation_t
TEST_STR(ROCPROFILER_CODE_OBJECT_NONE);
@@ -152,10 +150,6 @@ TEST(enum_string, fwd_h)
TEST_STR(ROCPROFILER_BUFFER_POLICY_DISCARD);
TEST_STR(ROCPROFILER_BUFFER_POLICY_LOSSLESS);
// rocprofiler_page_migration_operation_t
TEST_STR(ROCPROFILER_PAGE_MIGRATION_NONE);
TEST_STR(ROCPROFILER_PAGE_MIGRATION_DROPPED_EVENT);
// rocprofiler_scratch_memory_operation_t
TEST_STR(ROCPROFILER_SCRATCH_MEMORY_NONE);
TEST_STR(ROCPROFILER_SCRATCH_MEMORY_ALLOC);
@@ -265,16 +259,18 @@ TEST(enum_string, roctx_api_id)
TEST_API_ID_STR(ROCPROFILER_MARKER_NAME_API_ID, roctxNameHipStream);
}
TEST(enum_string, page_migration_triggers)
TEST(enum_string, kfd_events)
{
TEST_STR(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH);
TEST_STR(ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION);
TEST_STR(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM);
TEST_STR(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE);
TEST_STR(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY);
TEST_STR(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU);
TEST_STR(ROCPROFILER_KFD_EVENT_PAGE_MIGRATE_PAGEFAULT_CPU);
TEST_STR(ROCPROFILER_KFD_EVENT_PAGE_FAULT_END_PAGE_MIGRATED);
TEST_STR(ROCPROFILER_KFD_EVENT_QUEUE_EVICT_SUSPEND);
TEST_STR(ROCPROFILER_KFD_EVENT_QUEUE_RESTORE);
TEST_STR(ROCPROFILER_KFD_EVENT_UNMAP_FROM_GPU_UNMAP_FROM_CPU);
TEST_STR(ROCPROFILER_KFD_EVENT_DROPPED_EVENTS);
TEST_STR(ROCPROFILER_KFD_PAGE_MIGRATE_PAGEFAULT_CPU);
TEST_STR(ROCPROFILER_KFD_PAGE_MIGRATE_TTM_EVICTION);
TEST_STR(ROCPROFILER_KFD_PAGE_FAULT_WRITE_FAULT_UPDATED);
TEST_STR(ROCPROFILER_KFD_QUEUE_EVICT_CRIU_RESTORE);
}
TEST(enum_string, rccl_api_id)
@@ -1,106 +0,0 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "lib/common/container/small_vector.hpp"
#include "lib/common/defines.hpp"
#include "lib/common/mpl.hpp"
#include "lib/rocprofiler-sdk/page_migration/utils.hpp"
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <fmt/core.h>
#include <gtest/gtest.h>
#include <sstream>
#include <string_view>
#include <utility>
#define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1
#include "lib/rocprofiler-sdk/page_migration/page_migration.def.cpp"
#undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL
namespace
{
constexpr std::string_view MULTILINE_STRING = "This is 0 Line 0\n"
"This is 10 Line 1\n"
"This is 20 Line 2\n"
"This is 30 Line 3\n"
"This is 40 Line 4\n";
}
void
return_line(const std::string_view line)
{
static int line_no = 0;
std::stringstream strs{};
strs << fmt::format("This is {} Line {}", line_no * 10, line_no);
EXPECT_EQ(strs.str(), line);
line_no++;
}
auto
parse_lines()
{
rocprofiler::page_migration::kfd_readlines(MULTILINE_STRING, return_line);
}
TEST(page_migration, readlines)
{
// Ensure all lines are read
parse_lines();
}
TEST(page_migration, rocprof_kfd_map)
{
using namespace rocprofiler::page_migration;
using namespace rocprofiler::common::container;
using rocprofiler_page_migration_seq_t =
std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>;
const small_vector<size_t> vec{ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END,
ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU};
EXPECT_EQ((page_migration_info<ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END>::kfd_bitmask |
page_migration_info<ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION>::kfd_bitmask |
page_migration_info<ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU>::kfd_bitmask),
kfd_bitmask(vec, rocprofiler_page_migration_seq_t{}));
const auto to_kfd_str = [](kfd_smi_event e) {
std::string str = fmt::format("{:x} ", static_cast<size_t>(e));
return rocprofiler::page_migration::get_rocprof_op({str});
};
// clang-format off
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_MIGRATE_START), ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_MIGRATE_END), ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_PAGE_FAULT_START), ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_PAGE_FAULT_END), ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_QUEUE_EVICTION), ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_QUEUE_RESTORE), ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_UNMAP_FROM_GPU), ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_DROPPED_EVENT), ROCPROFILER_PAGE_MIGRATION_DROPPED_EVENT);
// clang-format on
}
@@ -71,7 +71,6 @@ add_subdirectory(async-copy-tracing)
add_subdirectory(hsa-memory-allocation)
add_subdirectory(scratch-memory-tracing)
add_subdirectory(c-tool)
add_subdirectory(page-migration)
add_subdirectory(thread-trace)
add_subdirectory(pc_sampling)
add_subdirectory(hip-graph-tracing)
@@ -27,7 +27,6 @@ add_subdirectory(multistream)
add_subdirectory(vector-operations)
add_subdirectory(hip-in-libraries)
add_subdirectory(scratch-memory)
add_subdirectory(page-migration)
add_subdirectory(hsa-queue-dependency)
add_subdirectory(hip-graph)
add_subdirectory(hsa-memory-allocation)
@@ -1,42 +0,0 @@
#
#
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
if(NOT CMAKE_HIP_COMPILER)
find_program(
amdclangpp_EXECUTABLE
NAMES amdclang++
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATH_SUFFIXES bin llvm/bin NO_CACHE)
mark_as_advanced(amdclangpp_EXECUTABLE)
if(amdclangpp_EXECUTABLE)
set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}")
endif()
endif()
project(rocprofiler-sdk-tests-bin-page-migration LANGUAGES CXX HIP)
foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "")
set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}")
endif()
endforeach()
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_HIP_STANDARD 17)
set(CMAKE_HIP_EXTENSIONS OFF)
set(CMAKE_HIP_STANDARD_REQUIRED ON)
set_source_files_properties(page-migration.cpp PROPERTIES LANGUAGE HIP)
add_executable(page-migration)
target_sources(page-migration PRIVATE page-migration.cpp)
target_compile_options(page-migration PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow
-Werror)
find_package(Threads REQUIRED)
target_link_libraries(page-migration PRIVATE Threads::Threads)
@@ -1,300 +0,0 @@
// MIT License
//
// Copyright (c) 2023-2025 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <hip/hip_runtime.h>
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <thread>
#include <vector>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#define HIP_API_CALL(CALL) \
{ \
hipError_t error_ = (CALL); \
if(error_ != hipSuccess) \
{ \
auto _hip_api_print_lk = auto_lock_t{print_lock}; \
fprintf(stderr, \
"%s:%d :: HIP error : %s\n", \
__FILE__, \
__LINE__, \
hipGetErrorString(error_)); \
throw std::runtime_error("hip_api_call"); \
} \
}
using auto_lock_t = std::unique_lock<std::mutex>;
auto print_lock = std::mutex{};
__global__ void
kernel(size_t* __restrict__ data, int size)
{
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int stride = hipBlockDim_x * hipGridDim_x;
for(int i = x; i < size; i += stride)
{
data[i] *= 2;
}
}
struct mmap_allocator
{
explicit mmap_allocator(size_t num_pages)
{
m_size = num_pages * sysconf(_SC_PAGE_SIZE);
void* ret = mmap(nullptr, // addr: null. Kernel gives us page-aligned memory
m_size, // length: num bytes to allocate
PROT_WRITE | PROT_READ, // mem_prot: Allow read/write
MAP_ANONYMOUS | MAP_PRIVATE, // flags: No file handle
-1, // no fd, use memory "MAP_ANONYMOUS"
0); // offset into fd
if(ret == ((void*) -1)) // NOLINT(performance-no-int-to-ptr)
{
auto ecode = errno;
fprintf(stderr, "mmap error %d: %s", ecode, strerror(ecode));
throw std::runtime_error("mmap failed");
}
else
{
m_addr = ret;
::memset(m_addr, 0, m_size);
}
}
~mmap_allocator()
{
auto ret = munmap(m_addr, m_size);
if(ret != 0) perror("munmap failed");
}
mmap_allocator(const mmap_allocator&) = delete;
mmap_allocator(mmap_allocator&&) noexcept = default;
mmap_allocator& operator=(const mmap_allocator&) = delete;
mmap_allocator& operator=(mmap_allocator&&) noexcept = default;
template <typename Up>
Up* get() const
{
static_assert(!std::is_pointer<Up>::value, "must not be pointer type");
return static_cast<Up*>(m_addr);
}
private:
size_t m_size = 0;
void* m_addr = nullptr;
};
int
run_test(int dev_idx, int num_iter)
{
using namespace std::chrono_literals;
constexpr size_t NUM_PAGES = 512;
const size_t PAGE_SIZE_BYTES = ::sysconf(_SC_PAGE_SIZE);
const size_t elem_count = (NUM_PAGES * PAGE_SIZE_BYTES) / sizeof(size_t);
auto alloc = mmap_allocator(NUM_PAGES);
void* data_v = alloc.get<void>();
auto* data = alloc.get<size_t>();
for(size_t i = 0; i < elem_count; ++i)
if(data[i] != 0) throw std::runtime_error{"bad init"};
printf("Allocated size: %lu bytes (%lu KB), (%lu MB), %zu elements @ %p\n",
sizeof(size_t) * elem_count,
sizeof(size_t) * elem_count / 1024,
sizeof(size_t) * elem_count / 1024 / 1024,
elem_count,
data_v);
HIP_API_CALL(hipSetDevice(dev_idx));
HIP_API_CALL(hipHostRegister(data, elem_count * sizeof(size_t), hipHostRegisterDefault));
constexpr size_t MAPS_BUFFER_SIZE = 1024 * 1024;
char maps[MAPS_BUFFER_SIZE];
std::memset(maps, '\0', MAPS_BUFFER_SIZE);
auto fd = open("/proc/self/maps", O_RDONLY | O_CLOEXEC);
if(fd == -1)
{
auto ecode = errno;
fprintf(stderr, "mmap error %d: %s", ecode, strerror(ecode));
exit(-1);
}
auto bytes = read(fd, maps, MAPS_BUFFER_SIZE - 1);
if(bytes == -1)
{
auto ecode = errno;
fprintf(stderr, "mmap error %d: %s", ecode, strerror(ecode));
exit(-1);
}
close(fd);
std::string_view maps_data{maps, static_cast<size_t>(bytes)};
std::cout << "------------\n";
std::cout << maps_data;
std::cout << "------------\n";
std::istringstream maps_stream{maps_data.data()};
std::string line(1024, '\0');
while(std::getline(maps_stream, line))
{
char __[1024];
int _{};
void* start{};
void* end{};
auto ret =
std::sscanf(line.data(), "%p-%p %s %d %d:%d %d\n", &start, &end, __, &_, &_, &_, &_);
if(ret > 0 && (start == data_v))
{
size_t ptr_diff = ((size_t) end - (size_t) start);
printf("Found match: %zu %zu KB, %zu 4K > %s\n",
ptr_diff,
ptr_diff / 1024,
ptr_diff / 4096,
line.data());
}
}
hipStream_t stream{};
HIP_API_CALL(hipStreamCreate(&stream));
for(int iter = 0; iter < num_iter; ++iter)
{
for(size_t i = 0; i < elem_count; ++i)
{
data[i] = i;
}
// std::cout << "launching..." << std::endl;
hipLaunchKernelGGL(kernel, 1024, 1024, 0, stream, data, elem_count);
HIP_API_CALL(hipStreamSynchronize(stream));
for(size_t i = 0; i < elem_count; ++i)
{
const auto data_i = data[i];
if(data_i != (i * 2))
{
auto msg = std::stringstream{};
msg << "GPU computed value at " << i << " in iteration " << iter
<< " is incorrect. Expected " << (i * 2) << ", found " << data_i;
throw std::runtime_error{msg.str()};
}
}
}
HIP_API_CALL(hipStreamSynchronize(stream));
HIP_API_CALL(hipStreamDestroy(stream));
HIP_API_CALL(hipHostUnregister(data));
return 0;
}
int
main(int argc, const char** argv)
{
using namespace std::chrono_literals;
const auto usage_msg = [](const char** _argv) {
fprintf(stderr, "usage: %s <NUMBER OF THREADS> <ITERATIONS PER THREAD>\n", _argv[0]);
};
if(argc != 3)
{
usage_msg(argv);
exit(EXIT_FAILURE);
}
for(int i = 1; i < argc; ++i)
{
auto _arg = std::string{argv[i]};
if(_arg == "?" || _arg == "-h" || _arg == "--help")
{
usage_msg(argv);
exit(EXIT_SUCCESS);
}
}
const auto num_threads = std::atoi(argv[1]);
if(num_threads < 1)
{
fprintf(stderr, "Error: Invalid value %d for num_threads (min 1)\n", num_threads);
exit(EXIT_FAILURE);
}
const auto num_iter = std::atoi(argv[2]);
if(num_iter < 1)
{
fprintf(stderr, "Error: Invalid value %d for num_iter (min 1)\n", num_iter);
exit(EXIT_FAILURE);
}
int ndevice = 0;
HIP_API_CALL(hipGetDeviceCount(&ndevice));
run_test(0, num_iter);
std::vector<std::thread> threads;
threads.reserve(num_threads);
std::cerr << "Running " << num_iter << " iterations/thread on " << num_threads << " threads\n";
for(auto i = 0; i < num_threads; ++i)
{
auto _dev_idx = i % ndevice;
threads.emplace_back([_dev_idx, _num_iter = num_iter]() { run_test(_dev_idx, _num_iter); });
}
std::cerr << "Waiting for threads\n";
for(auto& t : threads)
{
t.join();
}
for(int i = 0; i < ndevice; ++i)
{
HIP_API_CALL(hipSetDevice(i));
HIP_API_CALL(hipDeviceSynchronize());
HIP_API_CALL(hipDeviceReset());
}
return 0;
}
@@ -1,76 +0,0 @@
#
#
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(
rocprofiler-sdk-tests-page-migration
LANGUAGES CXX
VERSION 0.0.0)
find_package(rocprofiler-sdk REQUIRED)
if(ROCPROFILER_MEMCHECK_PRELOAD_ENV)
set(PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}:$<TARGET_FILE:rocprofiler-sdk-json-tool>")
else()
set(PRELOAD_ENV "LD_PRELOAD=$<TARGET_FILE:rocprofiler-sdk-json-tool>")
endif()
list(GET rocprofiler-sdk-tests-gfx-info 0 page-migration-gpu-0-gfx-info)
# disabled on all architectures
if("${page-migration-gpu-0-gfx-info}" MATCHES "^gfx([0-9a-fA-F]+)$")
set(IS_DISABLED ON)
endif()
add_test(NAME test-page-migration-execute COMMAND $<TARGET_FILE:page-migration> 4 1024)
set(page-migration-env
"${PRELOAD_ENV}"
"ROCPROFILER_DISABLE_PERFETTO_ANNOTATIONS=1"
"ROCPROFILER_TOOL_OUTPUT_FILE=page-migration-test.json"
"ROCPROFILER_TOOL_CONTEXTS_EXCLUDE=HSA_API_BUFFERED,HSA_API_CALLBACK"
"LD_LIBRARY_PATH=$<TARGET_FILE_DIR:rocprofiler-sdk::rocprofiler-sdk-shared-library>:$ENV{LD_LIBRARY_PATH}"
)
set_tests_properties(
test-page-migration-execute
PROPERTIES TIMEOUT
60
LABELS
"integration-tests"
ENVIRONMENT
"${page-migration-env}"
FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
SKIP_REGULAR_EXPRESSION
"KFD does not support SVM event reporting"
WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}
DISABLED
"${IS_DISABLED}")
# copy to binary directory
rocprofiler_configure_pytest_files(COPY validate.py conftest.py CONFIG pytest.ini)
add_test(NAME test-page-migration-validate
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input
${CMAKE_CURRENT_BINARY_DIR}/page-migration-test.json)
set_tests_properties(
test-page-migration-validate
PROPERTIES TIMEOUT
45
LABELS
"integration-tests"
DEPENDS
test-page-migration-execute
FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
SKIP_REGULAR_EXPRESSION
"KFD does not support SVM event reporting"
WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}
DISABLED
"${IS_DISABLED}")
@@ -1,50 +0,0 @@
#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import json
import pytest
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
def pytest_addoption(parser):
parser.addoption(
"--input",
action="store",
default="page-migration-test.json",
help="Input JSON",
)
@pytest.fixture
def input_data(request):
filename = request.config.getoption("--input")
data = None
with open(filename, "r") as inp:
data = json.load(inp)
if data["rocprofiler-sdk-json-tool"]["metadata"]["validate_page_migration"] is False:
return pytest.skip(
"Skipping test because KFD does not support SVM event reporting"
)
return dotdict(data)
@@ -1,5 +0,0 @@
[pytest]
addopts = --durations=20 -rA -s -vv
testpaths = validate.py
pythonpath = @ROCPROFILER_SDK_TESTS_BINARY_DIR@/pytest-packages
@@ -1,377 +0,0 @@
#!/usr/bin/env python3
# MIT License
#
# Copyright (c) 2024-2025 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
from collections import defaultdict
import sys
import pytest
test_api_traces = [
"hsa_api_traces",
"marker_api_traces",
"hip_api_traces",
"rccl_api_traces",
"scratch_memory_traces",
]
# helper function
def node_exists(name, data, min_len=1):
assert name in data
assert data[name] is not None
if isinstance(data[name], (list, tuple, dict, set)):
assert len(data[name]) >= min_len
def to_dict(key_values):
a = defaultdict()
for kv in key_values:
a[kv["key"]] = kv["value"]
return a
def get_operation(record, kind_name, op_name=None):
for idx, itr in enumerate(record["names"]):
if kind_name == itr["kind"]:
if op_name is None:
return idx, itr["operations"]
else:
for oidx, oname in enumerate(itr["operations"]):
if op_name == oname:
return oidx
return None
def dict_from_value_key(d):
ret_d = defaultdict()
for k, v in d.items():
assert v not in ret_d
ret_d[v] = k
return ret_d
def sort_by_timestamp(lines):
timestamp_line_map = {}
for log_line in lines:
timestamp = log_line.split(" ")[1]
timestamp_line_map[timestamp] = log_line
timestamps_sorted = sorted([l.split(" ")[1] for l in lines])
return timestamps_sorted, timestamp_line_map
# ------------------------------ Tests ------------------------------ #
def test_data_structure(input_data):
"""verify minimum amount of expected data is present"""
data = input_data
node_exists("rocprofiler-sdk-json-tool", data)
sdk_data = data["rocprofiler-sdk-json-tool"]
node_exists("metadata", sdk_data)
node_exists("pid", sdk_data["metadata"])
node_exists("main_tid", sdk_data["metadata"])
node_exists("init_time", sdk_data["metadata"])
node_exists("fini_time", sdk_data["metadata"])
node_exists("validate_page_migration", sdk_data["metadata"])
assert sdk_data["metadata"]["validate_page_migration"] is True
node_exists("agents", sdk_data)
node_exists("call_stack", sdk_data)
node_exists("callback_records", sdk_data)
node_exists("buffer_records", sdk_data)
node_exists("names", sdk_data["callback_records"])
node_exists("code_objects", sdk_data["callback_records"])
node_exists("kernel_symbols", sdk_data["callback_records"])
node_exists("host_functions", sdk_data["callback_records"])
node_exists("hsa_api_traces", sdk_data["callback_records"], 0)
node_exists("hip_api_traces", sdk_data["callback_records"])
node_exists("marker_api_traces", sdk_data["callback_records"], 0)
node_exists("names", sdk_data["buffer_records"])
node_exists("kernel_dispatch", sdk_data["buffer_records"])
node_exists("memory_copies", sdk_data["buffer_records"], 0)
node_exists("hsa_api_traces", sdk_data["buffer_records"], 0)
node_exists("hip_api_traces", sdk_data["buffer_records"])
node_exists("marker_api_traces", sdk_data["buffer_records"], 0)
node_exists("retired_correlation_ids", sdk_data["buffer_records"])
node_exists("page_migration", sdk_data["buffer_records"])
def test_timestamps(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
cb_start = {}
cb_end = {}
for titr in test_api_traces:
for itr in sdk_data["callback_records"][titr]:
cid = itr["correlation_id"]["internal"]
phase = itr["phase"]
if phase == 1:
cb_start[cid] = itr["timestamp"]
elif phase == 2:
cb_end[cid] = itr["timestamp"]
assert cb_start[cid] <= itr["timestamp"]
else:
assert phase == 1 or phase == 2
for itr in sdk_data["buffer_records"][titr]:
assert itr["start_timestamp"] <= itr["end_timestamp"]
for titr in ["kernel_dispatch", "memory_copies"]:
for itr in sdk_data["buffer_records"][titr]:
assert itr["start_timestamp"] < itr["end_timestamp"]
assert itr["correlation_id"]["internal"] > 0
assert itr["correlation_id"]["external"] > 0
assert sdk_data["metadata"]["init_time"] < itr["start_timestamp"]
assert sdk_data["metadata"]["init_time"] < itr["end_timestamp"]
assert sdk_data["metadata"]["fini_time"] > itr["start_timestamp"]
assert sdk_data["metadata"]["fini_time"] > itr["end_timestamp"]
# api_start = cb_start[itr["correlation_id"]["internal"]]
# api_end = cb_end[itr["correlation_id"]["internal"]]
# assert api_start < itr["start_timestamp"]
# assert api_end <= itr["end_timestamp"]
def test_internal_correlation_ids(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
api_corr_ids = []
for titr in test_api_traces:
for itr in sdk_data["callback_records"][titr]:
api_corr_ids.append(itr["correlation_id"]["internal"])
for itr in sdk_data["buffer_records"][titr]:
api_corr_ids.append(itr["correlation_id"]["internal"])
api_corr_ids_sorted = sorted(api_corr_ids)
api_corr_ids_unique = list(set(api_corr_ids))
for itr in sdk_data["buffer_records"]["kernel_dispatch"]:
assert itr["correlation_id"]["internal"] in api_corr_ids_unique
for itr in sdk_data["buffer_records"]["memory_copies"]:
assert itr["correlation_id"]["internal"] in api_corr_ids_unique
len_corr_id_unq = len(api_corr_ids_unique)
assert len(api_corr_ids) != len_corr_id_unq
assert max(api_corr_ids_sorted) == len_corr_id_unq
def test_external_correlation_ids(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
extern_corr_ids = []
for titr in test_api_traces:
for itr in sdk_data["callback_records"][titr]:
assert itr["correlation_id"]["external"] > 0
assert itr["thread_id"] == itr["correlation_id"]["external"]
extern_corr_ids.append(itr["correlation_id"]["external"])
extern_corr_ids = list(set(sorted(extern_corr_ids)))
for titr in test_api_traces:
for itr in sdk_data["buffer_records"][titr]:
assert itr["correlation_id"]["external"] > 0
assert itr["thread_id"] == itr["correlation_id"]["external"]
assert itr["thread_id"] in extern_corr_ids
assert itr["correlation_id"]["external"] in extern_corr_ids
for itr in sdk_data["buffer_records"]["kernel_dispatch"]:
assert itr["correlation_id"]["external"] > 0
assert itr["correlation_id"]["external"] in extern_corr_ids
for itr in sdk_data["buffer_records"]["memory_copies"]:
assert itr["correlation_id"]["external"] > 0
assert itr["correlation_id"]["external"] in extern_corr_ids
def test_kernel_ids(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
symbol_info = {}
for itr in sdk_data["callback_records"]["kernel_symbols"]:
phase = itr["phase"]
payload = itr["payload"]
kern_id = payload["kernel_id"]
assert phase == 1 or phase == 2
assert kern_id > 0
if phase == 1:
assert len(payload["kernel_name"]) > 0
symbol_info[kern_id] = payload
elif phase == 2:
assert payload["kernel_id"] in symbol_info.keys()
assert payload["kernel_name"] == symbol_info[kern_id]["kernel_name"]
for itr in sdk_data["buffer_records"]["kernel_dispatch"]:
assert itr["dispatch_info"]["kernel_id"] in symbol_info.keys()
for itr in sdk_data["callback_records"]["kernel_dispatch"]:
assert itr["payload"]["dispatch_info"]["kernel_id"] in symbol_info.keys()
def get_allocated_pages(callback_records):
# Get how many pages we allocated
op_idx = get_operation(callback_records, "HIP_RUNTIME_API", "hipHostRegister")
rt_idx, rt_data = get_operation(callback_records, "HIP_RUNTIME_API")
assert op_idx is not None, f"{rt_idx}:\n{rt_data}"
host_register_record = []
for itr in callback_records["hip_api_traces"]:
if itr["kind"] == rt_idx and itr["operation"] == op_idx and itr["phase"] == 2:
assert "sizeBytes" in itr["args"].keys(), f"{itr}"
assert "hostPtr" in itr["args"].keys(), f"{itr}"
host_register_record.append(itr)
num_host_register_calls = len(host_register_record)
assert num_host_register_calls == 5, "Expected 5 hipHostRegister calls in test"
ret = []
for i in range(num_host_register_calls):
alloc_size = int(host_register_record[0]["args"]["sizeBytes"], 10)
start_addr = int(host_register_record[0]["args"]["hostPtr"], 16)
end_addr = start_addr + alloc_size
ret.append((start_addr, end_addr))
return ret
def validate_node(id, nodes):
assert id.handle in nodes
def test_page_migration_data(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
buffer_records = sdk_data.buffer_records
callback_records = sdk_data.callback_records
page_migtation_buffers = buffer_records.page_migration
_, bf_op_names = get_operation(buffer_records, "PAGE_MIGRATION")
assert bf_op_names[0] == "PAGE_MIGRATION_NONE"
for op_name in bf_op_names:
assert "PAGE_MIGRATION" in op_name
assert len(bf_op_names) == 9
nodes = set(x.id.handle for x in sdk_data.agents)
allocations = get_allocated_pages(callback_records)
for start_addr, end_addr in allocations:
assert (
start_addr < end_addr
), "Expected start address less than end address for mmap range"
alloc_size = end_addr - start_addr
assert int(alloc_size) == 512 * 4096 # We allocated 512 pages in the test
# PID must be same
assert len(set(r.pid for r in page_migtation_buffers)) == 1
for r in page_migtation_buffers:
op = r.operation
assert r.size == 160
assert op != 0 and bf_op_names[op] != "PAGE_MIGRATION_NONE"
assert bf_op_names[op].lower().replace("page_migration_", "") in r.keys()
if "page_fault_start" in r:
arg = r.page_fault_start
assert arg.read_fault < 2
validate_node(arg.agent_id, nodes)
assert arg.address > 0
if "page_fault_end" in r:
arg = r.page_fault_end
assert arg.migrated < 2
validate_node(arg.agent_id, nodes)
assert arg.address > 0
if "page_migrate_start" in r:
arg = r.page_migrate_start
assert (
0 < arg.start_addr < arg.end_addr
), "Expected start addr to be less than end addr"
if arg.start_addr == start_addr:
assert arg.end_addr == end_addr
validate_node(arg.from_agent, nodes)
validate_node(arg.to_agent, nodes)
validate_node(arg.prefetch_agent, nodes)
validate_node(arg.preferred_agent, nodes)
assert 0 <= arg.trigger < 4
if "page_migrate_end" in r:
arg = r.page_migrate_end
assert (
0 < arg.start_addr < arg.end_addr
), "Expected start addr to be less than end addr"
if arg.start_addr == start_addr:
assert arg.end_addr == end_addr
validate_node(arg.from_agent, nodes)
validate_node(arg.to_agent, nodes)
assert 0 <= arg.trigger < 4
if "queue_eviction" in r:
arg = r.queue_eviction
validate_node(arg.agent_id, nodes)
assert 0 <= arg.trigger < 6
if "queue_restore" in r:
arg = r.queue_restore
assert arg.rescheduled < 2
validate_node(arg.agent_id, nodes)
if "unmap_from_gpu" in r:
arg = r.unmap_from_gpu
assert (
0 < arg.start_addr < arg.end_addr
), "Expected start addr to be less than end addr"
if arg.start_addr == start_addr:
assert arg.end_addr == end_addr
validate_node(arg.agent_id, nodes)
assert 0 <= arg.trigger < 3
if "dropped_event" in r:
arg = r.dropped_event
# We shouldn't get any dropped events. If we do, our test needs to be redesigned.
assert arg.dropped_events_count == 0
if __name__ == "__main__":
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
sys.exit(exit_code)
@@ -903,7 +903,6 @@ auto memory_copy_bf_records = std::deque<rocprofiler_buffer_tracing_memory_c
auto memory_allocation_bf_records =
std::deque<rocprofiler_buffer_tracing_memory_allocation_record_t>{};
auto scratch_memory_records = std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>{};
auto page_migration_records = std::deque<rocprofiler_buffer_tracing_page_migration_record_t>{};
auto corr_id_retire_records =
std::deque<rocprofiler_buffer_tracing_correlation_id_retirement_record_t>{};
auto rccl_api_bf_records = std::deque<rocprofiler_buffer_tracing_rccl_api_record_t>{};
@@ -912,6 +911,18 @@ auto rocdecode_api_ext_bf_records =
std::deque<rocprofiler_buffer_tracing_rocdecode_api_ext_record_t>{};
auto rocjpeg_api_bf_records = std::deque<rocprofiler_buffer_tracing_rocjpeg_api_record_t>{};
auto ompt_bf_records = std::deque<rocprofiler_buffer_tracing_ompt_record_t>{};
auto kfd_page_migrate_event_records =
std::deque<rocprofiler_buffer_tracing_kfd_event_page_migrate_record_t>{};
auto kfd_page_fault_event_records =
std::deque<rocprofiler_buffer_tracing_kfd_event_page_fault_record_t>{};
auto kfd_queue_event_records = std::deque<rocprofiler_buffer_tracing_kfd_event_queue_record_t>{};
auto kfd_unmap_from_gpu_event_records =
std::deque<rocprofiler_buffer_tracing_kfd_event_unmap_from_gpu_record_t>{};
auto kfd_dropped_events_event_records =
std::deque<rocprofiler_buffer_tracing_kfd_event_dropped_events_record_t>{};
auto kfd_page_migrate_records = std::deque<rocprofiler_buffer_tracing_kfd_page_migrate_record_t>{};
auto kfd_page_fault_records = std::deque<rocprofiler_buffer_tracing_kfd_page_fault_record_t>{};
auto kfd_queue_records = std::deque<rocprofiler_buffer_tracing_kfd_queue_record_t>{};
void
tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
@@ -1002,13 +1013,6 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
scratch_memory_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)
{
auto* record = static_cast<rocprofiler_buffer_tracing_page_migration_record_t*>(
header->payload);
page_migration_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT)
{
auto* record =
@@ -1060,6 +1064,66 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
rocjpeg_api_bf_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_kfd_event_page_migrate_record_t*>(
header->payload);
kfd_page_migrate_event_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_kfd_event_page_fault_record_t*>(
header->payload);
kfd_page_fault_event_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE)
{
auto* record = static_cast<rocprofiler_buffer_tracing_kfd_event_queue_record_t*>(
header->payload);
kfd_queue_event_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_KFD_EVENT_UNMAP_FROM_GPU)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_kfd_event_unmap_from_gpu_record_t*>(
header->payload);
kfd_unmap_from_gpu_event_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_kfd_event_dropped_events_record_t*>(
header->payload);
kfd_dropped_events_event_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE)
{
auto* record = static_cast<rocprofiler_buffer_tracing_kfd_page_migrate_record_t*>(
header->payload);
kfd_page_migrate_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT)
{
auto* record = static_cast<rocprofiler_buffer_tracing_kfd_page_fault_record_t*>(
header->payload);
kfd_page_fault_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_KFD_QUEUE)
{
auto* record =
static_cast<rocprofiler_buffer_tracing_kfd_queue_record_t*>(header->payload);
kfd_queue_records.emplace_back(*record);
}
else
{
throw std::runtime_error{
@@ -1155,7 +1219,6 @@ rocprofiler_context_id_t scratch_memory_ctx = {0};
rocprofiler_context_id_t corr_id_retire_ctx = {0};
rocprofiler_context_id_t kernel_dispatch_callback_ctx = {0};
rocprofiler_context_id_t kernel_dispatch_buffered_ctx = {0};
rocprofiler_context_id_t page_migration_ctx = {0};
rocprofiler_context_id_t runtime_init_callback_ctx = {};
rocprofiler_context_id_t runtime_init_buffered_ctx = {};
rocprofiler_context_id_t rocdecode_api_callback_ctx = {0};
@@ -1163,24 +1226,39 @@ rocprofiler_context_id_t rocdecode_api_buffered_ctx = {0};
rocprofiler_context_id_t rocdecode_api_ext_buffered_ctx = {0};
rocprofiler_context_id_t rocjpeg_api_callback_ctx = {0};
rocprofiler_context_id_t rocjpeg_api_buffered_ctx = {0};
rocprofiler_context_id_t page_migrate_event_ctx = {0};
rocprofiler_context_id_t kfd_page_fault_event_ctx = {0};
rocprofiler_context_id_t kfd_queue_event_ctx = {0};
rocprofiler_context_id_t kfd_unmap_from_gpu_event_ctx = {0};
rocprofiler_context_id_t kfd_droped_events_event_ctx = {0};
rocprofiler_context_id_t kfd_page_migrate_records_ctx = {0};
rocprofiler_context_id_t kfd_page_fault_records_ctx = {0};
rocprofiler_context_id_t kfd_queue_records_ctx = {0};
// buffers
rocprofiler_buffer_id_t runtime_init_buffered_buffer = {};
rocprofiler_buffer_id_t hsa_api_buffered_buffer = {};
rocprofiler_buffer_id_t hip_api_buffered_buffer = {};
rocprofiler_buffer_id_t marker_api_buffered_buffer = {};
rocprofiler_buffer_id_t kernel_dispatch_buffer = {};
rocprofiler_buffer_id_t memory_copy_buffer = {};
rocprofiler_buffer_id_t memory_allocation_buffer = {};
rocprofiler_buffer_id_t page_migration_buffer = {};
rocprofiler_buffer_id_t counter_collection_buffer = {};
rocprofiler_buffer_id_t scratch_memory_buffer = {};
rocprofiler_buffer_id_t corr_id_retire_buffer = {};
rocprofiler_buffer_id_t rccl_api_buffered_buffer = {};
rocprofiler_buffer_id_t rocdecode_api_buffer = {};
rocprofiler_buffer_id_t rocdecode_api_ext_buffer = {};
rocprofiler_buffer_id_t rocjpeg_api_buffer = {};
rocprofiler_buffer_id_t ompt_buffered_buffer = {};
rocprofiler_buffer_id_t runtime_init_buffered_buffer = {};
rocprofiler_buffer_id_t hsa_api_buffered_buffer = {};
rocprofiler_buffer_id_t hip_api_buffered_buffer = {};
rocprofiler_buffer_id_t marker_api_buffered_buffer = {};
rocprofiler_buffer_id_t kernel_dispatch_buffer = {};
rocprofiler_buffer_id_t memory_copy_buffer = {};
rocprofiler_buffer_id_t memory_allocation_buffer = {};
rocprofiler_buffer_id_t counter_collection_buffer = {};
rocprofiler_buffer_id_t scratch_memory_buffer = {};
rocprofiler_buffer_id_t corr_id_retire_buffer = {};
rocprofiler_buffer_id_t rccl_api_buffered_buffer = {};
rocprofiler_buffer_id_t rocdecode_api_buffer = {};
rocprofiler_buffer_id_t rocdecode_api_ext_buffer = {};
rocprofiler_buffer_id_t rocjpeg_api_buffer = {};
rocprofiler_buffer_id_t ompt_buffered_buffer = {};
rocprofiler_buffer_id_t page_migrate_event_buffer = {};
rocprofiler_buffer_id_t kfd_page_fault_event_buffer = {};
rocprofiler_buffer_id_t kfd_queue_event_buffer = {};
rocprofiler_buffer_id_t kfd_unmap_from_gpu_event_buffer = {};
rocprofiler_buffer_id_t kfd_droped_events_event_buffer = {};
rocprofiler_buffer_id_t kfd_page_migrate_records_buffer = {};
rocprofiler_buffer_id_t kfd_page_fault_records_buffer = {};
rocprofiler_buffer_id_t kfd_queue_records_buffer = {};
auto contexts = std::unordered_map<std::string_view, rocprofiler_context_id_t*>{
{"RUNTIME_INIT_CALLBACK", &runtime_init_callback_ctx},
@@ -1200,7 +1278,6 @@ auto contexts = std::unordered_map<std::string_view, rocprofiler_context_id_t*>{
{"KERNEL_DISPATCH_BUFFERED", &kernel_dispatch_buffered_ctx},
{"MEMORY_COPY_BUFFERED", &memory_copy_buffered_ctx},
{"MEMORY_ALLOCATION_BUFFERED", &memory_allocation_buffered_ctx},
{"PAGE_MIGRATION", &page_migration_ctx},
{"COUNTER_COLLECTION", &counter_collection_ctx},
{"SCRATCH_MEMORY", &scratch_memory_ctx},
{"CORRELATION_ID_RETIREMENT", &corr_id_retire_ctx},
@@ -1211,9 +1288,17 @@ auto contexts = std::unordered_map<std::string_view, rocprofiler_context_id_t*>{
{"ROCJPEG_API_CALLBACK", &rocjpeg_api_callback_ctx},
{"ROCJPEG_API_BUFFERED", &rocjpeg_api_buffered_ctx},
{"OMPT_BUFFERED", &ompt_buffered_ctx},
{"KFD_EVENT_PAGE_MIGRATE", &page_migrate_event_ctx},
{"KFD_EVENT_PAGE_FAULT", &kfd_page_fault_event_ctx},
{"KFD_EVENT_QUEUE", &kfd_queue_event_ctx},
{"KFD_EVENT_UNMAP_FROM_GPU", &kfd_unmap_from_gpu_event_ctx},
{"KFD_DROPPED_EVENTS", &kfd_droped_events_event_ctx},
{"KFD_PAGE_MIGRATE", &kfd_page_migrate_records_ctx},
{"KFD_PAGE_FAULT", &kfd_page_fault_records_ctx},
{"KFD_QUEUE", &kfd_queue_records_ctx},
};
auto buffers = std::array<rocprofiler_buffer_id_t*, 16>{&runtime_init_buffered_buffer,
auto buffers = std::array<rocprofiler_buffer_id_t*, 22>{&runtime_init_buffered_buffer,
&hsa_api_buffered_buffer,
&hip_api_buffered_buffer,
&marker_api_buffered_buffer,
@@ -1221,22 +1306,28 @@ auto buffers = std::array<rocprofiler_buffer_id_t*, 16>{&runtime_init_buffered_b
&memory_copy_buffer,
&memory_allocation_buffer,
&scratch_memory_buffer,
&page_migration_buffer,
&counter_collection_buffer,
&corr_id_retire_buffer,
&rccl_api_buffered_buffer,
&ompt_buffered_buffer,
&rocdecode_api_buffer,
&rocdecode_api_ext_buffer,
&rocjpeg_api_buffer};
&rocjpeg_api_buffer,
&kfd_page_fault_event_buffer,
&kfd_queue_event_buffer,
&kfd_unmap_from_gpu_event_buffer,
&kfd_droped_events_event_buffer,
&kfd_page_migrate_records_buffer,
&kfd_page_fault_records_buffer,
&kfd_queue_records_buffer};
auto agents = std::vector<rocprofiler_agent_t>{};
auto agents_map = std::unordered_map<rocprofiler_agent_id_t, rocprofiler_agent_t>{};
rocprofiler_timestamp_t init_time = 0;
rocprofiler_timestamp_t fini_time = 0;
rocprofiler_thread_id_t main_tid = 0;
auto page_migration_status = ROCPROFILER_STATUS_SUCCESS;
rocprofiler_timestamp_t init_time = 0;
rocprofiler_timestamp_t fini_time = 0;
rocprofiler_thread_id_t main_tid = 0;
auto kfd_configure_status = ROCPROFILER_STATUS_SUCCESS;
int
tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
@@ -1505,15 +1596,6 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
&scratch_memory_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(page_migration_ctx,
buffer_size,
watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
tool_data,
&page_migration_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(corr_id_retire_ctx,
buffer_size,
watermark,
@@ -1576,6 +1658,78 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
&ompt_buffered_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(page_migrate_event_ctx,
buffer_size,
watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
tool_data,
&page_migrate_event_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(kfd_page_fault_event_ctx,
buffer_size,
watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
tool_data,
&kfd_page_fault_event_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(kfd_queue_event_ctx,
buffer_size,
watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
tool_data,
&kfd_queue_event_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(kfd_unmap_from_gpu_event_ctx,
buffer_size,
watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
tool_data,
&kfd_unmap_from_gpu_event_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(kfd_droped_events_event_ctx,
buffer_size,
watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
tool_data,
&kfd_droped_events_event_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(kfd_page_migrate_records_ctx,
buffer_size,
watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
tool_data,
&kfd_page_migrate_records_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(kfd_page_fault_records_ctx,
buffer_size,
watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
tool_data,
&kfd_page_fault_records_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(kfd_queue_records_ctx,
buffer_size,
watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
tool_data,
&kfd_queue_records_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
runtime_init_buffered_ctx,
ROCPROFILER_BUFFER_TRACING_RUNTIME_INITIALIZATION,
@@ -1667,20 +1821,139 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
"buffer tracing service for scratch memory configure");
{
page_migration_status =
rocprofiler_configure_buffer_tracing_service(page_migration_ctx,
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION,
nullptr,
0,
page_migration_buffer);
kfd_configure_status = rocprofiler_configure_buffer_tracing_service(
page_migrate_event_ctx,
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_MIGRATE,
nullptr,
0,
page_migrate_event_buffer);
constexpr auto message = "buffer tracing service for page migration configure";
if(page_migration_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
if(kfd_configure_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
std::cerr << message
<< " failed: " << rocprofiler_get_status_string(page_migration_status)
<< " failed: " << rocprofiler_get_status_string(kfd_configure_status)
<< std::endl;
else
ROCPROFILER_CALL(page_migration_status, message);
ROCPROFILER_CALL(kfd_configure_status, message);
}
{
kfd_configure_status = rocprofiler_configure_buffer_tracing_service(
kfd_page_fault_event_ctx,
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_PAGE_FAULT,
nullptr,
0,
kfd_page_fault_event_buffer);
constexpr auto message = "buffer tracing service for page migration configure";
if(kfd_configure_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
std::cerr << message
<< " failed: " << rocprofiler_get_status_string(kfd_configure_status)
<< std::endl;
else
ROCPROFILER_CALL(kfd_configure_status, message);
}
{
kfd_configure_status =
rocprofiler_configure_buffer_tracing_service(kfd_queue_event_ctx,
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_QUEUE,
nullptr,
0,
kfd_queue_event_buffer);
constexpr auto message = "buffer tracing service for page migration configure";
if(kfd_configure_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
std::cerr << message
<< " failed: " << rocprofiler_get_status_string(kfd_configure_status)
<< std::endl;
else
ROCPROFILER_CALL(kfd_configure_status, message);
}
{
kfd_configure_status = rocprofiler_configure_buffer_tracing_service(
kfd_unmap_from_gpu_event_ctx,
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_UNMAP_FROM_GPU,
nullptr,
0,
kfd_unmap_from_gpu_event_buffer);
constexpr auto message = "buffer tracing service for page migration configure";
if(kfd_configure_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
std::cerr << message
<< " failed: " << rocprofiler_get_status_string(kfd_configure_status)
<< std::endl;
else
ROCPROFILER_CALL(kfd_configure_status, message);
}
{
kfd_configure_status = rocprofiler_configure_buffer_tracing_service(
kfd_droped_events_event_ctx,
ROCPROFILER_BUFFER_TRACING_KFD_EVENT_DROPPED_EVENTS,
nullptr,
0,
kfd_droped_events_event_buffer);
constexpr auto message = "buffer tracing service for page migration configure";
if(kfd_configure_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
std::cerr << message
<< " failed: " << rocprofiler_get_status_string(kfd_configure_status)
<< std::endl;
else
ROCPROFILER_CALL(kfd_configure_status, message);
}
{
kfd_configure_status = rocprofiler_configure_buffer_tracing_service(
kfd_page_migrate_records_ctx,
ROCPROFILER_BUFFER_TRACING_KFD_PAGE_MIGRATE,
nullptr,
0,
kfd_page_migrate_records_buffer);
constexpr auto message = "buffer tracing service for page migration configure";
if(kfd_configure_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
std::cerr << message
<< " failed: " << rocprofiler_get_status_string(kfd_configure_status)
<< std::endl;
else
ROCPROFILER_CALL(kfd_configure_status, message);
}
{
kfd_configure_status =
rocprofiler_configure_buffer_tracing_service(kfd_page_fault_records_ctx,
ROCPROFILER_BUFFER_TRACING_KFD_PAGE_FAULT,
nullptr,
0,
kfd_page_fault_records_buffer);
constexpr auto message = "buffer tracing service for page migration configure";
if(kfd_configure_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
std::cerr << message
<< " failed: " << rocprofiler_get_status_string(kfd_configure_status)
<< std::endl;
else
ROCPROFILER_CALL(kfd_configure_status, message);
}
{
kfd_configure_status =
rocprofiler_configure_buffer_tracing_service(kfd_queue_records_ctx,
ROCPROFILER_BUFFER_TRACING_KFD_QUEUE,
nullptr,
0,
kfd_queue_records_buffer);
constexpr auto message = "buffer tracing service for page migration configure";
if(kfd_configure_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
std::cerr << message
<< " failed: " << rocprofiler_get_status_string(kfd_configure_status)
<< std::endl;
else
ROCPROFILER_CALL(kfd_configure_status, message);
}
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
@@ -1883,7 +2156,14 @@ tool_fini(void* tool_data)
<< ", memory_copy_bf_records=" << memory_copy_bf_records.size()
<< ", memory_allocation_bf_records=" << memory_allocation_bf_records.size()
<< ", scratch_memory_records=" << scratch_memory_records.size()
<< ", page_migration=" << page_migration_records.size()
<< ", kfd_page_migrate_event=" << kfd_page_migrate_event_records.size()
<< ", kfd_page_fault_event=" << kfd_page_fault_event_records.size()
<< ", kfd_queue_event=" << kfd_queue_event_records.size()
<< ", kfd_unmap_from_gpu_event=" << kfd_unmap_from_gpu_event_records.size()
<< ", kfd_droped_events_event=" << kfd_dropped_events_event_records.size()
<< ", kfd_page_migrate_record=" << kfd_page_migrate_records.size()
<< ", kfd_page_fault_record=" << kfd_page_fault_records.size()
<< ", kfd_queue_record=" << kfd_queue_records.size()
<< ", runtime_init_bf_records=" << runtime_init_bf_records.size()
<< ", hsa_api_bf_records=" << hsa_api_bf_records.size()
<< ", hip_api_bf_records=" << hip_api_bf_records.size()
@@ -1955,8 +2235,8 @@ write_json(call_stack_t* _call_stack)
auto json_ar = JSONOutputArchive{*ofs, json_opts};
auto buffer_names = sdk::get_buffer_tracing_names();
auto callbk_names = sdk::get_callback_tracing_names();
auto validate_page_migration =
(page_migration_status != ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL);
auto validate_kfd_events =
(kfd_configure_status != ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL);
json_ar.setNextName("rocprofiler-sdk-json-tool");
json_ar.startNode();
@@ -1967,7 +2247,7 @@ write_json(call_stack_t* _call_stack)
json_ar(cereal::make_nvp("main_tid", main_tid));
json_ar(cereal::make_nvp("init_time", init_time));
json_ar(cereal::make_nvp("fini_time", fini_time));
json_ar(cereal::make_nvp("validate_page_migration", validate_page_migration));
json_ar(cereal::make_nvp("validate_kfd_events", validate_kfd_events));
json_ar.finishNode();
json_ar(cereal::make_nvp("agents", agents));
@@ -2012,7 +2292,15 @@ write_json(call_stack_t* _call_stack)
json_ar(cereal::make_nvp("memory_copies", memory_copy_bf_records));
json_ar(cereal::make_nvp("memory_allocations", memory_allocation_bf_records));
json_ar(cereal::make_nvp("scratch_memory_traces", scratch_memory_records));
json_ar(cereal::make_nvp("page_migration", page_migration_records));
json_ar(cereal::make_nvp("kfd_page_migrate_events", kfd_page_migrate_event_records));
json_ar(cereal::make_nvp("kfd_page_fault_events", kfd_page_fault_event_records));
json_ar(cereal::make_nvp("kfd_queue_events", kfd_queue_event_records));
json_ar(
cereal::make_nvp("kfd_unmap_from_gpu_events", kfd_unmap_from_gpu_event_records));
json_ar(cereal::make_nvp("kfd_droped_events", kfd_dropped_events_event_records));
json_ar(cereal::make_nvp("kfd_page_migrate_records", kfd_page_migrate_records));
json_ar(cereal::make_nvp("kfd_page_fault_records", kfd_page_fault_records));
json_ar(cereal::make_nvp("kfd_queue_records", kfd_queue_records));
json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_bf_records));
json_ar(cereal::make_nvp("hip_api_traces", hip_api_bf_records));
json_ar(cereal::make_nvp("marker_api_traces", marker_api_bf_records));