Report page migration events as start/end (#793)

* Squashed commit of the following:

commit b76f2635f4b65599f03812a73d0cf410f5ada213
Author: Mythreya <mythreya.kuricheti@amd.com>
Date:   Fri Apr 26 00:29:09 2024 +0000

    Changed for PR feedback

commit bedb8ad566ff42fbf117b19202c26c507abcf8ac
Author: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
Date:   Thu Apr 25 19:20:06 2024 -0500

    Fix installation

commit a98f8a69459a1450a1be9c98e20b3c1e7f2568c2
Author: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
Date:   Thu Apr 25 19:16:35 2024 -0500

    Restructure the headers

commit 46489a020ffafdd5f4ce3f580469ff233ef67fe1
Author: Mythreya <mythreya.kuricheti@amd.com>
Date:   Tue Apr 23 23:31:10 2024 +0000

    Update hsa include

commit 8e795282cce348fc6aa736b7857b21aeb32aa20a
Author: Mythreya <mythreya.kuricheti@amd.com>
Date:   Tue Apr 23 23:02:32 2024 +0000

    Report page migration events as start/end

    * Updated tests accordingly
    * Page migration events are reported independently

commit 8784e5ad4895a626a2a8e4ac12f8021b34172bd4
Author: Mythreya <mythreya.kuricheti@amd.com>
Date:   Tue Apr 16 17:01:57 2024 +0000

    Update handling of dropped page migration events

    Previously, we dropped all locally buffered events when we detect that
    KFD has dropped some events. This may drop too many pending events too eagerly.

    When we receive an end event and cannot find the corresponding start,
    we can be sure that KFD has dropped some events in the immediate past.

    When this happens, we look through all locally buffered events and report
    the start events that are older than 10s as partial events --- they have
    no "end" information (we expect that the end events have been dropped).

    We also set the polling timeout to 10s to prevent the local buffer from
    getting too large with events waiting to be paired up.

    Updated tests

commit 2e8e0b07eeda9b5990e1ae8d28dcd3a035ce38e1
Author: Mythreya <mythreya.kuricheti@amd.com>
Date:   Tue Apr 16 17:01:31 2024 +0000

    Docs for triggers

* Fix page migration sample

* Fix hasher, kfd install

* Add hsa include
* Install KFD include dir

* Updates from code review

- single timestamp field
- node_id -> agent_id
- from_node -> from_agent
- to_node -> to_agent

* Misc revisions

* Remove page-migration install target

* Update page-migration pytest

* Tweak to serialization

* Address PR comments

* Update page-migration test

* Add cli args, update iterations

* Address PR comments

* Add abi.cpp for static_asserts
* Update page_migration gtest with only runtime tests
* Moved helpers into utils.hpp

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>

[ROCm/rocprofiler-sdk commit: 363f85dc72]
Este commit está contenido en:
Mythreya
2024-11-11 09:08:47 -08:00
cometido por GitHub
padre 46cb82b75f
commit 36d357337d
Se han modificado 27 ficheros con 1161 adiciones y 1098 borrados
@@ -86,6 +86,22 @@ rocprofiler_buffer_id_t client_buffer = {};
buffer_name_info client_name_info = {};
kernel_symbol_map_t client_kernels = {};
template <typename Tp>
std::string
as_hex(Tp _v, size_t _width = 16)
{
uintptr_t _vp = 0;
if constexpr(std::is_pointer<Tp>::value)
_vp = reinterpret_cast<uintptr_t>(_v);
else
_vp = _v;
auto _ss = std::stringstream{};
_ss.fill('0');
_ss << "0x" << std::hex << std::setw(_width) << _vp;
return _ss.str();
}
void
print_call_stack(const call_stack_t& _call_stack)
{
@@ -290,43 +306,71 @@ tool_tracing_callback(rocprofiler_context_id_t context,
auto info = std::stringstream{};
info << "kind=" << record->kind << ", operation=" << record->operation
<< ", pid=" << record->pid << ", start=" << record->start_timestamp
<< ", stop=" << record->end_timestamp
<< ", name=" << client_name_info.at(record->kind, record->operation);
<< ", pid=" << record->pid << ", timestamp=" << record->timestamp
<< ", name=" << client_name_info.at(record->kind, record->operation)
<< std::boolalpha;
switch(record->operation)
{
case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE:
case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START:
{
info << ", page_fault=(" << record->page_fault.read_fault << ", "
<< record->page_fault.migrated << ", " << record->page_fault.node_id
<< ", " << std::hex << "0x" << record->page_fault.address << ")";
const auto& arg = record->args;
info << ", page_migrate_start=(" << as_hex(arg.page_migrate_start.start_addr)
<< ", " << as_hex(arg.page_migrate_start.end_addr) << ", "
<< arg.page_migrate_start.from_agent.handle << ", "
<< arg.page_migrate_start.to_agent.handle << ", "
<< arg.page_migrate_start.prefetch_agent.handle << ", "
<< arg.page_migrate_start.preferred_agent.handle << ", "
<< arg.page_migrate_start.trigger << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT:
case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END:
{
info << ", page_migrate=(" << std::hex << "0x"
<< record->page_migrate.start_addr << ", 0x"
<< record->page_migrate.end_addr << ", " << std::dec
<< record->page_migrate.from_node << ", " << record->page_migrate.to_node
<< ", " << record->page_migrate.prefetch_node << ", "
<< record->page_migrate.preferred_node << ", "
<< record->page_migrate.trigger << ")";
const auto& arg = record->args;
info << ", page_migrate_end=(" << as_hex(arg.page_migrate_end.start_addr)
<< ", " << as_hex(arg.page_migrate_end.end_addr) << ", "
<< arg.page_migrate_end.from_agent.handle << ", "
<< arg.page_migrate_end.to_agent.handle << ", "
<< arg.page_migrate_end.trigger << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND:
case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START:
{
info << ", queue_suspend=(" << record->queue_suspend.rescheduled << ", "
<< record->queue_suspend.node_id << ", " << record->queue_suspend.trigger
<< ")";
const auto& arg = record->args;
info << ", page_fault_start=(" << arg.page_fault_start.read_fault << ", "
<< arg.page_fault_start.agent_id.handle << ", "
<< as_hex(arg.page_fault_start.address) << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END:
{
const auto& arg = record->args;
info << ", page_fault_end=(" << arg.page_fault_end.migrated << ", "
<< arg.page_fault_end.agent_id.handle << ", "
<< as_hex(arg.page_fault_end.address) << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION:
{
const auto& arg = record->args;
info << ", queue_eviction=(" << arg.queue_eviction.agent_id.handle << ", "
<< arg.queue_eviction.trigger << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE:
{
const auto& arg = record->args;
info << ", queue_restore=(" << arg.queue_restore.rescheduled << ", "
<< arg.queue_restore.agent_id.handle << ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU:
{
info << ", unmap_from_gpu=(" << record->unmap_from_gpu.node_id << std::hex
<< ", 0x" << record->unmap_from_gpu.start_addr << ", 0x"
<< record->unmap_from_gpu.end_addr << ", " << std::dec
<< record->unmap_from_gpu.trigger << ")";
const auto& arg = record->args;
info << ", unmap_from_gpu=(" << as_hex(arg.unmap_from_gpu.start_addr) << ", "
<< as_hex(arg.unmap_from_gpu.end_addr) << ", "
<< arg.unmap_from_gpu.agent_id.handle << ", " << arg.unmap_from_gpu.trigger
<< ")";
break;
}
case ROCPROFILER_PAGE_MIGRATION_NONE:
@@ -337,8 +381,7 @@ tool_tracing_callback(rocprofiler_context_id_t context,
}
}
if(record->start_timestamp > record->end_timestamp)
throw std::runtime_error("page migration: start > end");
if(record->timestamp == 0) throw std::runtime_error("page migration: timestamp == 0");
static_cast<call_stack_t*>(user_data)->emplace_back(
source_location{__FUNCTION__, __FILE__, __LINE__, kind_name + info.str()});
@@ -46,4 +46,5 @@ add_subdirectory(marker)
add_subdirectory(openmp)
add_subdirectory(rccl)
add_subdirectory(cxx)
add_subdirectory(kfd)
add_subdirectory(amd_detail)
@@ -25,6 +25,7 @@
#include <rocprofiler-sdk/agent.h>
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/kfd/page_migration_args.h>
#include <stdint.h>
@@ -37,49 +38,6 @@ ROCPROFILER_EXTERN_C_INIT
* @{
*/
/**
* @brief Page migration triggers
*
*/
typedef enum
{
ROCPROFILER_PAGE_MIGRATION_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_LAST,
} rocprofiler_page_migration_trigger_t;
/**
* @brief Page migration triggers causing the queue to suspend
*
*/
typedef enum
{
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_LAST,
} rocprofiler_page_migration_queue_suspend_trigger_t;
/**
* @brief Page migration triggers causing an unmap from the GPU
*
*/
typedef enum
{
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST,
} rocprofiler_page_migration_unmap_from_gpu_trigger_t;
/**
* @brief ROCProfiler Buffer HSA API Tracer Record.
*/
@@ -265,63 +223,17 @@ typedef struct rocprofiler_buffer_tracing_kernel_dispatch_record_t
/// dispatch
} rocprofiler_buffer_tracing_kernel_dispatch_record_t;
typedef struct
{
uint8_t read_fault : 1; ///< Is the fault due to a read or a write
uint8_t migrated : 1;
uint32_t node_id; ///< GPU or CPU node ID which reports a page fault
uint64_t address; ///< Address access that caused the page fault
} rocprofiler_buffer_tracing_page_migration_page_fault_record_t;
typedef struct
{
uint64_t start_addr; ///< Start address of the page being migrated
uint64_t end_addr; ///< End address of the page being migrated
uint32_t from_node; ///< Source node
uint32_t to_node; ///< Destination node
uint32_t prefetch_node; ///< Node from which page was prefetched
uint32_t preferred_node; ///< Preferred destinaion node
rocprofiler_page_migration_trigger_t trigger; ///< Cause of migration
} rocprofiler_buffer_tracing_page_migration_page_migrate_record_t;
typedef struct
{
uint8_t rescheduled : 1;
uint32_t node_id; ///< GPU node from which the queue was suspended
rocprofiler_page_migration_queue_suspend_trigger_t trigger; ///< Cause of queue suspension
} rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t;
typedef struct
{
uint32_t node_id; ///< Node ID from which page was unmapped
uint64_t start_addr; ///< Start address of unmapped page
uint64_t end_addr; ///< End address of unmapped page
rocprofiler_page_migration_unmap_from_gpu_trigger_t trigger; ///< Cause of unmap
} rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t;
/**
* @brief ROCProfiler Buffer Page Migration Tracer Record
*/
typedef struct rocprofiler_buffer_tracing_page_migration_record_t
{
uint64_t size; ///< size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION
rocprofiler_tracing_operation_t operation;
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
uint32_t pid;
union
{
rocprofiler_buffer_tracing_page_migration_page_fault_record_t page_fault;
rocprofiler_buffer_tracing_page_migration_page_migrate_record_t page_migrate;
rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t queue_suspend;
rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t unmap_from_gpu;
struct
{
uint64_t reserved[12];
};
};
uint64_t size; ///< size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION
rocprofiler_page_migration_operation_t operation;
rocprofiler_timestamp_t timestamp; ///< start time in nanoseconds
uint32_t pid;
rocprofiler_page_migration_args_t args;
} rocprofiler_buffer_tracing_page_migration_record_t;
/**
@@ -24,6 +24,7 @@
#pragma once
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hsa.h>
#include <rocprofiler-sdk/internal_threading.h>
namespace rocprofiler
@@ -59,6 +59,7 @@
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#define ROCP_SDK_SAVE_DATA_FIELD(FIELD) ar(make_nvp(#FIELD, data.FIELD))
@@ -460,6 +461,122 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_copy_record_t data)
ROCP_SDK_SAVE_DATA_FIELD(bytes);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_page_fault_start_t& data)
{
ROCP_SDK_SAVE_DATA_BITFIELD("read_fault", read_fault);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(address);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_page_fault_end_t& data)
{
ROCP_SDK_SAVE_DATA_BITFIELD("migrated", migrated);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(address);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_page_migrate_start_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(start_addr);
ROCP_SDK_SAVE_DATA_FIELD(end_addr);
ROCP_SDK_SAVE_DATA_FIELD(from_agent);
ROCP_SDK_SAVE_DATA_FIELD(to_agent);
ROCP_SDK_SAVE_DATA_FIELD(prefetch_agent);
ROCP_SDK_SAVE_DATA_FIELD(preferred_agent);
ROCP_SDK_SAVE_DATA_FIELD(trigger);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_page_migrate_end_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(start_addr);
ROCP_SDK_SAVE_DATA_FIELD(end_addr);
ROCP_SDK_SAVE_DATA_FIELD(from_agent);
ROCP_SDK_SAVE_DATA_FIELD(to_agent);
ROCP_SDK_SAVE_DATA_FIELD(trigger);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_queue_eviction_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(trigger);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_queue_restore_t& data)
{
ROCP_SDK_SAVE_DATA_BITFIELD("rescheduled", rescheduled);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_page_migration_unmap_from_gpu_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(start_addr);
ROCP_SDK_SAVE_DATA_FIELD(end_addr);
ROCP_SDK_SAVE_DATA_FIELD(agent_id);
ROCP_SDK_SAVE_DATA_FIELD(trigger);
}
namespace details
{
template <size_t Idx>
struct save_page_migration_arg;
#define ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(ENUM_VALUE, UNION_ARG) \
template <> \
struct save_page_migration_arg<ROCPROFILER_PAGE_MIGRATION_##ENUM_VALUE> \
{ \
static constexpr auto value = ROCPROFILER_PAGE_MIGRATION_##ENUM_VALUE; \
template <typename ArchiveT> \
void operator()(ArchiveT& ar, rocprofiler_page_migration_args_t args) \
{ \
ar(make_nvp(#UNION_ARG, args.UNION_ARG)); \
} \
};
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(NONE, none)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_MIGRATE_START, page_migrate_start)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_MIGRATE_END, page_migrate_end)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_FAULT_START, page_fault_start)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(PAGE_FAULT_END, page_fault_end)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(QUEUE_EVICTION, queue_eviction)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(QUEUE_RESTORE, queue_restore)
ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG(UNMAP_FROM_GPU, unmap_from_gpu)
#undef ROCP_SDK_SPECIALIZE_PAGE_MIGRATION_ARG
template <typename ArchiveT, size_t Idx, size_t... IdxTail>
void
save_page_migration_args(ArchiveT& ar,
rocprofiler_page_migration_operation_t op,
rocprofiler_page_migration_args_t args,
std::index_sequence<Idx, IdxTail...>)
{
using save_page_migration_type = save_page_migration_arg<Idx>;
if(op == save_page_migration_type::value)
{
if constexpr(save_page_migration_type::value != ROCPROFILER_PAGE_MIGRATION_NONE)
save_page_migration_type{}(ar, args);
}
else if constexpr(sizeof...(IdxTail) > 0)
{
save_page_migration_args(ar, op, args, std::index_sequence<IdxTail...>{});
}
}
} // namespace details
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_record_t& data)
@@ -467,81 +584,10 @@ save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_record_t& dat
ROCP_SDK_SAVE_DATA_FIELD(size);
ROCP_SDK_SAVE_DATA_FIELD(kind);
ROCP_SDK_SAVE_DATA_FIELD(operation);
ROCP_SDK_SAVE_DATA_FIELD(start_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(end_timestamp);
ROCP_SDK_SAVE_DATA_FIELD(timestamp);
ROCP_SDK_SAVE_DATA_FIELD(pid);
switch(data.operation)
{
case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT:
{
ar(make_nvp("page_fault", data.page_fault));
break;
}
case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE:
{
ar(make_nvp("page_migrate", data.page_migrate));
break;
}
case ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND:
{
ar(make_nvp("queue_suspend", data.queue_suspend));
break;
}
case ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU:
{
ar(make_nvp("unmap_from_gpu", data.unmap_from_gpu));
break;
}
case ROCPROFILER_PAGE_MIGRATION_NONE:
case ROCPROFILER_PAGE_MIGRATION_LAST:
{
throw std::runtime_error{"unsupported page migration operation type"};
break;
}
}
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_page_fault_record_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(node_id);
ROCP_SDK_SAVE_DATA_FIELD(address);
ROCP_SDK_SAVE_DATA_FIELD(read_fault);
ROCP_SDK_SAVE_DATA_FIELD(migrated);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_page_migrate_record_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(start_addr);
ROCP_SDK_SAVE_DATA_FIELD(end_addr);
ROCP_SDK_SAVE_DATA_FIELD(from_node);
ROCP_SDK_SAVE_DATA_FIELD(to_node);
ROCP_SDK_SAVE_DATA_FIELD(prefetch_node);
ROCP_SDK_SAVE_DATA_FIELD(preferred_node);
ROCP_SDK_SAVE_DATA_FIELD(trigger);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(node_id);
ROCP_SDK_SAVE_DATA_FIELD(trigger);
ROCP_SDK_SAVE_DATA_FIELD(rescheduled);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t& data)
{
ROCP_SDK_SAVE_DATA_FIELD(node_id);
ROCP_SDK_SAVE_DATA_FIELD(start_addr);
ROCP_SDK_SAVE_DATA_FIELD(end_addr);
ROCP_SDK_SAVE_DATA_FIELD(trigger);
details::save_page_migration_args(
ar, data.operation, data.args, std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{});
}
template <typename ArchiveT>
@@ -24,8 +24,6 @@
#include <rocprofiler-sdk/defines.h>
#include <hsa/hsa_amd_tool.h>
#include <stddef.h>
#include <stdint.h>
@@ -226,21 +224,6 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_MEMORY_COPY_LAST,
} rocprofiler_memory_copy_operation_t;
/**
* @brief Page migration event.
*/
typedef enum // NOLINT(performance-enum-size)
{
ROCPROFILER_PAGE_MIGRATION_NONE = 0, ///< Unknown event
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE,
ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU,
// Any and all events, from all processes. Requires superuser
// ROCPROFILER_PAGE_MIGRATION_ANY_ALL_PROCESSES,
ROCPROFILER_PAGE_MIGRATION_LAST,
} rocprofiler_page_migration_operation_t;
/**
* @brief ROCProfiler Kernel Dispatch Tracing Operation Types.
*/
@@ -321,6 +304,22 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_BUFFER_POLICY_LAST,
} rocprofiler_buffer_policy_t;
/**
* @brief Page migration event.
*/
typedef enum // NOLINT(performance-enum-size)
{
ROCPROFILER_PAGE_MIGRATION_NONE = 0, ///< Unknown event
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START,
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END,
ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START,
ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END,
ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION,
ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU,
ROCPROFILER_PAGE_MIGRATION_LAST,
} rocprofiler_page_migration_operation_t;
/**
* @brief Scratch event kind
*/
@@ -333,19 +332,6 @@ typedef enum
ROCPROFILER_SCRATCH_MEMORY_LAST,
} rocprofiler_scratch_memory_operation_t;
/**
* @brief Allocation flags for @see rocprofiler_buffer_tracing_scratch_memory_record_t
*/
typedef enum
{
ROCPROFILER_SCRATCH_ALLOC_FLAG_NONE = 0,
ROCPROFILER_SCRATCH_ALLOC_FLAG_USE_ONCE =
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE, ///< This scratch allocation is only valid for 1
///< dispatch.
ROCPROFILER_SCRATCH_ALLOC_FLAG_ALT =
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT, ///< Used alternate scratch instead of main scratch
} rocprofiler_scratch_alloc_flag_t;
/**
* @brief Enumeration for specifying runtime libraries supported by rocprofiler. This enumeration is
* used for thread creation callbacks. @see INTERNAL_THREADING.
@@ -39,6 +39,7 @@
#include <rocprofiler-sdk/hsa/api_args.h>
#include <rocprofiler-sdk/hsa/api_id.h>
#include <rocprofiler-sdk/hsa/scratch_memory_args.h>
#include <rocprofiler-sdk/hsa/scratch_memory_id.h>
#include <rocprofiler-sdk/hsa/table_id.h>
#if defined(ROCPROFILER_DEFINED_AMD_INTERNAL_BUILD) && ROCPROFILER_DEFINED_AMD_INTERNAL_BUILD > 0
@@ -12,6 +12,7 @@ set(ROCPROFILER_HSA_HEADER_FILES
finalize_ext_api_id.h
image_ext_api_id.h
scratch_memory_args.h
scratch_memory_id.h
table_id.h)
install(
@@ -0,0 +1,39 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <hsa/hsa_amd_tool.h>
/**
* @brief Allocation flags for @see rocprofiler_buffer_tracing_scratch_memory_record_t
*/
// NOLINTNEXTLINE(performance-enum-size)
typedef enum
{
ROCPROFILER_SCRATCH_ALLOC_FLAG_NONE = 0,
ROCPROFILER_SCRATCH_ALLOC_FLAG_USE_ONCE =
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_USE_ONCE, ///< This scratch allocation is only valid for 1
///< dispatch.
ROCPROFILER_SCRATCH_ALLOC_FLAG_ALT =
HSA_AMD_EVENT_SCRATCH_ALLOC_FLAG_ALT, ///< Used alternate scratch instead of main scratch
} rocprofiler_scratch_alloc_flag_t;
@@ -0,0 +1,11 @@
#
#
# Installation of public KFD headers
#
#
set(ROCPROFILER_KFD_HEADER_FILES page_migration_args.h page_migration_id.h)
install(
FILES ${ROCPROFILER_KFD_HEADER_FILES}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocprofiler-sdk/kfd
COMPONENT development)
@@ -0,0 +1,104 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/kfd/page_migration_id.h>
#include <stdint.h>
ROCPROFILER_EXTERN_C_INIT
typedef struct rocprofiler_page_migration_none_t
{
char empty;
} rocprofiler_page_migration_none_t;
typedef struct rocprofiler_page_migration_page_migrate_start_t
{
uint64_t start_addr;
uint64_t end_addr;
rocprofiler_agent_id_t from_agent;
rocprofiler_agent_id_t to_agent;
rocprofiler_agent_id_t prefetch_agent;
rocprofiler_agent_id_t preferred_agent;
rocprofiler_page_migration_trigger_t trigger;
} rocprofiler_page_migration_page_migrate_start_t;
typedef struct rocprofiler_page_migration_page_migrate_end_t
{
uint64_t start_addr;
uint64_t end_addr;
rocprofiler_agent_id_t from_agent;
rocprofiler_agent_id_t to_agent;
rocprofiler_page_migration_trigger_t trigger;
} rocprofiler_page_migration_page_migrate_end_t;
typedef struct rocprofiler_page_migration_page_fault_start_t
{
uint32_t read_fault : 1;
rocprofiler_agent_id_t agent_id;
uint64_t address;
} rocprofiler_page_migration_page_fault_start_t;
typedef struct rocprofiler_page_migration_page_fault_end_t
{
uint32_t migrated : 1;
rocprofiler_agent_id_t agent_id;
uint64_t address;
} rocprofiler_page_migration_page_fault_end_t;
typedef struct rocprofiler_page_migration_queue_eviction_t
{
rocprofiler_agent_id_t agent_id;
rocprofiler_page_migration_queue_suspend_trigger_t trigger;
} rocprofiler_page_migration_queue_eviction_t;
typedef struct rocprofiler_page_migration_queue_restore_t
{
uint32_t rescheduled : 1;
rocprofiler_agent_id_t agent_id;
} rocprofiler_page_migration_queue_restore_t;
typedef struct rocprofiler_page_migration_unmap_from_gpu_t
{
uint64_t start_addr;
uint64_t end_addr;
rocprofiler_agent_id_t agent_id;
rocprofiler_page_migration_unmap_from_gpu_trigger_t trigger;
} rocprofiler_page_migration_unmap_from_gpu_t;
typedef union
{
rocprofiler_page_migration_none_t none;
rocprofiler_page_migration_page_migrate_start_t page_migrate_start;
rocprofiler_page_migration_page_migrate_end_t page_migrate_end;
rocprofiler_page_migration_page_fault_start_t page_fault_start;
rocprofiler_page_migration_page_fault_end_t page_fault_end;
rocprofiler_page_migration_queue_eviction_t queue_eviction;
rocprofiler_page_migration_queue_restore_t queue_restore;
rocprofiler_page_migration_unmap_from_gpu_t unmap_from_gpu;
uint64_t reserved[16];
} rocprofiler_page_migration_args_t;
ROCPROFILER_EXTERN_C_FINI
@@ -0,0 +1,83 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hsa.h>
#include <rocprofiler-sdk/hsa/api_trace_version.h>
#include <rocprofiler-sdk/version.h>
#include <stdint.h>
ROCPROFILER_EXTERN_C_INIT
/**
* @brief Page migration triggers
*
*/
typedef enum
{
ROCPROFILER_PAGE_MIGRATION_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH, ///< Migration triggered by a prefetch
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, ///< Triggered by a page fault on the GPU
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, ///< Triggered by a page fault on the CPU
ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, ///< Page evicted by linux TTM (Translation
///< Table Manager)
ROCPROFILER_PAGE_MIGRATION_TRIGGER_LAST,
} rocprofiler_page_migration_trigger_t;
/**
* @brief Page migration triggers causing the queue to suspend
*
*/
typedef enum
{
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, ///< Queue suspended by TTM (Translation
///< Table Manager) operation
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, ///< Queues evicted due to
///< process save
///< (checkpoint) by CRIU
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, ///< Queues restored during
///< process restore by CRIU
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_LAST,
} rocprofiler_page_migration_queue_suspend_trigger_t;
/**
* @brief Page migration triggers causing an unmap from the GPU
*
*/
typedef enum
{
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST,
} rocprofiler_page_migration_unmap_from_gpu_trigger_t;
ROCPROFILER_EXTERN_C_FINI
@@ -77,7 +77,7 @@ ROCPROFILER_EXTERN_C_FINI
#include "rocprofiler-sdk/hsa.h"
#include "rocprofiler-sdk/intercept_table.h"
#include "rocprofiler-sdk/internal_threading.h"
// #include "rocprofiler-sdk/marker.h"
#include "rocprofiler-sdk/marker.h"
#include "rocprofiler-sdk/pc_sampling.h"
#include "rocprofiler-sdk/profile_config.h"
// #include "rocprofiler-sdk/spm.h"
@@ -1,6 +1,6 @@
#
#
set(ROCPROFILER_LIB_UVM_SOURCES page_migration.cpp)
set(ROCPROFILER_LIB_UVM_SOURCES abi.cpp page_migration.cpp)
set(ROCPROFILER_LIB_UVM_HEADERS defines.hpp page_migration.hpp utils.hpp)
target_sources(rocprofiler-sdk-object-library PRIVATE ${ROCPROFILER_LIB_UVM_SOURCES}
@@ -0,0 +1,106 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <fmt/core.h>
#include "lib/common/container/small_vector.hpp"
#include "lib/common/defines.hpp"
#include "lib/common/mpl.hpp"
#include "lib/rocprofiler-sdk/details/kfd_ioctl.h"
#include "lib/rocprofiler-sdk/page_migration/utils.hpp"
#define ASSERT_SAME(A, B) static_assert(static_cast<size_t>(A) == static_cast<size_t>(B))
#define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1
#include "lib/rocprofiler-sdk/page_migration/page_migration.def.cpp"
#undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL
namespace rocprofiler
{
namespace page_migration
{
using namespace rocprofiler::page_migration;
using namespace rocprofiler::common::container;
using rocprofiler_page_migration_seq_t = std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>;
static_assert(KFD_SMI_EVENT_NONE == 0);
static_assert(KFD_SMI_EVENT_MIGRATE_START == 5);
static_assert(KFD_SMI_EVENT_MIGRATE_END == 6);
static_assert(KFD_SMI_EVENT_PAGE_FAULT_START == 7);
static_assert(KFD_SMI_EVENT_PAGE_FAULT_END == 8);
static_assert(KFD_SMI_EVENT_QUEUE_EVICTION == 9);
static_assert(KFD_SMI_EVENT_QUEUE_RESTORE == 10);
static_assert(KFD_SMI_EVENT_UNMAP_FROM_GPU == 11);
static_assert(KFD_SMI_EVENT_ALL_PROCESS == 64);
// Update page_migration.def.cpp with event mappings
// Update page_migration.cpp to parse and report new event
static_assert(ROCPROFILER_PAGE_MIGRATION_LAST == 8,
"New event added, update KFD to ROCPROFILER mappings");
// clang-format off
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, KFD_MIGRATE_TRIGGER_TTM_EVICTION );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, KFD_QUEUE_EVICTION_TRIGGER_SVM );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, KFD_QUEUE_EVICTION_TRIGGER_USERPTR );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, KFD_QUEUE_EVICTION_TRIGGER_TTM );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, KFD_QUEUE_EVICTION_CRIU_RESTORE );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE);
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU );
// clang-format on
static_assert(kfd_bitmask(std::index_sequence<ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START,
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU>()) ==
(KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_PAGE_FAULT_START) |
KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_END) |
KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU)));
static_assert((page_migration_info<ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END>::kfd_bitmask |
page_migration_info<ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION>::kfd_bitmask |
page_migration_info<ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU>::kfd_bitmask) ==
(KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_MIGRATE_END) |
KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_QUEUE_EVICTION) |
KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU)));
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_MIGRATE_START, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_MIGRATE_END, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_PAGE_FAULT_START,
rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_PAGE_FAULT_END, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_QUEUE_EVICTION, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_QUEUE_RESTORE, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE);
static_assert(kfd_to_rocprof_op(KFD_SMI_EVENT_UNMAP_FROM_GPU, rocprofiler_page_migration_seq_t{}) ==
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU);
} // namespace page_migration
} // namespace rocprofiler
@@ -44,43 +44,17 @@
} \
} while(0)
#define APPEND_UVM_EVENT(X) ROCPROFILER_UVM_EVENT_##X
#define APPEND_1(X) APPEND_UVM_EVENT(X)
#define CONCAT(X, Y) X##Y
#define APPEND_2(A1, A2) APPEND_1(A1), APPEND_1(A2)
#define APPEND_3(A1, A2, A3) APPEND_2(A1, A2), APPEND_1(A3)
#define APPEND_4(A1, A2, A3, A4) APPEND_3(A1, A2, A3), APPEND_1(A4)
#define APPEND_5(A1, A2, A3, A4, A5) APPEND_4(A1, A2, A3, A4), APPEND_1(A5)
#define MACRO_N(MACRO, N, ...) CONCAT(MACRO, N)(__VA_ARGS__)
#define APPLY_N(MACRO, ...) MACRO_N(MACRO, IMPL_DETAIL_FOR_EACH_NARG(__VA_ARGS__), __VA_ARGS__)
#define GET_UVM_ENUMS(...) APPLY_N(APPEND_, __VA_ARGS__)
// static constexpr size_t uvm_event = UVM_ENUM;
#define SPECIALIZE_UVM_KFD_EVENT(UVM_ENUM, KFD_ENUM, FORMAT_STRING) \
template <> \
struct uvm_event_info<UVM_ENUM> \
{ \
static constexpr size_t kfd_event = KFD_ENUM; \
static constexpr std::string_view format_str{FORMAT_STRING}; \
#define SPECIALIZE_PAGE_MIGRATION_INFO(ROCPROF_NAME, KFD_NAME, FORMAT_STRING) \
template <> \
struct page_migration_info<ROCPROFILER_PAGE_MIGRATION_##ROCPROF_NAME> \
{ \
static constexpr auto name = "PAGE_MIGRATION_" #ROCPROF_NAME; \
static constexpr size_t operation = ROCPROFILER_PAGE_MIGRATION_##ROCPROF_NAME; \
static constexpr size_t kfd_operation = KFD_SMI_EVENT_##KFD_NAME; \
static constexpr size_t kfd_bitmask = bitmask(KFD_SMI_EVENT_##KFD_NAME); \
static constexpr std::string_view format_str = FORMAT_STRING; \
};
#define SPECIALIZE_PAGE_MIGRATION_INFO(TYPE, ...) \
template <> \
struct page_migration_info<ROCPROFILER_PAGE_MIGRATION_##TYPE> \
{ \
static constexpr auto operation_idx = ROCPROFILER_PAGE_MIGRATION_##TYPE; \
static constexpr auto name = "PAGE_MIGRATION_" #TYPE; \
static constexpr size_t uvm_bitmask = \
bitmask(std::index_sequence<GET_UVM_ENUMS(__VA_ARGS__)>()); \
static constexpr size_t kfd_bitmask = \
to_kfd_bitmask(std::index_sequence<GET_UVM_ENUMS(__VA_ARGS__)>()); \
}
#define COPY_FROM_START_1(MEMBER) end.MEMBER = start.MEMBER;
#define COPY_FROM_START_2(UNION_TYPE, MEMBER) end.UNION_TYPE.MEMBER = start.UNION_TYPE.MEMBER;
#define SPECIALIZE_KFD_IOC_IOCTL(STRUCT, ARG_IOC) \
template <> \
struct IOC_event<STRUCT> \
La diferencia del archivo ha sido suprimido porque es demasiado grande Cargar Diff
@@ -30,45 +30,30 @@ namespace rocprofiler
{
namespace page_migration
{
using namespace rocprofiler::common;
using kfd_event_id_t = decltype(KFD_SMI_EVENT_NONE);
using migrate_trigger_t = rocprofiler_page_migration_trigger_t;
using page_migration_record_t = rocprofiler_buffer_tracing_page_migration_record_t;
using queue_suspend_trigger_t = rocprofiler_page_migration_queue_suspend_trigger_t;
using unmap_from_gpu_trigger_t = rocprofiler_page_migration_unmap_from_gpu_trigger_t;
using trigger_type_list_t = common::mpl::type_list<rocprofiler_page_migration_trigger_t,
queue_suspend_trigger_t,
unmap_from_gpu_trigger_t>;
// clang-format off
// Map ROCPROF UVM enums to KFD enums
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_NONE, KFD_SMI_EVENT_NONE, "Error: Invalid UVM event from KFD" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_MIGRATE_START, KFD_SMI_EVENT_MIGRATE_START, "%x %ld -%d @%lx(%lx) %x->%x %x:%x %d\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_MIGRATE_END, KFD_SMI_EVENT_MIGRATE_END, "%x %ld -%d @%lx(%lx) %x->%x %d\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, KFD_SMI_EVENT_PAGE_FAULT_START, "%x %ld -%d @%lx(%x) %c\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END, KFD_SMI_EVENT_PAGE_FAULT_END, "%x %ld -%d @%lx(%x) %c\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION, KFD_SMI_EVENT_QUEUE_EVICTION, "%x %ld -%d %x %d\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE, KFD_SMI_EVENT_QUEUE_RESTORE, "%x %ld -%d %x\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU, KFD_SMI_EVENT_UNMAP_FROM_GPU, "%x %ld -%d @%lx(%lx) %x %d\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(NONE, NONE, "Error: Invalid UVM event from KFD" );
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE_START, MIGRATE_START, "%x %ld -%d @%lx(%lx) %x->%x %x:%x %d\n");
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE_END, MIGRATE_END, "%x %ld -%d @%lx(%lx) %x->%x %d\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT_START, PAGE_FAULT_START, "%x %ld -%d @%lx(%x) %c\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT_END, PAGE_FAULT_END, "%x %ld -%d @%lx(%x) %c\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_EVICTION, QUEUE_EVICTION, "%x %ld -%d %x %d\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_RESTORE, QUEUE_RESTORE, "%x %ld -%d %x\n" );
SPECIALIZE_PAGE_MIGRATION_INFO(UNMAP_FROM_GPU, UNMAP_FROM_GPU, "%x %ld -%d @%lx(%lx) %x %d\n" );
#undef SPECIALIZE_PAGE_MIGRATION_INFO
// clang-format on
# undef SPECIALIZE_UVM_KFD_EVENT
SPECIALIZE_PAGE_MIGRATION_INFO(NONE, NONE);
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE, MIGRATE_START, MIGRATE_END);
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT, PAGE_FAULT_START, PAGE_FAULT_END);
SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_SUSPEND, QUEUE_EVICTION, QUEUE_RESTORE);
SPECIALIZE_PAGE_MIGRATION_INFO(UNMAP_FROM_GPU, UNMAP_FROM_GPU);
template <size_t UvmOpInx, size_t... OpInxs>
constexpr size_t to_rocprof_op_impl(std::index_sequence<OpInxs...>)
{
return ((((bitmask(UvmOpInx) & page_migration_info<OpInxs>::uvm_bitmask) != 0) * OpInxs) + ...);
}
template <size_t... OpInxs>
constexpr auto _to_rocprof_op_impl(std::index_sequence<OpInxs...>)
{
return std::array{
to_rocprof_op_impl<OpInxs>(std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{})...};
}
constexpr auto
to_rocprof_op(size_t pos)
{
using rop = rocprofiler_page_migration_operation_t;
return static_cast<rop>(
_to_rocprof_op_impl(std::make_index_sequence<ROCPROFILER_UVM_EVENT_LAST>{})[pos]);
}
} // namespace page_migration
} // namespace rocprofiler
#endif
@@ -22,12 +22,15 @@
#pragma once
#include "lib/common/container/small_vector.hpp"
#include "lib/rocprofiler-sdk/details/kfd_ioctl.h"
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/fwd.h>
#include <algorithm>
#include <cstdint>
#include <string_view>
#include <unordered_map>
#include <utility>
@@ -35,38 +38,32 @@ namespace rocprofiler
{
namespace page_migration
{
// serves as an overview of what events we capture and report
enum fault_type_t
{
NONE,
READ,
WRITE,
};
/* serves as an overview of what events we capture and report
struct uvm_event_page_fault_start_t
{
int kind;
uint64_t start_timestamp;
int pid;
int node_id;
uint64_t address;
fault_type_t fault;
};
struct uvm_event_page_fault_end_t
struct event_page_fault_start_t
{
int kind;
uint64_t end_timestamp;
uint64_t timestamp;
int pid;
int node_id;
uint64_t address;
fault_t fault;
};
struct event_page_fault_end_t
{
int kind;
uint64_t timestamp;
uint32_t pid;
int node_id;
uint64_t address;
bool migrated;
};
struct uvm_event_migrate_start_t
struct event_migrate_start_t
{
int kind;
uint64_t start_timestamp;
uint64_t timestamp;
uint32_t pid;
uint64_t start;
uint64_t end_offset;
@@ -77,10 +74,10 @@ struct uvm_event_migrate_start_t
uint32_t trigger;
};
struct uvm_event_migrate_end_t
struct event_migrate_end_t
{
int kind;
uint64_t end_timestamp;
uint64_t timestamp;
uint32_t pid;
uint64_t start;
uint64_t end_offset;
@@ -89,25 +86,25 @@ struct uvm_event_migrate_end_t
uint32_t trigger;
};
struct uvm_event_queue_eviction_t
struct event_queue_eviction_t
{
int kind;
uint64_t start_timestamp;
uint64_t timestamp;
uint32_t pid;
int node_id;
uint32_t trigger;
};
struct uvm_event_queue_restore_t
struct event_queue_restore_t
{
int kind;
uint64_t end_timestamp;
uint64_t timestamp;
uint32_t pid;
int node_id;
bool rescheduled;
};
struct uvm_event_unmap_from_gpu_t
struct event_unmap_from_gpu_t
{
int kind;
uint64_t timestamp;
@@ -117,13 +114,13 @@ struct uvm_event_unmap_from_gpu_t
int node_id;
uint32_t trigger;
};
template <size_t e>
struct uvm_event_info;
*/
template <size_t>
struct page_migration_info;
using namespace rocprofiler::common;
namespace kfd
{
template <typename T>
@@ -145,74 +142,56 @@ constexpr size_t bitmask(std::index_sequence<Args...>)
return (bitmask(Args) | ...);
}
enum uvm_event_id_t
template <size_t... Ints>
constexpr size_t kfd_bitmask(std::index_sequence<Ints...>)
{
ROCPROFILER_UVM_EVENT_NONE,
ROCPROFILER_UVM_EVENT_MIGRATE_START,
ROCPROFILER_UVM_EVENT_MIGRATE_END,
ROCPROFILER_UVM_EVENT_PAGE_FAULT_START,
ROCPROFILER_UVM_EVENT_PAGE_FAULT_END,
ROCPROFILER_UVM_EVENT_QUEUE_EVICTION,
ROCPROFILER_UVM_EVENT_QUEUE_RESTORE,
ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU,
ROCPROFILER_UVM_EVENT_LAST,
};
return (page_migration_info<Ints>::kfd_bitmask | ...);
}
static_assert(KFD_SMI_EVENT_NONE == 0);
static_assert(KFD_SMI_EVENT_MIGRATE_START == 5);
static_assert(KFD_SMI_EVENT_MIGRATE_END == 6);
static_assert(KFD_SMI_EVENT_PAGE_FAULT_START == 7);
static_assert(KFD_SMI_EVENT_PAGE_FAULT_END == 8);
static_assert(KFD_SMI_EVENT_QUEUE_EVICTION == 9);
static_assert(KFD_SMI_EVENT_QUEUE_RESTORE == 10);
static_assert(KFD_SMI_EVENT_UNMAP_FROM_GPU == 11);
static_assert(KFD_SMI_EVENT_ALL_PROCESS == 64);
template <size_t OpInx, size_t... OpInxs>
constexpr size_t
kfd_bitmask_impl(size_t rocprof_op, std::index_sequence<OpInx, OpInxs...>)
{
if(rocprof_op == OpInx) return page_migration_info<OpInx>::kfd_bitmask;
if constexpr(sizeof...(OpInxs) > 0)
return kfd_bitmask_impl(rocprof_op, std::index_sequence<OpInxs...>{});
else
return 0;
}
template <size_t... OpInxs>
constexpr auto
kfd_bitmask(const container::small_vector<size_t>& rocprof_event_ids,
std::index_sequence<OpInxs...>)
{
uint64_t m{};
for(const size_t& event_id : rocprof_event_ids)
{
m |= kfd_bitmask_impl(event_id, std::index_sequence<OpInxs...>{});
}
return m;
}
template <size_t OpInx, size_t... OpInxs>
constexpr size_t
kfd_to_rocprof_op(size_t kfd_id, std::index_sequence<OpInx, OpInxs...>)
{
if(kfd_id == page_migration_info<OpInx>::kfd_operation) return OpInx;
if constexpr(sizeof...(OpInxs) > 0)
return kfd_to_rocprof_op(kfd_id, std::index_sequence<OpInxs...>{});
else
return 0;
}
size_t
get_rocprof_op(const std::string_view event_data);
void
kfd_readlines(const std::string_view str, void(handler)(std::string_view));
using rocprof_buffer_op_t = rocprofiler_page_migration_operation_t;
using node_fd_t = int;
using event_map_t =
std::unordered_map<uint64_t, rocprofiler_buffer_tracing_page_migration_record_t>;
using events_cache_t = std::array<event_map_t, ROCPROFILER_PAGE_MIGRATION_LAST>;
template <size_t... Ints>
constexpr size_t to_kfd_bitmask(std::index_sequence<Ints...>)
{
return bitmask(std::index_sequence<uvm_event_info<Ints>::kfd_event...>());
}
template <rocprofiler_page_migration_operation_t... Ops>
constexpr size_t to_uvm_bitmask(std::index_sequence<Ops...>)
{
return bitmask(std::index_sequence<static_cast<uint32_t>(Ops)...>());
}
template <size_t RocprofOpIdx, size_t UvmOpIdx>
constexpr bool
is_rocprof_uvm_map()
{
return page_migration_info<RocprofOpIdx>::uvm_bitmask & bitmask(UvmOpIdx);
}
template <size_t RocprofOpIdx, size_t OpInx, size_t... OpInxs>
constexpr bool
_is_rocprof_uvm_map(size_t uvm_event, std::index_sequence<OpInx, OpInxs...>)
{
if(OpInx == uvm_event)
return is_rocprof_uvm_map<RocprofOpIdx, OpInx>();
else if constexpr(sizeof...(OpInxs) > 0)
return _is_rocprof_uvm_map<RocprofOpIdx>(uvm_event, std::index_sequence<OpInxs...>{});
else
return false;
}
template <size_t RocprofOpIdx>
constexpr bool
is_rocprof_uvm_map(size_t uvm_event)
{
return _is_rocprof_uvm_map<RocprofOpIdx>(
uvm_event, std::make_index_sequence<ROCPROFILER_UVM_EVENT_LAST>{});
}
} // namespace page_migration
} // namespace rocprofiler
@@ -11,8 +11,16 @@ include(GoogleTest)
#
# -------------------------------------------------------------------------------------- #
set(rocprofiler_lib_sources agent.cpp buffer.cpp contexts.cpp hsa.cpp naming.cpp
timestamp.cpp version.cpp hsa_barrier.cpp)
set(rocprofiler_lib_sources
agent.cpp
buffer.cpp
contexts.cpp
hsa.cpp
naming.cpp
timestamp.cpp
version.cpp
hsa_barrier.cpp
page_migration.cpp)
add_executable(rocprofiler-sdk-lib-tests)
target_sources(rocprofiler-sdk-lib-tests PRIVATE ${rocprofiler_lib_sources}
@@ -23,6 +31,7 @@ target_link_libraries(
rocprofiler-sdk::rocprofiler-sdk-common-library
rocprofiler-sdk::counter-test-constants
rocprofiler-sdk::rocprofiler-sdk-hsa-runtime
rocprofiler-sdk::rocprofiler-sdk-drm
GTest::gtest
GTest::gtest_main)
@@ -40,9 +49,8 @@ set_tests_properties(${lib_TESTS} PROPERTIES TIMEOUT 30 LABELS "unittests")
#
# -------------------------------------------------------------------------------------- #
set(rocprofiler_shared_lib_sources
external_correlation.cpp intercept_table.cpp page_migration.cpp registration.cpp
roctx.cpp status.cpp)
set(rocprofiler_shared_lib_sources external_correlation.cpp intercept_table.cpp
registration.cpp roctx.cpp status.cpp)
add_executable(rocprofiler-sdk-lib-tests-shared)
target_sources(rocprofiler-sdk-lib-tests-shared PRIVATE ${rocprofiler_shared_lib_sources})
@@ -20,25 +20,26 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "lib/common/container/small_vector.hpp"
#include "lib/common/defines.hpp"
#include "lib/rocprofiler-sdk/details/kfd_ioctl.h"
#include "lib/common/mpl.hpp"
#include "lib/rocprofiler-sdk/page_migration/utils.hpp"
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <fmt/format.h>
#include <fmt/core.h>
#include <gtest/gtest.h>
#include <sstream>
#include <string_view>
#include <utility>
#define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1
#include "lib/rocprofiler-sdk/page_migration/page_migration.def.cpp"
#undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL
#define ASSERT_SAME(A, B) static_assert(static_cast<size_t>(A) == static_cast<size_t>(B))
namespace
{
constexpr std::string_view MULTILINE_STRING = "This is 0 Line 0\n"
@@ -61,7 +62,7 @@ return_line(const std::string_view line)
auto
parse_lines()
{
KFD_EVENT_PARSE_EVENTS(MULTILINE_STRING, return_line);
rocprofiler::page_migration::kfd_readlines(MULTILINE_STRING, return_line);
}
TEST(page_migration, readlines)
@@ -70,107 +71,35 @@ TEST(page_migration, readlines)
parse_lines();
}
TEST(page_migration, parse_kvm_events)
{
// Ensure all lines are read
parse_lines();
}
TEST(page_migtation, rocprof_kfd_map)
{
using namespace ::rocprofiler::page_migration;
using namespace rocprofiler::page_migration;
using namespace rocprofiler::common::container;
using rocprofiler_page_migration_seq_t =
std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>;
const small_vector<size_t> vec{ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END,
ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU};
EXPECT_EQ((page_migration_info<ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END>::kfd_bitmask |
page_migration_info<ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION>::kfd_bitmask |
page_migration_info<ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU>::kfd_bitmask),
kfd_bitmask(vec, rocprofiler_page_migration_seq_t{}));
const auto to_kfd_str = [](kfd_smi_event e) {
std::string str = fmt::format("{:x} ", static_cast<size_t>(e));
return rocprofiler::page_migration::get_rocprof_op({str});
};
// clang-format off
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_MIGRATE_START) == ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE );
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END) == ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT );
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU) == ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU );
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION) == ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH, KFD_MIGRATE_TRIGGER_PREFETCH );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, KFD_MIGRATE_TRIGGER_TTM_EVICTION );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, KFD_QUEUE_EVICTION_TRIGGER_SVM );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, KFD_QUEUE_EVICTION_TRIGGER_USERPTR );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, KFD_QUEUE_EVICTION_TRIGGER_TTM );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, KFD_QUEUE_EVICTION_CRIU_RESTORE );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE);
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU );
static_assert(to_kfd_bitmask(std::index_sequence<
ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU>()) ==
(KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_PAGE_FAULT_START)
| KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU)));
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_MIGRATE_START), ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_MIGRATE_END), ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_PAGE_FAULT_START), ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_PAGE_FAULT_END), ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_QUEUE_EVICTION), ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_QUEUE_RESTORE), ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE);
EXPECT_EQ(to_kfd_str(KFD_SMI_EVENT_UNMAP_FROM_GPU), ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU);
// clang-format on
}
@@ -40,8 +40,3 @@ target_compile_options(page-migration PRIVATE -W -Wall -Wextra -Wpedantic -Wshad
find_package(Threads REQUIRED)
target_link_libraries(page-migration PRIVATE Threads::Threads)
install(
TARGETS page-migration
DESTINATION bin
COMPONENT tests)
@@ -27,13 +27,14 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <thread>
#include <vector>
#include <fcntl.h>
#include <sys/mman.h>
@@ -117,14 +118,14 @@ private:
};
int
main()
run_test(int num_iter)
{
using namespace std::chrono_literals;
static constexpr auto NUM_PAGES = 16;
const auto PAGE_SIZE_BYTES = ::sysconf(_SC_PAGE_SIZE);
constexpr size_t NUM_PAGES = 512;
const size_t PAGE_SIZE_BYTES = ::sysconf(_SC_PAGE_SIZE);
size_t elem_count = (NUM_PAGES * PAGE_SIZE_BYTES) / sizeof(size_t); // one page?
const size_t elem_count = (NUM_PAGES * PAGE_SIZE_BYTES) / sizeof(size_t);
auto alloc = mmap_allocator(NUM_PAGES);
void* data_v = alloc.get<void>();
@@ -142,8 +143,10 @@ main()
HIP_API_CALL(hipHostRegister(data, elem_count * sizeof(size_t), hipHostRegisterDefault));
char maps[1024 * 1024];
std::memset(maps, '\0', 1024 * 1024);
constexpr size_t MAPS_BUFFER_SIZE = 1024 * 1024;
char maps[MAPS_BUFFER_SIZE];
std::memset(maps, '\0', MAPS_BUFFER_SIZE);
auto fd = open("/proc/self/maps", O_RDONLY | O_CLOEXEC);
if(fd == -1)
@@ -153,7 +156,7 @@ main()
exit(-1);
}
auto bytes = read(fd, maps, 1024 * 1024 - 1);
auto bytes = read(fd, maps, MAPS_BUFFER_SIZE - 1);
if(bytes == -1)
{
auto ecode = errno;
@@ -190,33 +193,96 @@ main()
}
}
for(int iter = 0; iter < 1000; ++iter)
hipStream_t stream{};
HIP_API_CALL(hipStreamCreate(&stream));
for(int iter = 0; iter < num_iter; ++iter)
{
for(size_t i = 0; i < elem_count; ++i)
{
data[i] = i;
}
// std::cout << "launching..." << std::endl;
hipLaunchKernelGGL(kernel, 128, 64, 0, 0, data, elem_count);
hipLaunchKernelGGL(kernel, 1024, 1024, 0, stream, data, elem_count);
// std::cout << "syncing..." << std::endl;
HIP_API_CALL(hipDeviceSynchronize());
HIP_API_CALL(hipStreamSynchronize(stream));
// std::cout << "checking..." << std::endl;
for(size_t i = 0; i < elem_count; ++i)
{
if(data[i] != (i * 2))
const auto data_i = data[i];
if(data_i != (i * 2))
{
auto msg = std::stringstream{};
msg << "GPU computed value at " << i << " in iteration " << iter
<< " is incorrect. Expected " << (i * 2) << ", found " << data[i];
<< " is incorrect. Expected " << (i * 2) << ", found " << data_i;
throw std::runtime_error{msg.str()};
}
}
std::cout << "Iteration " << std::setw(2) << iter << ": correct\n" << std::flush;
}
HIP_API_CALL(hipStreamDestroy(stream));
HIP_API_CALL(hipDeviceSynchronize());
return 0;
}
int
main(int argc, const char** argv)
{
using namespace std::chrono_literals;
const auto usage_msg = [](const char** _argv) {
fprintf(stderr, "usage: %s <NUMBER OF THREADS> <ITERATIONS PER THREAD>\n", _argv[0]);
};
if(argc != 3)
{
usage_msg(argv);
exit(EXIT_FAILURE);
}
for(int i = 1; i < argc; ++i)
{
auto _arg = std::string{argv[i]};
if(_arg == "?" || _arg == "-h" || _arg == "--help")
{
usage_msg(argv);
exit(EXIT_SUCCESS);
}
}
const auto num_threads = std::atoi(argv[1]);
if(num_threads < 1)
{
fprintf(stderr, "Error: Invalid value %d for num_threads (min 1)\n", num_threads);
exit(EXIT_FAILURE);
}
const auto num_iter = std::atoi(argv[2]);
if(num_iter < 1)
{
fprintf(stderr, "Error: Invalid value %d for num_iter (min 1)\n", num_iter);
exit(EXIT_FAILURE);
}
run_test(num_iter);
std::vector<std::thread> threads;
threads.reserve(num_threads);
std::cerr << "Running " << num_iter << " iterations/thread on " << num_threads << " threads\n";
for(auto i = 0; i < num_threads; ++i)
{
threads.emplace_back([_num_iter = num_iter]() { run_test(_num_iter); });
}
std::cerr << "Waiting for threads\n";
for(auto& t : threads)
{
t.join();
}
return 0;
}
@@ -17,7 +17,7 @@ else()
set(PRELOAD_ENV "LD_PRELOAD=$<TARGET_FILE:rocprofiler-sdk-json-tool>")
endif()
add_test(NAME test-page-migration-execute COMMAND $<TARGET_FILE:page-migration>)
add_test(NAME test-page-migration-execute COMMAND $<TARGET_FILE:page-migration> 4 1024)
set(page-migration-env
"${PRELOAD_ENV}"
@@ -2,6 +2,7 @@
import json
import pytest
from rocprofiler_sdk.pytest_utils.dotdict import dotdict
def pytest_addoption(parser):
@@ -24,4 +25,4 @@ def input_data(request):
return pytest.skip(
"Skipping test because KFD does not support SVM event reporting"
)
return data
return dotdict(data)
@@ -271,71 +271,113 @@ def get_allocated_pages(callback_records):
assert "hostPtr" in itr["args"].keys(), f"{itr}"
host_register_record.append(itr)
assert len(host_register_record) == 1
alloc_size = int(host_register_record[0]["args"]["sizeBytes"], 10)
start_addr = int(host_register_record[0]["args"]["hostPtr"], 16)
end_addr = start_addr + alloc_size
num_host_register_calls = len(host_register_record)
assert num_host_register_calls == 5, "Expected 5 hipHostRegister calls in test"
return start_addr, end_addr, alloc_size
ret = []
for i in range(num_host_register_calls):
alloc_size = int(host_register_record[0]["args"]["sizeBytes"], 10)
start_addr = int(host_register_record[0]["args"]["hostPtr"], 16)
end_addr = start_addr + alloc_size
ret.append((start_addr, end_addr))
return ret
def validate_node(id, nodes):
assert id.handle in nodes
def test_page_migration_data(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
buffer_records = sdk_data["buffer_records"]
callback_records = sdk_data["callback_records"]
page_migration_buffers = buffer_records["page_migration"]
buffer_records = sdk_data.buffer_records
callback_records = sdk_data.callback_records
page_migtation_buffers = buffer_records.page_migration
_, bf_op_names = get_operation(buffer_records, "PAGE_MIGRATION")
assert bf_op_names[0] == "PAGE_MIGRATION_NONE"
assert "PAGE_MIGRATION_PAGE_MIGRATE" in bf_op_names
assert len(bf_op_names) == 5
node_ids = set(x["gpu_id"] for x in sdk_data["agents"])
start_addr, end_addr, alloc_size = get_allocated_pages(callback_records)
for op_name in bf_op_names:
assert "PAGE_MIGRATION" in op_name
assert start_addr < end_addr and start_addr + alloc_size == end_addr
assert int(alloc_size) == 16 * 4096 # We allocated 16 pages in the test
assert len(bf_op_names) == 8
# PID must be same
assert len(set(r["pid"] for r in page_migration_buffers)) == 1
nodes = set(x.id.handle for x in sdk_data.agents)
allocations = get_allocated_pages(callback_records)
for r in page_migration_buffers:
op = r["operation"]
for start_addr, end_addr in allocations:
assert r["size"] == 136
assert op != 0 and bf_op_names[op] != "PAGE_MIGRATION_NONE"
assert bf_op_names[op].lower().replace("page_migration_", "") in r.keys()
assert (
start_addr < end_addr
), "Expected start address less than end address for mmap range"
alloc_size = end_addr - start_addr
assert int(alloc_size) == 512 * 4096 # We allocated 512 pages in the test
if "page_migrate" in r:
assert r["page_migrate"]["from_node"] in node_ids
assert r["page_migrate"]["to_node"] in node_ids
assert r["page_migrate"]["prefetch_node"] in node_ids
assert r["page_migrate"]["preferred_node"] in node_ids
assert r["page_migrate"]["trigger"] >= 0
# PID must be same
assert len(set(r.pid for r in page_migtation_buffers)) == 1
if "queue_suspend" in r:
assert r["queue_suspend"]["trigger"] >= 0
assert r["queue_suspend"]["node_id"] in node_ids
for r in page_migtation_buffers:
op = r.operation
if "unmap_from_gpu" in r:
assert r["unmap_from_gpu"]["trigger"] >= 0
# unmap is "instantaneous"
assert 0 < r["start_timestamp"] == r["end_timestamp"]
else:
assert 0 < r["start_timestamp"] < r["end_timestamp"]
assert r.size == 160
assert op != 0 and bf_op_names[op] != "PAGE_MIGRATION_NONE"
assert bf_op_names[op].lower().replace("page_migration_", "") in r.keys()
# Check for events with our page
for r in page_migration_buffers:
if "page_fault_start" in r:
arg = r.page_fault_start
assert arg.read_fault < 2
validate_node(arg.agent_id, nodes)
assert arg.address > 0
if "page_migrate" in r and r["page_migrate"]["start_addr"] == start_addr:
assert end_addr == r["page_migrate"]["end_addr"]
if "page_fault_end" in r:
arg = r.page_fault_end
assert arg.migrated < 2
validate_node(arg.agent_id, nodes)
assert arg.address > 0
if "unmap_from_gpu" in r and r["unmap_from_gpu"]["start_addr"] == start_addr:
assert end_addr == r["unmap_from_gpu"]["end_addr"]
if "page_migrate_start" in r:
arg = r.page_migrate_start
assert (
0 < arg.start_addr < arg.end_addr
), "Expected start addr to be less than end addr"
if arg.start_addr == start_addr:
assert arg.end_addr == end_addr
validate_node(arg.from_agent, nodes)
validate_node(arg.to_agent, nodes)
validate_node(arg.prefetch_agent, nodes)
validate_node(arg.preferred_agent, nodes)
assert 0 <= arg.trigger < 4
# TODO: Check if a migrate a->b is paired up with b->a
# It may not always be reported towards app finalization
if "page_migrate_end" in r:
arg = r.page_migrate_end
assert (
0 < arg.start_addr < arg.end_addr
), "Expected start addr to be less than end addr"
if arg.start_addr == start_addr:
assert arg.end_addr == end_addr
validate_node(arg.from_agent, nodes)
validate_node(arg.to_agent, nodes)
assert 0 <= arg.trigger < 4
if "queue_eviction" in r:
arg = r.queue_eviction
validate_node(arg.agent_id, nodes)
assert 0 <= arg.trigger < 6
if "queue_restore" in r:
arg = r.queue_restore
assert arg.rescheduled < 2
validate_node(arg.agent_id, nodes)
if "unmap_from_gpu" in r:
arg = r.unmap_from_gpu
assert (
0 < arg.start_addr < arg.end_addr
), "Expected start addr to be less than end addr"
if arg.start_addr == start_addr:
assert arg.end_addr == end_addr
validate_node(arg.agent_id, nodes)
assert 0 <= arg.trigger < 3
if __name__ == "__main__":
@@ -1277,7 +1277,7 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
0,
page_migration_buffer);
constexpr auto message = "page migration service for memory copy configure";
constexpr auto message = "buffer tracing service for page migration configure";
if(page_migration_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
std::cerr << message
<< " failed: " << rocprofiler_get_status_string(page_migration_status)