Page migration reporting (#651)
* Page migration reporting support * Page migration: Update parser and reporting Container does not lave latest KFD header, so CI might fail * Add kfd_ioctl.h * Formatting * Update get_key - get key was not used (and shouldn't be), so delete it * clang-tidy fixes * Tests for page migration * Apply suggestions from code review Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * Update tests/bin/page-migration/CMakeLists.txt Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> * Update page-migration test app - add hipHostRegister to register mmap'ed allocation with HIP - misc cleanup and reorg - remove HSA_XNACK=1 from test env * Update lib/rocprofiler-sdk/tests/page_migration.cpp - fix compilation error * Minor updates (reorg, rename) * Page migration reporting support * Page migration: Update parser and reporting Container does not lave latest KFD header, so CI might fail * Update page migration tests, fix trigger types * Page Migration Tracing Support Refactoring (#753) * Reorganization * Update page migration init/fini * Formatting * Update page_migration.cpp - change logging severity * Skip test if KFD does not support page migration reporting * Rework skipping test if KFD does not support page migration * Fix event trigger enum values * Fix clang-diagnostic-unused-const-variable --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com> Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com>
Tento commit je obsažen v:
@@ -26,6 +26,8 @@
|
||||
#include <rocprofiler-sdk/defines.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
ROCPROFILER_EXTERN_C_INIT
|
||||
|
||||
/**
|
||||
@@ -35,6 +37,49 @@ ROCPROFILER_EXTERN_C_INIT
|
||||
* @{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Page migration triggers
|
||||
*
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
ROCPROFILER_PAGE_MIGRATION_TRIGGER_NONE = -1,
|
||||
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH,
|
||||
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU,
|
||||
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU,
|
||||
ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION,
|
||||
ROCPROFILER_PAGE_MIGRATION_TRIGGER_LAST,
|
||||
} rocprofiler_page_migration_trigger_t;
|
||||
|
||||
/**
|
||||
* @brief Page migration triggers causing the queue to suspend
|
||||
*
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_NONE = -1,
|
||||
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM,
|
||||
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR,
|
||||
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM,
|
||||
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND,
|
||||
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT,
|
||||
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE,
|
||||
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_LAST,
|
||||
} rocprofiler_page_migration_queue_suspend_trigger_t;
|
||||
|
||||
/**
|
||||
* @brief Page migration triggers causing an unmap from the GPU
|
||||
*
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_NONE = -1,
|
||||
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY,
|
||||
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE,
|
||||
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU,
|
||||
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST,
|
||||
} rocprofiler_page_migration_unmap_from_gpu_trigger_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Buffer HSA API Tracer Record.
|
||||
*/
|
||||
@@ -150,17 +195,63 @@ typedef struct rocprofiler_buffer_tracing_kernel_dispatch_record_t
|
||||
/// @brief runtime grid size
|
||||
} rocprofiler_buffer_tracing_kernel_dispatch_record_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8_t read_fault : 1; ///< Is the fault due to a read or a write
|
||||
uint8_t migrated : 1;
|
||||
uint32_t node_id; ///< GPU or CPU node ID which reports a page fault
|
||||
uint64_t address; ///< Address access that caused the page fault
|
||||
} rocprofiler_buffer_tracing_page_migration_page_fault_record_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint64_t start_addr; ///< Start address of the page being migrated
|
||||
uint64_t end_addr; ///< End address of the page being migrated
|
||||
uint32_t from_node; ///< Source node
|
||||
uint32_t to_node; ///< Destination node
|
||||
uint32_t prefetch_node; ///< Node from which page was prefetched
|
||||
uint32_t preferred_node; ///< Preferred destinaion node
|
||||
rocprofiler_page_migration_trigger_t trigger; ///< Cause of migration
|
||||
} rocprofiler_buffer_tracing_page_migration_page_migrate_record_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8_t rescheduled : 1;
|
||||
uint32_t node_id; ///< GPU node from which the queue was suspended
|
||||
rocprofiler_page_migration_queue_suspend_trigger_t trigger; ///< Cause of queue suspension
|
||||
} rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t node_id; ///< Node ID from which page was unmapped
|
||||
uint64_t start_addr; ///< Start address of unmapped page
|
||||
uint64_t end_addr; ///< End address of unmapped page
|
||||
rocprofiler_page_migration_unmap_from_gpu_trigger_t trigger; ///< Cause of unmap
|
||||
} rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Buffer Page Migration Tracer Record. Not implemented.
|
||||
* @brief ROCProfiler Buffer Page Migration Tracer Record
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint64_t size; ///< size of this struct
|
||||
rocprofiler_buffer_tracing_kind_t kind; ///< ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION
|
||||
rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record
|
||||
rocprofiler_tracing_operation_t operation;
|
||||
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
|
||||
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
|
||||
// Not Sure What is the info needed here?
|
||||
uint32_t pid;
|
||||
|
||||
union
|
||||
{
|
||||
rocprofiler_buffer_tracing_page_migration_page_fault_record_t page_fault;
|
||||
rocprofiler_buffer_tracing_page_migration_page_migrate_record_t page_migrate;
|
||||
rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t queue_suspend;
|
||||
rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t unmap_from_gpu;
|
||||
struct
|
||||
{
|
||||
uint64_t reserved[12];
|
||||
};
|
||||
};
|
||||
} rocprofiler_buffer_tracing_page_migration_record_t;
|
||||
|
||||
/**
|
||||
|
||||
@@ -89,6 +89,10 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
ROCPROFILER_STATUS_ERROR_AST_GENERATION_FAILED, ///< AST could not be generated correctly
|
||||
ROCPROFILER_STATUS_ERROR_AST_NOT_FOUND, ///< AST was not found
|
||||
ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD, ///< Event coordinate was not found by AQL profile
|
||||
ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL, ///< A service depends on a newer version of KFD
|
||||
///< (amdgpu kernel driver). Check logs for
|
||||
///< service that report incompatibility
|
||||
|
||||
ROCPROFILER_STATUS_LAST,
|
||||
} rocprofiler_status_t;
|
||||
|
||||
@@ -202,6 +206,21 @@ typedef enum // NOLINT(performance-enum-size)
|
||||
ROCPROFILER_MEMORY_COPY_LAST,
|
||||
} rocprofiler_memory_copy_operation_t;
|
||||
|
||||
/**
|
||||
* @brief Page migration event.
|
||||
*/
|
||||
typedef enum // NOLINT(performance-enum-size)
|
||||
{
|
||||
ROCPROFILER_PAGE_MIGRATION_NONE = 0, ///< Unknown event
|
||||
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE,
|
||||
ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT,
|
||||
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND,
|
||||
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU,
|
||||
// Any and all events, from all processes. Requires superuser
|
||||
// ROCPROFILER_PAGE_MIGRATION_ANY_ALL_PROCESSES,
|
||||
ROCPROFILER_PAGE_MIGRATION_LAST,
|
||||
} rocprofiler_page_migration_operation_t;
|
||||
|
||||
/**
|
||||
* @brief ROCProfiler Kernel Dispatch Tracing Operation Types.
|
||||
*/
|
||||
|
||||
@@ -45,6 +45,7 @@ add_subdirectory(marker)
|
||||
add_subdirectory(thread_trace)
|
||||
add_subdirectory(tracing)
|
||||
add_subdirectory(kernel_dispatch)
|
||||
add_subdirectory(page_migration)
|
||||
|
||||
target_link_libraries(
|
||||
rocprofiler-object-library
|
||||
|
||||
@@ -34,12 +34,15 @@
|
||||
#include "lib/rocprofiler-sdk/hsa/scratch_memory.hpp"
|
||||
#include "lib/rocprofiler-sdk/kernel_dispatch/kernel_dispatch.hpp"
|
||||
#include "lib/rocprofiler-sdk/marker/marker.hpp"
|
||||
#include "lib/rocprofiler-sdk/page_migration/page_migration.hpp"
|
||||
#include "lib/rocprofiler-sdk/registration.hpp"
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <limits>
|
||||
#include <stdexcept>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#define RETURN_STATUS_ON_FAIL(...) \
|
||||
@@ -105,8 +108,7 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t c
|
||||
if(rocprofiler::registration::get_init_status() > -1)
|
||||
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
|
||||
|
||||
static auto unsupported = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{
|
||||
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION};
|
||||
static auto unsupported = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{};
|
||||
if(unsupported.count(kind) > 0) return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
|
||||
auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id);
|
||||
@@ -137,6 +139,9 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t c
|
||||
ctx->buffered_tracer->domains, kind, operations[i]));
|
||||
}
|
||||
|
||||
if(kind == ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)
|
||||
RETURN_STATUS_ON_FAIL(rocprofiler::page_migration::init());
|
||||
|
||||
return ROCPROFILER_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -233,6 +238,10 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION:
|
||||
{
|
||||
val = rocprofiler::page_migration::name_by_id(operation);
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT:
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
@@ -340,6 +349,10 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION:
|
||||
{
|
||||
ops = rocprofiler::page_migration::get_ids();
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT:
|
||||
{
|
||||
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
#
|
||||
#
|
||||
set(ROCPROFILER_LIB_UVM_SOURCES page_migration.cpp)
|
||||
set(ROCPROFILER_LIB_UVM_HEADERS defines.hpp page_migration.hpp utils.hpp)
|
||||
|
||||
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_UVM_SOURCES}
|
||||
${ROCPROFILER_LIB_UVM_HEADERS})
|
||||
|
||||
add_subdirectory(details)
|
||||
@@ -0,0 +1,87 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#define KFD_EVENT_PARSE_EVENTS(X, HANDLER) \
|
||||
do \
|
||||
{ \
|
||||
const auto find_newline = [&](auto b) { return std::find(b, X.cend(), '\n'); }; \
|
||||
\
|
||||
const auto* cursor = X.cbegin(); \
|
||||
\
|
||||
for(const auto* pos = find_newline(cursor); pos != X.cend(); pos = find_newline(cursor)) \
|
||||
{ \
|
||||
size_t char_count = pos - cursor; \
|
||||
assert(char_count > 0); \
|
||||
std::string_view event_str{cursor, char_count}; \
|
||||
\
|
||||
LOG(INFO) << fmt::format("KFD event: [{}]", event_str); \
|
||||
HANDLER(event_str); \
|
||||
\
|
||||
cursor = pos + 1; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define APPEND_UVM_EVENT(X) ROCPROFILER_UVM_EVENT_##X
|
||||
#define APPEND_1(X) APPEND_UVM_EVENT(X)
|
||||
#define CONCAT(X, Y) X##Y
|
||||
#define APPEND_2(A1, A2) APPEND_1(A1), APPEND_1(A2)
|
||||
#define APPEND_3(A1, A2, A3) APPEND_2(A1, A2), APPEND_1(A3)
|
||||
#define APPEND_4(A1, A2, A3, A4) APPEND_3(A1, A2, A3), APPEND_1(A4)
|
||||
#define APPEND_5(A1, A2, A3, A4, A5) APPEND_4(A1, A2, A3, A4), APPEND_1(A5)
|
||||
|
||||
#define MACRO_N(MACRO, N, ...) CONCAT(MACRO, N)(__VA_ARGS__)
|
||||
#define APPLY_N(MACRO, ...) MACRO_N(MACRO, IMPL_DETAIL_FOR_EACH_NARG(__VA_ARGS__), __VA_ARGS__)
|
||||
|
||||
#define GET_UVM_ENUMS(...) APPLY_N(APPEND_, __VA_ARGS__)
|
||||
|
||||
// static constexpr size_t uvm_event = UVM_ENUM;
|
||||
#define SPECIALIZE_UVM_KFD_EVENT(UVM_ENUM, KFD_ENUM, FORMAT_STRING) \
|
||||
template <> \
|
||||
struct uvm_event_info<UVM_ENUM> \
|
||||
{ \
|
||||
static constexpr size_t kfd_event = KFD_ENUM; \
|
||||
static constexpr std::string_view format_str{FORMAT_STRING}; \
|
||||
};
|
||||
|
||||
#define SPECIALIZE_PAGE_MIGRATION_INFO(TYPE, ...) \
|
||||
template <> \
|
||||
struct page_migration_info<ROCPROFILER_PAGE_MIGRATION_##TYPE> \
|
||||
{ \
|
||||
static constexpr auto operation_idx = ROCPROFILER_PAGE_MIGRATION_##TYPE; \
|
||||
static constexpr auto name = #TYPE; \
|
||||
static constexpr size_t uvm_bitmask = \
|
||||
bitmask(std::index_sequence<GET_UVM_ENUMS(__VA_ARGS__)>()); \
|
||||
static constexpr size_t kfd_bitmask = \
|
||||
to_kfd_bitmask(std::index_sequence<GET_UVM_ENUMS(__VA_ARGS__)>()); \
|
||||
}
|
||||
|
||||
#define COPY_FROM_START_1(MEMBER) end.MEMBER = start.MEMBER;
|
||||
#define COPY_FROM_START_2(UNION_TYPE, MEMBER) end.UNION_TYPE.MEMBER = start.UNION_TYPE.MEMBER;
|
||||
|
||||
#define SPECIALIZE_KFD_IOC_IOCTL(STRUCT, ARG_IOC) \
|
||||
template <> \
|
||||
struct IOC_event<STRUCT> \
|
||||
{ \
|
||||
static constexpr auto value = ARG_IOC; \
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
#
|
||||
#
|
||||
set(ROCPROFILER_LIB_UVM_DETAILS_SOURCES)
|
||||
set(ROCPROFILER_LIB_UVM_DETAILS_HEADERS kfd_ioctl.h)
|
||||
|
||||
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_UVM_DETAILS_SOURCES}
|
||||
${ROCPROFILER_LIB_UVM_DETAILS_HEADERS})
|
||||
Rozdílový obsah nebyl zobrazen, protože je příliš veliký
Načíst rozdílové porovnání
Rozdílový obsah nebyl zobrazen, protože je příliš veliký
Načíst rozdílové porovnání
@@ -0,0 +1,74 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#include "lib/rocprofiler-sdk/page_migration/defines.hpp"
|
||||
#include "lib/rocprofiler-sdk/page_migration/page_migration.hpp"
|
||||
|
||||
#if defined(ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL) && \
|
||||
ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL == 1
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace page_migration
|
||||
{
|
||||
// clang-format off
|
||||
// Map ROCPROF UVM enums to KFD enums
|
||||
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_NONE, KFD_SMI_EVENT_NONE, "Error: Invalid UVM event from KFD" );
|
||||
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_MIGRATE_START, KFD_SMI_EVENT_MIGRATE_START, "%x %ld -%d @%lx(%lx) %x->%x %x:%x %d\n" );
|
||||
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_MIGRATE_END, KFD_SMI_EVENT_MIGRATE_END, "%x %ld -%d @%lx(%lx) %x->%x %d\n" );
|
||||
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, KFD_SMI_EVENT_PAGE_FAULT_START, "%x %ld -%d @%lx(%x) %c\n" );
|
||||
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END, KFD_SMI_EVENT_PAGE_FAULT_END, "%x %ld -%d @%lx(%x) %c\n" );
|
||||
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION, KFD_SMI_EVENT_QUEUE_EVICTION, "%x %ld -%d %x %d\n" );
|
||||
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE, KFD_SMI_EVENT_QUEUE_RESTORE, "%x %ld -%d %x\n" );
|
||||
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU, KFD_SMI_EVENT_UNMAP_FROM_GPU, "%x %ld -%d @%lx(%lx) %x %d\n" );
|
||||
// clang-format on
|
||||
# undef SPECIALIZE_UVM_KFD_EVENT
|
||||
|
||||
SPECIALIZE_PAGE_MIGRATION_INFO(NONE, NONE);
|
||||
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE, MIGRATE_START, MIGRATE_END);
|
||||
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT, PAGE_FAULT_START, PAGE_FAULT_END);
|
||||
SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_SUSPEND, QUEUE_EVICTION, QUEUE_RESTORE);
|
||||
SPECIALIZE_PAGE_MIGRATION_INFO(UNMAP_FROM_GPU, UNMAP_FROM_GPU);
|
||||
|
||||
template <size_t UvmOpInx, size_t... OpInxs>
|
||||
constexpr size_t to_rocprof_op_impl(std::index_sequence<OpInxs...>)
|
||||
{
|
||||
return ((((bitmask(UvmOpInx) & page_migration_info<OpInxs>::uvm_bitmask) != 0) * OpInxs) + ...);
|
||||
}
|
||||
|
||||
template <size_t... OpInxs>
|
||||
constexpr auto _to_rocprof_op_impl(std::index_sequence<OpInxs...>)
|
||||
{
|
||||
return std::array{
|
||||
to_rocprof_op_impl<OpInxs>(std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{})...};
|
||||
}
|
||||
|
||||
constexpr auto
|
||||
to_rocprof_op(size_t pos)
|
||||
{
|
||||
using rop = rocprofiler_page_migration_operation_t;
|
||||
return static_cast<rop>(
|
||||
_to_rocprof_op_impl(std::make_index_sequence<ROCPROFILER_UVM_EVENT_LAST>{})[pos]);
|
||||
}
|
||||
} // namespace page_migration
|
||||
} // namespace rocprofiler
|
||||
#endif
|
||||
@@ -0,0 +1,45 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/common/container/small_vector.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace page_migration
|
||||
{
|
||||
const char*
|
||||
name_by_id(uint32_t id);
|
||||
|
||||
std::vector<uint32_t>
|
||||
get_ids();
|
||||
|
||||
rocprofiler_status_t
|
||||
init();
|
||||
|
||||
void
|
||||
finalize();
|
||||
} // namespace page_migration
|
||||
} // namespace rocprofiler
|
||||
@@ -0,0 +1,218 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lib/rocprofiler-sdk/page_migration/details/kfd_ioctl.h"
|
||||
|
||||
#include <rocprofiler-sdk/buffer_tracing.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
namespace rocprofiler
|
||||
{
|
||||
namespace page_migration
|
||||
{
|
||||
// serves as an overview of what events we capture and report
|
||||
enum fault_type_t
|
||||
{
|
||||
NONE,
|
||||
READ,
|
||||
WRITE,
|
||||
};
|
||||
|
||||
struct uvm_event_page_fault_start_t
|
||||
{
|
||||
int kind;
|
||||
uint64_t start_timestamp;
|
||||
int pid;
|
||||
int node_id;
|
||||
uint64_t address;
|
||||
fault_type_t fault;
|
||||
};
|
||||
|
||||
struct uvm_event_page_fault_end_t
|
||||
{
|
||||
int kind;
|
||||
uint64_t end_timestamp;
|
||||
uint32_t pid;
|
||||
int node_id;
|
||||
uint64_t address;
|
||||
bool migrated;
|
||||
};
|
||||
|
||||
struct uvm_event_migrate_start_t
|
||||
{
|
||||
int kind;
|
||||
uint64_t start_timestamp;
|
||||
uint32_t pid;
|
||||
uint64_t start;
|
||||
uint64_t end_offset;
|
||||
uint32_t from;
|
||||
uint32_t to;
|
||||
uint32_t prefetch_node; // last prefetch location, 0 for CPU, or GPU id
|
||||
uint32_t preferred_node; // perferred location, 0 for CPU, or GPU id
|
||||
uint32_t trigger;
|
||||
};
|
||||
|
||||
struct uvm_event_migrate_end_t
|
||||
{
|
||||
int kind;
|
||||
uint64_t end_timestamp;
|
||||
uint32_t pid;
|
||||
uint64_t start;
|
||||
uint64_t end_offset;
|
||||
uint32_t from;
|
||||
uint32_t to;
|
||||
uint32_t trigger;
|
||||
};
|
||||
|
||||
struct uvm_event_queue_eviction_t
|
||||
{
|
||||
int kind;
|
||||
uint64_t start_timestamp;
|
||||
uint32_t pid;
|
||||
int node_id;
|
||||
uint32_t trigger;
|
||||
};
|
||||
|
||||
struct uvm_event_queue_restore_t
|
||||
{
|
||||
int kind;
|
||||
uint64_t end_timestamp;
|
||||
uint32_t pid;
|
||||
int node_id;
|
||||
bool rescheduled;
|
||||
};
|
||||
|
||||
struct uvm_event_unmap_from_gpu_t
|
||||
{
|
||||
int kind;
|
||||
uint64_t timestamp;
|
||||
uint32_t pid;
|
||||
uint64_t address;
|
||||
uint64_t size;
|
||||
int node_id;
|
||||
uint32_t trigger;
|
||||
};
|
||||
|
||||
template <size_t e>
|
||||
struct uvm_event_info;
|
||||
|
||||
template <size_t>
|
||||
struct page_migration_info;
|
||||
|
||||
namespace kfd
|
||||
{
|
||||
template <typename T>
|
||||
struct IOC_event;
|
||||
} // namespace kfd
|
||||
|
||||
constexpr size_t
|
||||
bitmask(size_t num)
|
||||
{
|
||||
if(num == 0)
|
||||
return 0;
|
||||
else
|
||||
return (1ULL << (num - 1));
|
||||
}
|
||||
|
||||
template <size_t... Args>
|
||||
constexpr size_t bitmask(std::index_sequence<Args...>)
|
||||
{
|
||||
return (bitmask(Args) | ...);
|
||||
}
|
||||
|
||||
enum uvm_event_id_t
|
||||
{
|
||||
ROCPROFILER_UVM_EVENT_NONE,
|
||||
ROCPROFILER_UVM_EVENT_MIGRATE_START,
|
||||
ROCPROFILER_UVM_EVENT_MIGRATE_END,
|
||||
ROCPROFILER_UVM_EVENT_PAGE_FAULT_START,
|
||||
ROCPROFILER_UVM_EVENT_PAGE_FAULT_END,
|
||||
ROCPROFILER_UVM_EVENT_QUEUE_EVICTION,
|
||||
ROCPROFILER_UVM_EVENT_QUEUE_RESTORE,
|
||||
ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU,
|
||||
ROCPROFILER_UVM_EVENT_LAST,
|
||||
};
|
||||
|
||||
static_assert(KFD_SMI_EVENT_NONE == 0);
|
||||
static_assert(KFD_SMI_EVENT_MIGRATE_START == 5);
|
||||
static_assert(KFD_SMI_EVENT_MIGRATE_END == 6);
|
||||
static_assert(KFD_SMI_EVENT_PAGE_FAULT_START == 7);
|
||||
static_assert(KFD_SMI_EVENT_PAGE_FAULT_END == 8);
|
||||
static_assert(KFD_SMI_EVENT_QUEUE_EVICTION == 9);
|
||||
static_assert(KFD_SMI_EVENT_QUEUE_RESTORE == 10);
|
||||
static_assert(KFD_SMI_EVENT_UNMAP_FROM_GPU == 11);
|
||||
static_assert(KFD_SMI_EVENT_ALL_PROCESS == 64);
|
||||
|
||||
using rocprof_buffer_op_t = rocprofiler_page_migration_operation_t;
|
||||
|
||||
using node_fd_t = int;
|
||||
|
||||
using event_map_t =
|
||||
std::unordered_map<uint64_t, rocprofiler_buffer_tracing_page_migration_record_t>;
|
||||
using events_cache_t = std::array<event_map_t, ROCPROFILER_PAGE_MIGRATION_LAST>;
|
||||
|
||||
template <size_t... Ints>
|
||||
constexpr size_t to_kfd_bitmask(std::index_sequence<Ints...>)
|
||||
{
|
||||
return bitmask(std::index_sequence<uvm_event_info<Ints>::kfd_event...>());
|
||||
}
|
||||
|
||||
template <rocprofiler_page_migration_operation_t... Ops>
|
||||
constexpr size_t to_uvm_bitmask(std::index_sequence<Ops...>)
|
||||
{
|
||||
return bitmask(std::index_sequence<static_cast<uint32_t>(Ops)...>());
|
||||
}
|
||||
|
||||
template <size_t RocprofOpIdx, size_t UvmOpIdx>
|
||||
constexpr bool
|
||||
is_rocprof_uvm_map()
|
||||
{
|
||||
return page_migration_info<RocprofOpIdx>::uvm_bitmask & bitmask(UvmOpIdx);
|
||||
}
|
||||
|
||||
template <size_t RocprofOpIdx, size_t OpInx, size_t... OpInxs>
|
||||
constexpr bool
|
||||
_is_rocprof_uvm_map(size_t uvm_event, std::index_sequence<OpInx, OpInxs...>)
|
||||
{
|
||||
if(OpInx == uvm_event)
|
||||
return is_rocprof_uvm_map<RocprofOpIdx, OpInx>();
|
||||
else if constexpr(sizeof...(OpInxs) > 0)
|
||||
return _is_rocprof_uvm_map<RocprofOpIdx>(uvm_event, std::index_sequence<OpInxs...>{});
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
template <size_t RocprofOpIdx>
|
||||
constexpr bool
|
||||
is_rocprof_uvm_map(size_t uvm_event)
|
||||
{
|
||||
return _is_rocprof_uvm_map<RocprofOpIdx>(
|
||||
uvm_event, std::make_index_sequence<ROCPROFILER_UVM_EVENT_LAST>{});
|
||||
}
|
||||
} // namespace page_migration
|
||||
} // namespace rocprofiler
|
||||
@@ -38,6 +38,7 @@
|
||||
#include "lib/rocprofiler-sdk/intercept_table.hpp"
|
||||
#include "lib/rocprofiler-sdk/internal_threading.hpp"
|
||||
#include "lib/rocprofiler-sdk/marker/marker.hpp"
|
||||
#include "lib/rocprofiler-sdk/page_migration/page_migration.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/context.h>
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
@@ -600,6 +601,7 @@ finalize()
|
||||
set_fini_status(-1);
|
||||
hsa::async_copy_fini();
|
||||
hsa::queue_controller_fini();
|
||||
page_migration::finalize();
|
||||
hsa::code_object_shutdown();
|
||||
if(get_init_status() > 0)
|
||||
{
|
||||
|
||||
@@ -87,6 +87,8 @@ ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_AST_NOT_FOUND, "AST was not f
|
||||
ROCPROFILER_STATUS_STRING(
|
||||
ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD,
|
||||
"AQL Profiler was not able to find event coordinates for defined counters")
|
||||
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL,
|
||||
"A service depends on a newer version of KFD (amdgpu kernel driver)")
|
||||
template <size_t Idx, size_t... Tail>
|
||||
const char*
|
||||
get_status_name(rocprofiler_status_t status, std::index_sequence<Idx, Tail...>)
|
||||
|
||||
@@ -36,8 +36,9 @@ set_tests_properties(${lib_TESTS} PROPERTIES TIMEOUT 30 LABELS "unittests")
|
||||
#
|
||||
# -------------------------------------------------------------------------------------- #
|
||||
|
||||
set(rocprofiler_shared_lib_sources external_correlation.cpp intercept_table.cpp
|
||||
registration.cpp roctx.cpp status.cpp)
|
||||
set(rocprofiler_shared_lib_sources
|
||||
external_correlation.cpp intercept_table.cpp page_migration.cpp registration.cpp
|
||||
roctx.cpp status.cpp)
|
||||
|
||||
add_executable(rocprofiler-lib-tests-shared)
|
||||
target_sources(rocprofiler-lib-tests-shared PRIVATE ${rocprofiler_shared_lib_sources})
|
||||
|
||||
@@ -0,0 +1,177 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include "lib/common/defines.hpp"
|
||||
#include "lib/rocprofiler-sdk/page_migration/details/kfd_ioctl.h"
|
||||
#include "lib/rocprofiler-sdk/page_migration/utils.hpp"
|
||||
|
||||
#include <rocprofiler-sdk/fwd.h>
|
||||
#include <rocprofiler-sdk/rocprofiler.h>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <glog/logging.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <sstream>
|
||||
#include <string_view>
|
||||
|
||||
#define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1
|
||||
#include "lib/rocprofiler-sdk/page_migration/page_migration.def.cpp"
|
||||
#undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL
|
||||
|
||||
#define ASSERT_SAME(A, B) static_assert(static_cast<size_t>(A) == static_cast<size_t>(B))
|
||||
|
||||
namespace
|
||||
{
|
||||
constexpr std::string_view MULTILINE_STRING = "This is 0 Line 0\n"
|
||||
"This is 10 Line 1\n"
|
||||
"This is 20 Line 2\n"
|
||||
"This is 30 Line 3\n"
|
||||
"This is 40 Line 4\n";
|
||||
}
|
||||
|
||||
void
|
||||
return_line(const std::string_view line)
|
||||
{
|
||||
static int line_no = 0;
|
||||
std::stringstream strs{};
|
||||
strs << fmt::format("This is {} Line {}", line_no * 10, line_no);
|
||||
EXPECT_EQ(strs.str(), line);
|
||||
line_no++;
|
||||
}
|
||||
|
||||
auto
|
||||
parse_lines()
|
||||
{
|
||||
KFD_EVENT_PARSE_EVENTS(MULTILINE_STRING, return_line);
|
||||
}
|
||||
|
||||
TEST(page_migration, readlines)
|
||||
{
|
||||
// Ensure all lines are read
|
||||
parse_lines();
|
||||
}
|
||||
|
||||
TEST(page_migration, parse_kvm_events)
|
||||
{
|
||||
// Ensure all lines are read
|
||||
parse_lines();
|
||||
}
|
||||
|
||||
TEST(page_migtation, rocprof_kfd_map)
|
||||
{
|
||||
using namespace ::rocprofiler::page_migration;
|
||||
|
||||
// clang-format off
|
||||
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
|
||||
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
|
||||
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
|
||||
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
|
||||
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
|
||||
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
|
||||
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
|
||||
|
||||
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
|
||||
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
|
||||
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
|
||||
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
|
||||
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
|
||||
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
|
||||
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
|
||||
|
||||
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
|
||||
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
|
||||
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
|
||||
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
|
||||
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
|
||||
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
|
||||
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
|
||||
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
|
||||
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
|
||||
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
|
||||
|
||||
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_MIGRATE_START) == ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE );
|
||||
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END) == ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT );
|
||||
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU) == ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU );
|
||||
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION) == ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND );
|
||||
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH, KFD_MIGRATE_TRIGGER_PREFETCH );
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU );
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU );
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, KFD_MIGRATE_TRIGGER_TTM_EVICTION );
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, KFD_QUEUE_EVICTION_TRIGGER_SVM );
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, KFD_QUEUE_EVICTION_TRIGGER_USERPTR );
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, KFD_QUEUE_EVICTION_TRIGGER_TTM );
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND );
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT );
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, KFD_QUEUE_EVICTION_CRIU_RESTORE );
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY );
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE);
|
||||
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU );
|
||||
|
||||
static_assert(to_kfd_bitmask(std::index_sequence<
|
||||
ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU>()) ==
|
||||
(KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_PAGE_FAULT_START)
|
||||
| KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU)));
|
||||
|
||||
// clang-format on
|
||||
}
|
||||
Normální soubor → Spustitelný soubor
@@ -1,3 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
|
||||
@@ -51,6 +51,7 @@ add_subdirectory(kernel-tracing)
|
||||
add_subdirectory(async-copy-tracing)
|
||||
add_subdirectory(scratch-memory-tracing)
|
||||
add_subdirectory(c-tool)
|
||||
add_subdirectory(page-migration)
|
||||
|
||||
# rocprofv3 validation tests
|
||||
add_subdirectory(rocprofv3)
|
||||
|
||||
@@ -21,4 +21,5 @@ add_subdirectory(multistream)
|
||||
add_subdirectory(vector-operations)
|
||||
add_subdirectory(hip-in-libraries)
|
||||
add_subdirectory(scratch-memory)
|
||||
add_subdirectory(page-migration)
|
||||
add_subdirectory(hsa-queue-dependency)
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
if(NOT CMAKE_HIP_COMPILER)
|
||||
find_program(
|
||||
amdclangpp_EXECUTABLE
|
||||
NAMES amdclang++
|
||||
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
|
||||
PATH_SUFFIXES bin llvm/bin NO_CACHE)
|
||||
mark_as_advanced(amdclangpp_EXECUTABLE)
|
||||
|
||||
if(amdclangpp_EXECUTABLE)
|
||||
set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
project(rocprofiler-tests-bin-page-migration LANGUAGES CXX HIP)
|
||||
|
||||
foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
|
||||
if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "")
|
||||
set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_HIP_STANDARD 17)
|
||||
set(CMAKE_HIP_EXTENSIONS OFF)
|
||||
set(CMAKE_HIP_STANDARD_REQUIRED ON)
|
||||
|
||||
set_source_files_properties(page-migration.cpp PROPERTIES LANGUAGE HIP)
|
||||
add_executable(page-migration)
|
||||
target_sources(page-migration PRIVATE page-migration.cpp)
|
||||
target_compile_options(page-migration PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow
|
||||
-Werror)
|
||||
|
||||
find_package(Threads REQUIRED)
|
||||
target_link_libraries(page-migration PRIVATE Threads::Threads)
|
||||
|
||||
install(
|
||||
TARGETS page-migration
|
||||
DESTINATION bin
|
||||
COMPONENT tests)
|
||||
@@ -0,0 +1,222 @@
|
||||
// MIT License
|
||||
//
|
||||
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define HIP_API_CALL(CALL) \
|
||||
{ \
|
||||
hipError_t error_ = (CALL); \
|
||||
if(error_ != hipSuccess) \
|
||||
{ \
|
||||
auto _hip_api_print_lk = auto_lock_t{print_lock}; \
|
||||
fprintf(stderr, \
|
||||
"%s:%d :: HIP error : %s\n", \
|
||||
__FILE__, \
|
||||
__LINE__, \
|
||||
hipGetErrorString(error_)); \
|
||||
throw std::runtime_error("hip_api_call"); \
|
||||
} \
|
||||
}
|
||||
|
||||
using auto_lock_t = std::unique_lock<std::mutex>;
|
||||
auto print_lock = std::mutex{};
|
||||
|
||||
__global__ void
|
||||
kernel(size_t* __restrict__ data, int size)
|
||||
{
|
||||
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
|
||||
int stride = hipBlockDim_x * hipGridDim_x;
|
||||
|
||||
for(int i = x; i < size; i += stride)
|
||||
{
|
||||
data[i] *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
struct mmap_allocator
|
||||
{
|
||||
explicit mmap_allocator(size_t num_pages)
|
||||
{
|
||||
m_size = num_pages * sysconf(_SC_PAGE_SIZE);
|
||||
void* ret = mmap(nullptr, // addr: null. Kernel gives us page-aligned memory
|
||||
m_size, // length: num bytes to allocate
|
||||
PROT_WRITE | PROT_READ, // mem_prot: Allow read/write
|
||||
MAP_ANONYMOUS | MAP_PRIVATE, // flags: No file handle
|
||||
-1, // no fd, use memory "MAP_ANONYMOUS"
|
||||
0); // offset into fd
|
||||
if(ret == ((void*) -1)) // NOLINT performance-no-int-to-ptr
|
||||
{
|
||||
auto ecode = errno;
|
||||
fprintf(stderr, "mmap error %d: %s", ecode, strerror(ecode));
|
||||
throw std::runtime_error("mmap failed");
|
||||
}
|
||||
else
|
||||
{
|
||||
m_addr = ret;
|
||||
::memset(m_addr, 0, m_size);
|
||||
}
|
||||
}
|
||||
|
||||
~mmap_allocator()
|
||||
{
|
||||
auto ret = munmap(m_addr, m_size);
|
||||
if(ret != 0) perror("munmap failed");
|
||||
}
|
||||
|
||||
mmap_allocator(const mmap_allocator&) = delete;
|
||||
mmap_allocator(mmap_allocator&&) noexcept = default;
|
||||
mmap_allocator& operator=(const mmap_allocator&) = delete;
|
||||
mmap_allocator& operator=(mmap_allocator&&) noexcept = default;
|
||||
|
||||
template <typename Up>
|
||||
Up* get() const
|
||||
{
|
||||
static_assert(!std::is_pointer<Up>::value, "must not be pointer type");
|
||||
return static_cast<Up*>(m_addr);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t m_size = 0;
|
||||
void* m_addr = nullptr;
|
||||
};
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
static constexpr auto NUM_PAGES = 16;
|
||||
const auto PAGE_SIZE_BYTES = ::sysconf(_SC_PAGE_SIZE);
|
||||
|
||||
size_t elem_count = (NUM_PAGES * PAGE_SIZE_BYTES) / sizeof(size_t); // one page?
|
||||
|
||||
auto alloc = mmap_allocator(NUM_PAGES);
|
||||
void* data_v = alloc.get<void>();
|
||||
auto* data = alloc.get<size_t>();
|
||||
|
||||
for(size_t i = 0; i < elem_count; ++i)
|
||||
if(data[i] != 0) throw std::runtime_error{"bad init"};
|
||||
|
||||
printf("Allocated size: %lu bytes (%lu KB), (%lu MB), %zu elements @ %p\n",
|
||||
sizeof(size_t) * elem_count,
|
||||
sizeof(size_t) * elem_count / 1024,
|
||||
sizeof(size_t) * elem_count / 1024 / 1024,
|
||||
elem_count,
|
||||
data_v);
|
||||
|
||||
HIP_API_CALL(hipHostRegister(data, elem_count * sizeof(size_t), hipHostRegisterDefault));
|
||||
|
||||
char maps[1024 * 1024];
|
||||
std::memset(maps, '\0', 1024 * 1024);
|
||||
auto fd = open("/proc/self/maps", O_RDONLY | O_CLOEXEC);
|
||||
|
||||
if(fd == -1)
|
||||
{
|
||||
auto ecode = errno;
|
||||
fprintf(stderr, "mmap error %d: %s", ecode, strerror(ecode));
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
auto bytes = read(fd, maps, 1024 * 1024 - 1);
|
||||
if(bytes == -1)
|
||||
{
|
||||
auto ecode = errno;
|
||||
fprintf(stderr, "mmap error %d: %s", ecode, strerror(ecode));
|
||||
exit(-1);
|
||||
}
|
||||
close(fd);
|
||||
|
||||
std::string_view maps_data{maps, static_cast<size_t>(bytes)};
|
||||
std::cout << "------------\n";
|
||||
std::cout << maps_data;
|
||||
std::cout << "------------\n";
|
||||
std::istringstream maps_stream{maps_data.data()};
|
||||
std::string line(1024, '\0');
|
||||
|
||||
while(std::getline(maps_stream, line))
|
||||
{
|
||||
char __[1024];
|
||||
int _{};
|
||||
|
||||
void* start{};
|
||||
void* end{};
|
||||
|
||||
auto ret =
|
||||
std::sscanf(line.data(), "%p-%p %s %d %d:%d %d\n", &start, &end, __, &_, &_, &_, &_);
|
||||
if(ret > 0 && (start == data_v))
|
||||
{
|
||||
size_t ptr_diff = ((size_t) end - (size_t) start);
|
||||
printf("Found match: %zu %zu KB, %zu 4K > %s\n",
|
||||
ptr_diff,
|
||||
ptr_diff / 1024,
|
||||
ptr_diff / 4096,
|
||||
line.data());
|
||||
}
|
||||
}
|
||||
|
||||
for(int iter = 0; iter < 1000; ++iter)
|
||||
{
|
||||
for(size_t i = 0; i < elem_count; ++i)
|
||||
data[i] = i;
|
||||
|
||||
// std::cout << "launching..." << std::endl;
|
||||
hipLaunchKernelGGL(kernel, 128, 64, 0, 0, data, elem_count);
|
||||
|
||||
// std::cout << "syncing..." << std::endl;
|
||||
HIP_API_CALL(hipDeviceSynchronize());
|
||||
|
||||
// std::cout << "checking..." << std::endl;
|
||||
for(size_t i = 0; i < elem_count; ++i)
|
||||
{
|
||||
if(data[i] != (i * 2))
|
||||
{
|
||||
auto msg = std::stringstream{};
|
||||
msg << "GPU computed value at " << i << " in iteration " << iter
|
||||
<< " is incorrect. Expected " << (i * 2) << ", found " << data[i];
|
||||
throw std::runtime_error{msg.str()};
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Iteration " << std::setw(2) << iter << ": correct\n" << std::flush;
|
||||
}
|
||||
|
||||
HIP_API_CALL(hipDeviceSynchronize());
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -33,8 +33,8 @@
|
||||
<< " failed with error code " << status_name << " (" << CHECKSTATUS \
|
||||
<< "): " << status_msg << std::endl; \
|
||||
std::stringstream errmsg{}; \
|
||||
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \
|
||||
<< status_name << ": " << status_msg << ")"; \
|
||||
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \
|
||||
<< " failure (" << status_name << ": " << status_msg << ")"; \
|
||||
throw std::runtime_error(errmsg.str()); \
|
||||
} \
|
||||
}
|
||||
|
||||
@@ -391,6 +391,90 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_copy_record_t data)
|
||||
SAVE_DATA_FIELD(src_agent_id);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_record_t& data)
|
||||
{
|
||||
SAVE_DATA_FIELD(size);
|
||||
SAVE_DATA_FIELD(kind);
|
||||
SAVE_DATA_FIELD(operation);
|
||||
SAVE_DATA_FIELD(start_timestamp);
|
||||
SAVE_DATA_FIELD(end_timestamp);
|
||||
SAVE_DATA_FIELD(pid);
|
||||
|
||||
switch(data.operation)
|
||||
{
|
||||
case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT:
|
||||
{
|
||||
ar(make_nvp("page_fault", data.page_fault));
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE:
|
||||
{
|
||||
ar(make_nvp("page_migrate", data.page_migrate));
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND:
|
||||
{
|
||||
ar(make_nvp("queue_suspend", data.queue_suspend));
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU:
|
||||
{
|
||||
ar(make_nvp("unmap_from_gpu", data.unmap_from_gpu));
|
||||
break;
|
||||
}
|
||||
case ROCPROFILER_PAGE_MIGRATION_NONE:
|
||||
case ROCPROFILER_PAGE_MIGRATION_LAST:
|
||||
{
|
||||
throw std::runtime_error{"unsupported page migration operation type"};
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_page_fault_record_t& data)
|
||||
{
|
||||
SAVE_DATA_FIELD(node_id);
|
||||
SAVE_DATA_FIELD(address);
|
||||
SAVE_DATA_FIELD(read_fault);
|
||||
SAVE_DATA_FIELD(migrated);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_page_migrate_record_t& data)
|
||||
{
|
||||
SAVE_DATA_FIELD(start_addr);
|
||||
SAVE_DATA_FIELD(end_addr);
|
||||
SAVE_DATA_FIELD(from_node);
|
||||
SAVE_DATA_FIELD(to_node);
|
||||
SAVE_DATA_FIELD(prefetch_node);
|
||||
SAVE_DATA_FIELD(preferred_node);
|
||||
SAVE_DATA_FIELD(trigger);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t& data)
|
||||
{
|
||||
SAVE_DATA_FIELD(node_id);
|
||||
SAVE_DATA_FIELD(trigger);
|
||||
SAVE_DATA_FIELD(rescheduled);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t& data)
|
||||
{
|
||||
SAVE_DATA_FIELD(node_id);
|
||||
SAVE_DATA_FIELD(start_addr);
|
||||
SAVE_DATA_FIELD(end_addr);
|
||||
SAVE_DATA_FIELD(trigger);
|
||||
}
|
||||
|
||||
template <typename ArchiveT>
|
||||
void
|
||||
save(ArchiveT& ar, rocprofiler_buffer_tracing_scratch_memory_record_t data)
|
||||
|
||||
@@ -0,0 +1,65 @@
|
||||
#
|
||||
#
|
||||
#
|
||||
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
|
||||
|
||||
project(
|
||||
rocprofiler-tests-page-migration
|
||||
LANGUAGES CXX
|
||||
VERSION 0.0.0)
|
||||
|
||||
find_package(rocprofiler-sdk REQUIRED)
|
||||
|
||||
if(ROCPROFILER_MEMCHECK_PRELOAD_ENV)
|
||||
set(PRELOAD_ENV
|
||||
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}:$<TARGET_FILE:rocprofiler-sdk-json-tool>")
|
||||
else()
|
||||
set(PRELOAD_ENV "LD_PRELOAD=$<TARGET_FILE:rocprofiler-sdk-json-tool>")
|
||||
endif()
|
||||
|
||||
add_test(NAME test-page-migration-execute COMMAND $<TARGET_FILE:page-migration>)
|
||||
|
||||
set(page-migration-env
|
||||
"${PRELOAD_ENV}"
|
||||
"ROCPROFILER_TOOL_OUTPUT_FILE=page-migration-test.json"
|
||||
"LD_LIBRARY_PATH=$<TARGET_FILE_DIR:rocprofiler::rocprofiler-shared-library>:$ENV{LD_LIBRARY_PATH}"
|
||||
)
|
||||
|
||||
set_tests_properties(
|
||||
test-page-migration-execute
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
ENVIRONMENT
|
||||
"${page-migration-env}"
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"KFD does not support SVM event reporting"
|
||||
WORKING_DIRECTORY
|
||||
${CMAKE_CURRENT_BINARY_DIR})
|
||||
|
||||
foreach(FILENAME validate.py pytest.ini conftest.py)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}
|
||||
${CMAKE_CURRENT_BINARY_DIR}/${FILENAME} COPYONLY)
|
||||
endforeach()
|
||||
|
||||
add_test(NAME test-page-migration-validate
|
||||
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input
|
||||
${CMAKE_CURRENT_BINARY_DIR}/page-migration-test.json)
|
||||
|
||||
set_tests_properties(
|
||||
test-page-migration-validate
|
||||
PROPERTIES TIMEOUT
|
||||
45
|
||||
LABELS
|
||||
"integration-tests"
|
||||
DEPENDS
|
||||
test-page-migration-execute
|
||||
FAIL_REGULAR_EXPRESSION
|
||||
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
|
||||
SKIP_REGULAR_EXPRESSION
|
||||
"KFD does not support SVM event reporting"
|
||||
WORKING_DIRECTORY
|
||||
${CMAKE_CURRENT_BINARY_DIR})
|
||||
@@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
|
||||
def pytest_addoption(parser):
|
||||
parser.addoption(
|
||||
"--input",
|
||||
action="store",
|
||||
default="page-migration-test.json",
|
||||
help="Input JSON",
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def input_data(request):
|
||||
filename = request.config.getoption("--input")
|
||||
data = None
|
||||
with open(filename, "r") as inp:
|
||||
data = json.load(inp)
|
||||
|
||||
if data["rocprofiler-sdk-json-tool"]["metadata"]["validate_page_migration"] is False:
|
||||
return pytest.skip(
|
||||
"Skipping test because KFD does not support SVM event reporting"
|
||||
)
|
||||
return data
|
||||
@@ -0,0 +1,4 @@
|
||||
|
||||
[pytest]
|
||||
addopts = --durations=20 -rA -s -vv
|
||||
testpaths = validate.py
|
||||
@@ -0,0 +1,344 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from collections import defaultdict
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
|
||||
# helper function
|
||||
def node_exists(name, data, min_len=1):
|
||||
assert name in data
|
||||
assert data[name] is not None
|
||||
if isinstance(data[name], (list, tuple, dict, set)):
|
||||
assert len(data[name]) >= min_len
|
||||
|
||||
|
||||
def to_dict(key_values):
|
||||
a = defaultdict()
|
||||
for kv in key_values:
|
||||
a[kv["key"]] = kv["value"]
|
||||
return a
|
||||
|
||||
|
||||
def op_name(op_name, record):
|
||||
found_op = False
|
||||
op_key = None
|
||||
|
||||
for kind_node in record["names"]["kind_names"]:
|
||||
if kind_node["value"] == op_name:
|
||||
op_key = kind_node["key"]
|
||||
|
||||
for op_node in record["names"]["operation_names"]:
|
||||
if op_node["key"] == op_key:
|
||||
return op_key, to_dict(op_node["value"])
|
||||
|
||||
|
||||
def dict_from_value_key(d):
|
||||
ret_d = defaultdict()
|
||||
|
||||
for k, v in d.items():
|
||||
assert v not in ret_d
|
||||
ret_d[v] = k
|
||||
return ret_d
|
||||
|
||||
|
||||
def sort_by_timestamp(lines):
|
||||
timestamp_line_map = {}
|
||||
|
||||
for log_line in lines:
|
||||
timestamp = log_line.split(" ")[1]
|
||||
timestamp_line_map[timestamp] = log_line
|
||||
|
||||
timestamps_sorted = sorted([l.split(" ")[1] for l in lines])
|
||||
return timestamps_sorted, timestamp_line_map
|
||||
|
||||
|
||||
# ------------------------------ Tests ------------------------------ #
|
||||
|
||||
|
||||
def test_data_structure(input_data):
|
||||
"""verify minimum amount of expected data is present"""
|
||||
data = input_data
|
||||
|
||||
node_exists("rocprofiler-sdk-json-tool", data)
|
||||
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
node_exists("metadata", sdk_data)
|
||||
node_exists("pid", sdk_data["metadata"])
|
||||
node_exists("main_tid", sdk_data["metadata"])
|
||||
node_exists("init_time", sdk_data["metadata"])
|
||||
node_exists("fini_time", sdk_data["metadata"])
|
||||
node_exists("validate_page_migration", sdk_data["metadata"])
|
||||
|
||||
assert sdk_data["metadata"]["validate_page_migration"] is True
|
||||
|
||||
node_exists("agents", sdk_data)
|
||||
node_exists("call_stack", sdk_data)
|
||||
node_exists("callback_records", sdk_data)
|
||||
node_exists("buffer_records", sdk_data)
|
||||
|
||||
node_exists("names", sdk_data["callback_records"])
|
||||
node_exists("code_objects", sdk_data["callback_records"])
|
||||
node_exists("kernel_symbols", sdk_data["callback_records"])
|
||||
node_exists("hsa_api_traces", sdk_data["callback_records"])
|
||||
node_exists("hip_api_traces", sdk_data["callback_records"], 0)
|
||||
node_exists("marker_api_traces", sdk_data["callback_records"], 0)
|
||||
|
||||
node_exists("names", sdk_data["buffer_records"])
|
||||
node_exists("kernel_dispatches", sdk_data["buffer_records"])
|
||||
node_exists("memory_copies", sdk_data["buffer_records"], 0)
|
||||
node_exists("hsa_api_traces", sdk_data["buffer_records"])
|
||||
node_exists("hip_api_traces", sdk_data["buffer_records"], 0)
|
||||
node_exists("marker_api_traces", sdk_data["buffer_records"], 0)
|
||||
node_exists("retired_correlation_ids", sdk_data["buffer_records"])
|
||||
node_exists("page_migration", sdk_data["buffer_records"])
|
||||
|
||||
|
||||
def test_timestamps(input_data):
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
cb_start = {}
|
||||
cb_end = {}
|
||||
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
|
||||
for itr in sdk_data["callback_records"][titr]:
|
||||
cid = itr["record"]["correlation_id"]["internal"]
|
||||
phase = itr["record"]["phase"]
|
||||
if phase == 1:
|
||||
cb_start[cid] = itr["timestamp"]
|
||||
elif phase == 2:
|
||||
cb_end[cid] = itr["timestamp"]
|
||||
assert cb_start[cid] <= itr["timestamp"]
|
||||
else:
|
||||
assert phase == 1 or phase == 2
|
||||
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
assert itr["start_timestamp"] <= itr["end_timestamp"]
|
||||
|
||||
for titr in ["kernel_dispatches", "memory_copies"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
assert itr["start_timestamp"] < itr["end_timestamp"]
|
||||
assert itr["correlation_id"]["internal"] > 0
|
||||
assert itr["correlation_id"]["external"] > 0
|
||||
assert sdk_data["metadata"]["init_time"] < itr["start_timestamp"]
|
||||
assert sdk_data["metadata"]["init_time"] < itr["end_timestamp"]
|
||||
assert sdk_data["metadata"]["fini_time"] > itr["start_timestamp"]
|
||||
assert sdk_data["metadata"]["fini_time"] > itr["end_timestamp"]
|
||||
|
||||
# api_start = cb_start[itr["correlation_id"]["internal"]]
|
||||
# api_end = cb_end[itr["correlation_id"]["internal"]]
|
||||
# assert api_start < itr["start_timestamp"]
|
||||
# assert api_end <= itr["end_timestamp"]
|
||||
|
||||
|
||||
def test_internal_correlation_ids(input_data):
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
api_corr_ids = []
|
||||
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
|
||||
for itr in sdk_data["callback_records"][titr]:
|
||||
api_corr_ids.append(itr["record"]["correlation_id"]["internal"])
|
||||
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
api_corr_ids.append(itr["correlation_id"]["internal"])
|
||||
|
||||
api_corr_ids_sorted = sorted(api_corr_ids)
|
||||
api_corr_ids_unique = list(set(api_corr_ids))
|
||||
|
||||
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
|
||||
assert itr["correlation_id"]["internal"] in api_corr_ids_unique
|
||||
|
||||
for itr in sdk_data["buffer_records"]["memory_copies"]:
|
||||
assert itr["correlation_id"]["internal"] in api_corr_ids_unique
|
||||
|
||||
len_corr_id_unq = len(api_corr_ids_unique)
|
||||
assert len(api_corr_ids) != len_corr_id_unq
|
||||
assert max(api_corr_ids_sorted) == len_corr_id_unq
|
||||
|
||||
|
||||
def test_external_correlation_ids(input_data):
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
extern_corr_ids = []
|
||||
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
|
||||
for itr in sdk_data["callback_records"][titr]:
|
||||
assert itr["record"]["correlation_id"]["external"] > 0
|
||||
assert (
|
||||
itr["record"]["thread_id"] == itr["record"]["correlation_id"]["external"]
|
||||
)
|
||||
extern_corr_ids.append(itr["record"]["correlation_id"]["external"])
|
||||
|
||||
extern_corr_ids = list(set(sorted(extern_corr_ids)))
|
||||
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
assert itr["correlation_id"]["external"] > 0
|
||||
assert itr["thread_id"] == itr["correlation_id"]["external"]
|
||||
assert itr["thread_id"] in extern_corr_ids
|
||||
assert itr["correlation_id"]["external"] in extern_corr_ids
|
||||
|
||||
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
|
||||
assert itr["correlation_id"]["external"] > 0
|
||||
assert itr["correlation_id"]["external"] in extern_corr_ids
|
||||
|
||||
for itr in sdk_data["buffer_records"]["memory_copies"]:
|
||||
assert itr["correlation_id"]["external"] > 0
|
||||
assert itr["correlation_id"]["external"] in extern_corr_ids
|
||||
|
||||
|
||||
def test_kernel_ids(input_data):
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
symbol_info = {}
|
||||
for itr in sdk_data["callback_records"]["kernel_symbols"]:
|
||||
phase = itr["record"]["phase"]
|
||||
payload = itr["payload"]
|
||||
kern_id = payload["kernel_id"]
|
||||
|
||||
assert phase == 1 or phase == 2
|
||||
assert kern_id > 0
|
||||
if phase == 1:
|
||||
assert len(payload["kernel_name"]) > 0
|
||||
symbol_info[kern_id] = payload
|
||||
elif phase == 2:
|
||||
assert payload["kernel_id"] in symbol_info.keys()
|
||||
assert payload["kernel_name"] == symbol_info[kern_id]["kernel_name"]
|
||||
|
||||
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
|
||||
assert itr["kernel_id"] in symbol_info.keys()
|
||||
|
||||
|
||||
def test_retired_correlation_ids(input_data):
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
|
||||
def _sort_dict(inp):
|
||||
return dict(sorted(inp.items()))
|
||||
|
||||
api_corr_ids = {}
|
||||
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
corr_id = itr["correlation_id"]["internal"]
|
||||
assert corr_id not in api_corr_ids.keys()
|
||||
api_corr_ids[corr_id] = itr
|
||||
|
||||
async_corr_ids = {}
|
||||
for titr in ["kernel_dispatches", "memory_copies"]:
|
||||
for itr in sdk_data["buffer_records"][titr]:
|
||||
corr_id = itr["correlation_id"]["internal"]
|
||||
assert corr_id not in async_corr_ids.keys()
|
||||
async_corr_ids[corr_id] = itr
|
||||
|
||||
retired_corr_ids = {}
|
||||
for itr in sdk_data["buffer_records"]["retired_correlation_ids"]:
|
||||
corr_id = itr["internal_correlation_id"]
|
||||
assert corr_id not in retired_corr_ids.keys()
|
||||
retired_corr_ids[corr_id] = itr
|
||||
|
||||
api_corr_ids = _sort_dict(api_corr_ids)
|
||||
async_corr_ids = _sort_dict(async_corr_ids)
|
||||
retired_corr_ids = _sort_dict(retired_corr_ids)
|
||||
|
||||
for cid, itr in async_corr_ids.items():
|
||||
assert cid in retired_corr_ids.keys()
|
||||
ts = retired_corr_ids[cid]["timestamp"]
|
||||
assert (ts - itr["end_timestamp"]) > 0, f"correlation-id: {cid}, data: {itr}"
|
||||
|
||||
for cid, itr in api_corr_ids.items():
|
||||
assert cid in retired_corr_ids.keys()
|
||||
ts = retired_corr_ids[cid]["timestamp"]
|
||||
assert (ts - itr["end_timestamp"]) > 0, f"correlation-id: {cid}, data: {itr}"
|
||||
|
||||
assert len(api_corr_ids.keys()) == (len(retired_corr_ids.keys()))
|
||||
|
||||
|
||||
def get_allocated_pages(callback_records):
|
||||
# Get how many pages we allocated
|
||||
hip_api_traces = callback_records["hip_api_traces"]
|
||||
_, op_dict = op_name("HIP_RUNTIME_API", callback_records)
|
||||
op_key = [k for k, v in op_dict.items() if v == "hipHostRegister"][0]
|
||||
|
||||
host_register_record = []
|
||||
for r in hip_api_traces:
|
||||
if (
|
||||
r["record"]["operation"] == op_key
|
||||
and "sizeBytes" in r["args"]
|
||||
and "hostPtr" in r["args"]
|
||||
):
|
||||
host_register_record.append(r)
|
||||
|
||||
assert len(host_register_record) == 1
|
||||
alloc_size = int(host_register_record[0]["args"]["sizeBytes"], 10)
|
||||
start_addr = int(host_register_record[0]["args"]["hostPtr"], 16)
|
||||
end_addr = start_addr + alloc_size
|
||||
|
||||
return start_addr, end_addr, alloc_size
|
||||
|
||||
|
||||
def test_page_migration_data(input_data):
|
||||
data = input_data
|
||||
sdk_data = data["rocprofiler-sdk-json-tool"]
|
||||
buffer_records = sdk_data["buffer_records"]
|
||||
callback_records = sdk_data["callback_records"]
|
||||
page_migtation_buffers = buffer_records["page_migration"]
|
||||
|
||||
bf_op_id, bf_op_names = op_name("PAGE_MIGRATION", buffer_records)
|
||||
assert bf_op_names[0] == "NONE"
|
||||
assert "PAGE_MIGRATE" in str(bf_op_names)
|
||||
assert len(bf_op_names) == 5
|
||||
|
||||
node_ids = set(x["gpu_id"] for x in sdk_data["agents"])
|
||||
start_addr, end_addr, alloc_size = get_allocated_pages(callback_records)
|
||||
|
||||
assert start_addr < end_addr and start_addr + alloc_size == end_addr
|
||||
assert int(alloc_size) == 16 * 4096 # We allocated 16 pages in the test
|
||||
|
||||
# PID must be same
|
||||
assert len(set(r["pid"] for r in page_migtation_buffers)) == 1
|
||||
|
||||
for r in page_migtation_buffers:
|
||||
op = r["operation"]
|
||||
|
||||
assert r["size"] == 136
|
||||
assert r["kind"] == bf_op_id
|
||||
assert op != 0 and bf_op_names[op] != "NONE"
|
||||
assert bf_op_names[op].lower() in r
|
||||
|
||||
if "page_migrate" in r:
|
||||
assert r["page_migrate"]["from_node"] in node_ids
|
||||
assert r["page_migrate"]["to_node"] in node_ids
|
||||
assert r["page_migrate"]["prefetch_node"] in node_ids
|
||||
assert r["page_migrate"]["preferred_node"] in node_ids
|
||||
assert r["page_migrate"]["trigger"] >= 0
|
||||
|
||||
if "queue_suspend" in r:
|
||||
assert r["queue_suspend"]["trigger"] >= 0
|
||||
assert r["queue_suspend"]["node_id"] in node_ids
|
||||
|
||||
if "unmap_from_gpu" in r:
|
||||
assert r["unmap_from_gpu"]["trigger"] >= 0
|
||||
# unmap is "instantaneous"
|
||||
assert 0 < r["start_timestamp"] == r["end_timestamp"]
|
||||
else:
|
||||
assert 0 < r["start_timestamp"] < r["end_timestamp"]
|
||||
|
||||
# Check for events with our page
|
||||
for r in page_migtation_buffers:
|
||||
|
||||
if "page_migrate" in r and r["page_migrate"]["start_addr"] == start_addr:
|
||||
assert end_addr == r["page_migrate"]["end_addr"]
|
||||
|
||||
if "unmap_from_gpu" in r and r["unmap_from_gpu"]["start_addr"] == start_addr:
|
||||
assert end_addr == r["unmap_from_gpu"]["end_addr"]
|
||||
|
||||
# TODO: Check if a migrate a->b is paired up with b->a
|
||||
# It may not always be reported towards app finalization
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
|
||||
sys.exit(exit_code)
|
||||
@@ -312,6 +312,7 @@ get_buffer_tracing_names()
|
||||
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API,
|
||||
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
|
||||
ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY,
|
||||
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION,
|
||||
};
|
||||
|
||||
auto cb_name_info = buffer_name_info{};
|
||||
@@ -758,6 +759,7 @@ auto hip_api_bf_records = std::deque<rocprofiler_buffer_tracing_hip_api_
|
||||
auto kernel_dispatch_bf_records = std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>{};
|
||||
auto memory_copy_records = std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>{};
|
||||
auto scratch_memory_records = std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>{};
|
||||
auto page_migration_records = std::deque<rocprofiler_buffer_tracing_page_migration_record_t>{};
|
||||
auto corr_id_retire_records =
|
||||
std::deque<rocprofiler_buffer_tracing_correlation_id_retirement_record_t>{};
|
||||
|
||||
@@ -842,6 +844,13 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
|
||||
|
||||
scratch_memory_records.emplace_back(*record);
|
||||
}
|
||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)
|
||||
{
|
||||
auto* record = static_cast<rocprofiler_buffer_tracing_page_migration_record_t*>(
|
||||
header->payload);
|
||||
|
||||
page_migration_records.emplace_back(*record);
|
||||
}
|
||||
else if(header->kind == ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT)
|
||||
{
|
||||
auto* record =
|
||||
@@ -927,12 +936,14 @@ rocprofiler_context_id_t scratch_memory_ctx = {};
|
||||
rocprofiler_context_id_t corr_id_retire_ctx = {};
|
||||
rocprofiler_context_id_t kernel_dispatch_callback_ctx = {};
|
||||
rocprofiler_context_id_t kernel_dispatch_buffered_ctx = {};
|
||||
rocprofiler_context_id_t page_migration_ctx = {};
|
||||
// buffers
|
||||
rocprofiler_buffer_id_t hsa_api_buffered_buffer = {};
|
||||
rocprofiler_buffer_id_t hip_api_buffered_buffer = {};
|
||||
rocprofiler_buffer_id_t marker_api_buffered_buffer = {};
|
||||
rocprofiler_buffer_id_t kernel_dispatch_buffer = {};
|
||||
rocprofiler_buffer_id_t memory_copy_buffer = {};
|
||||
rocprofiler_buffer_id_t page_migration_buffer = {};
|
||||
rocprofiler_buffer_id_t counter_collection_buffer = {};
|
||||
rocprofiler_buffer_id_t scratch_memory_buffer = {};
|
||||
rocprofiler_buffer_id_t corr_id_retire_buffer = {};
|
||||
@@ -948,25 +959,28 @@ auto contexts = std::unordered_map<std::string_view, rocprofiler_context_id_t*>{
|
||||
{"MARKER_API_BUFFERED", &marker_api_buffered_ctx},
|
||||
{"KERNEL_DISPATCH_BUFFERED", &kernel_dispatch_buffered_ctx},
|
||||
{"MEMORY_COPY", &memory_copy_ctx},
|
||||
{"PAGE_MIGRATION", &page_migration_ctx},
|
||||
{"COUNTER_COLLECTION", &counter_collection_ctx},
|
||||
{"SCRATCH_MEMORY", &scratch_memory_ctx},
|
||||
{"CORRELATION_ID_RETIREMENT", &corr_id_retire_ctx},
|
||||
};
|
||||
|
||||
auto buffers = std::array<rocprofiler_buffer_id_t*, 8>{&hsa_api_buffered_buffer,
|
||||
auto buffers = std::array<rocprofiler_buffer_id_t*, 9>{&hsa_api_buffered_buffer,
|
||||
&hip_api_buffered_buffer,
|
||||
&marker_api_buffered_buffer,
|
||||
&kernel_dispatch_buffer,
|
||||
&memory_copy_buffer,
|
||||
&scratch_memory_buffer,
|
||||
&page_migration_buffer,
|
||||
&counter_collection_buffer,
|
||||
&corr_id_retire_buffer};
|
||||
|
||||
auto agents = std::vector<rocprofiler_agent_t>{};
|
||||
|
||||
rocprofiler_timestamp_t init_time = 0;
|
||||
rocprofiler_timestamp_t fini_time = 0;
|
||||
rocprofiler_thread_id_t main_tid = 0;
|
||||
rocprofiler_timestamp_t init_time = 0;
|
||||
rocprofiler_timestamp_t fini_time = 0;
|
||||
rocprofiler_thread_id_t main_tid = 0;
|
||||
auto page_migration_status = ROCPROFILER_STATUS_SUCCESS;
|
||||
|
||||
int
|
||||
tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
@@ -1142,6 +1156,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
&scratch_memory_buffer),
|
||||
"buffer creation");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(page_migration_ctx,
|
||||
buffer_size,
|
||||
watermark,
|
||||
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
|
||||
tool_tracing_buffered,
|
||||
tool_data,
|
||||
&page_migration_buffer),
|
||||
"buffer creation");
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_create_buffer(corr_id_retire_ctx,
|
||||
buffer_size,
|
||||
watermark,
|
||||
@@ -1226,6 +1249,23 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
|
||||
scratch_memory_buffer),
|
||||
"buffer tracing service for scratch memory configure");
|
||||
|
||||
{
|
||||
page_migration_status =
|
||||
rocprofiler_configure_buffer_tracing_service(page_migration_ctx,
|
||||
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION,
|
||||
nullptr,
|
||||
0,
|
||||
page_migration_buffer);
|
||||
|
||||
constexpr auto message = "page migration service for memory copy configure";
|
||||
if(page_migration_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
|
||||
std::cerr << message
|
||||
<< " failed: " << rocprofiler_get_status_string(page_migration_status)
|
||||
<< std::endl;
|
||||
else
|
||||
ROCPROFILER_CALL(page_migration_status, message);
|
||||
}
|
||||
|
||||
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
|
||||
corr_id_retire_ctx,
|
||||
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT,
|
||||
@@ -1382,6 +1422,7 @@ tool_fini(void* tool_data)
|
||||
<< ", kernel_dispatch_bf_records=" << kernel_dispatch_bf_records.size()
|
||||
<< ", memory_copy_records=" << memory_copy_records.size()
|
||||
<< ", scratch_memory_records=" << scratch_memory_records.size()
|
||||
<< ", page_migration=" << page_migration_records.size()
|
||||
<< ", hsa_api_bf_records=" << hsa_api_bf_records.size()
|
||||
<< ", hip_api_bf_records=" << hip_api_bf_records.size()
|
||||
<< ", marker_api_bf_records=" << marker_api_bf_records.size()
|
||||
@@ -1444,6 +1485,8 @@ write_json(call_stack_t* _call_stack)
|
||||
auto json_ar = JSONOutputArchive{*ofs, json_opts};
|
||||
auto buffer_name_info = get_buffer_tracing_names();
|
||||
auto callback_name_info = get_callback_tracing_names();
|
||||
auto validate_page_migration =
|
||||
(page_migration_status != ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL);
|
||||
|
||||
json_ar.setNextName("rocprofiler-sdk-json-tool");
|
||||
json_ar.startNode();
|
||||
@@ -1454,6 +1497,7 @@ write_json(call_stack_t* _call_stack)
|
||||
json_ar(cereal::make_nvp("main_tid", main_tid));
|
||||
json_ar(cereal::make_nvp("init_time", init_time));
|
||||
json_ar(cereal::make_nvp("fini_time", fini_time));
|
||||
json_ar(cereal::make_nvp("validate_page_migration", validate_page_migration));
|
||||
json_ar.finishNode();
|
||||
|
||||
json_ar(cereal::make_nvp("agents", agents));
|
||||
@@ -1487,6 +1531,7 @@ write_json(call_stack_t* _call_stack)
|
||||
json_ar(cereal::make_nvp("kernel_dispatches", kernel_dispatch_bf_records));
|
||||
json_ar(cereal::make_nvp("memory_copies", memory_copy_records));
|
||||
json_ar(cereal::make_nvp("scratch_memory_traces", scratch_memory_records));
|
||||
json_ar(cereal::make_nvp("page_migration", page_migration_records));
|
||||
json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_bf_records));
|
||||
json_ar(cereal::make_nvp("hip_api_traces", hip_api_bf_records));
|
||||
json_ar(cereal::make_nvp("marker_api_traces", marker_api_bf_records));
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele