Page migration reporting (#651)

* Page migration reporting support

* Page migration: Update parser and reporting

Container does not lave latest KFD header, so CI might fail

* Add kfd_ioctl.h

* Formatting

* Update get_key

- get key was not used (and shouldn't be), so delete it

* clang-tidy fixes

* Tests for page migration

* Apply suggestions from code review

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Update tests/bin/page-migration/CMakeLists.txt

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>

* Update page-migration test app

- add hipHostRegister to register mmap'ed allocation with HIP
- misc cleanup and reorg
- remove HSA_XNACK=1 from test env

* Update lib/rocprofiler-sdk/tests/page_migration.cpp

- fix compilation error

* Minor updates (reorg, rename)

* Page migration reporting support

* Page migration: Update parser and reporting

Container does not lave latest KFD header, so CI might fail

* Update page migration tests, fix trigger types

* Page Migration Tracing Support Refactoring (#753)

* Reorganization

* Update page migration init/fini

* Formatting

* Update page_migration.cpp

- change logging severity

* Skip test if KFD does not support page migration reporting

* Rework skipping test if KFD does not support page migration

* Fix event trigger enum values

* Fix clang-diagnostic-unused-const-variable

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
Co-authored-by: Jonathan R. Madsen <jrmadsen@users.noreply.github.com>
Tento commit je obsažen v:
Mythreya
2024-04-12 13:51:44 -07:00
odevzdal GitHub
rodič 0e83f48cd5
revize fd3d97287c
28 změnil soubory, kde provedl 4455 přidání a 13 odebrání
+94 -3
Zobrazit soubor
@@ -26,6 +26,8 @@
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/fwd.h>
#include <stdint.h>
ROCPROFILER_EXTERN_C_INIT
/**
@@ -35,6 +37,49 @@ ROCPROFILER_EXTERN_C_INIT
* @{
*/
/**
* @brief Page migration triggers
*
*/
typedef enum
{
ROCPROFILER_PAGE_MIGRATION_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION,
ROCPROFILER_PAGE_MIGRATION_TRIGGER_LAST,
} rocprofiler_page_migration_trigger_t;
/**
* @brief Page migration triggers causing the queue to suspend
*
*/
typedef enum
{
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_LAST,
} rocprofiler_page_migration_queue_suspend_trigger_t;
/**
* @brief Page migration triggers causing an unmap from the GPU
*
*/
typedef enum
{
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_NONE = -1,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_LAST,
} rocprofiler_page_migration_unmap_from_gpu_trigger_t;
/**
* @brief ROCProfiler Buffer HSA API Tracer Record.
*/
@@ -150,17 +195,63 @@ typedef struct rocprofiler_buffer_tracing_kernel_dispatch_record_t
/// @brief runtime grid size
} rocprofiler_buffer_tracing_kernel_dispatch_record_t;
typedef struct
{
uint8_t read_fault : 1; ///< Is the fault due to a read or a write
uint8_t migrated : 1;
uint32_t node_id; ///< GPU or CPU node ID which reports a page fault
uint64_t address; ///< Address access that caused the page fault
} rocprofiler_buffer_tracing_page_migration_page_fault_record_t;
typedef struct
{
uint64_t start_addr; ///< Start address of the page being migrated
uint64_t end_addr; ///< End address of the page being migrated
uint32_t from_node; ///< Source node
uint32_t to_node; ///< Destination node
uint32_t prefetch_node; ///< Node from which page was prefetched
uint32_t preferred_node; ///< Preferred destinaion node
rocprofiler_page_migration_trigger_t trigger; ///< Cause of migration
} rocprofiler_buffer_tracing_page_migration_page_migrate_record_t;
typedef struct
{
uint8_t rescheduled : 1;
uint32_t node_id; ///< GPU node from which the queue was suspended
rocprofiler_page_migration_queue_suspend_trigger_t trigger; ///< Cause of queue suspension
} rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t;
typedef struct
{
uint32_t node_id; ///< Node ID from which page was unmapped
uint64_t start_addr; ///< Start address of unmapped page
uint64_t end_addr; ///< End address of unmapped page
rocprofiler_page_migration_unmap_from_gpu_trigger_t trigger; ///< Cause of unmap
} rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t;
/**
* @brief ROCProfiler Buffer Page Migration Tracer Record. Not implemented.
* @brief ROCProfiler Buffer Page Migration Tracer Record
*/
typedef struct
{
uint64_t size; ///< size of this struct
rocprofiler_buffer_tracing_kind_t kind; ///< ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION
rocprofiler_correlation_id_t correlation_id; ///< correlation ids for record
rocprofiler_tracing_operation_t operation;
rocprofiler_timestamp_t start_timestamp; ///< start time in nanoseconds
rocprofiler_timestamp_t end_timestamp; ///< end time in nanoseconds
// Not Sure What is the info needed here?
uint32_t pid;
union
{
rocprofiler_buffer_tracing_page_migration_page_fault_record_t page_fault;
rocprofiler_buffer_tracing_page_migration_page_migrate_record_t page_migrate;
rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t queue_suspend;
rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t unmap_from_gpu;
struct
{
uint64_t reserved[12];
};
};
} rocprofiler_buffer_tracing_page_migration_record_t;
/**
+19
Zobrazit soubor
@@ -89,6 +89,10 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_STATUS_ERROR_AST_GENERATION_FAILED, ///< AST could not be generated correctly
ROCPROFILER_STATUS_ERROR_AST_NOT_FOUND, ///< AST was not found
ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD, ///< Event coordinate was not found by AQL profile
ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL, ///< A service depends on a newer version of KFD
///< (amdgpu kernel driver). Check logs for
///< service that report incompatibility
ROCPROFILER_STATUS_LAST,
} rocprofiler_status_t;
@@ -202,6 +206,21 @@ typedef enum // NOLINT(performance-enum-size)
ROCPROFILER_MEMORY_COPY_LAST,
} rocprofiler_memory_copy_operation_t;
/**
* @brief Page migration event.
*/
typedef enum // NOLINT(performance-enum-size)
{
ROCPROFILER_PAGE_MIGRATION_NONE = 0, ///< Unknown event
ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE,
ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT,
ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND,
ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU,
// Any and all events, from all processes. Requires superuser
// ROCPROFILER_PAGE_MIGRATION_ANY_ALL_PROCESSES,
ROCPROFILER_PAGE_MIGRATION_LAST,
} rocprofiler_page_migration_operation_t;
/**
* @brief ROCProfiler Kernel Dispatch Tracing Operation Types.
*/
+1
Zobrazit soubor
@@ -45,6 +45,7 @@ add_subdirectory(marker)
add_subdirectory(thread_trace)
add_subdirectory(tracing)
add_subdirectory(kernel_dispatch)
add_subdirectory(page_migration)
target_link_libraries(
rocprofiler-object-library
+15 -2
Zobrazit soubor
@@ -34,12 +34,15 @@
#include "lib/rocprofiler-sdk/hsa/scratch_memory.hpp"
#include "lib/rocprofiler-sdk/kernel_dispatch/kernel_dispatch.hpp"
#include "lib/rocprofiler-sdk/marker/marker.hpp"
#include "lib/rocprofiler-sdk/page_migration/page_migration.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include <glog/logging.h>
#include <atomic>
#include <limits>
#include <stdexcept>
#include <string_view>
#include <vector>
#define RETURN_STATUS_ON_FAIL(...) \
@@ -105,8 +108,7 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t c
if(rocprofiler::registration::get_init_status() > -1)
return ROCPROFILER_STATUS_ERROR_CONFIGURATION_LOCKED;
static auto unsupported = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION};
static auto unsupported = std::unordered_set<rocprofiler_buffer_tracing_kind_t>{};
if(unsupported.count(kind) > 0) return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
auto* ctx = rocprofiler::context::get_mutable_registered_context(context_id);
@@ -137,6 +139,9 @@ rocprofiler_configure_buffer_tracing_service(rocprofiler_context_id_t c
ctx->buffered_tracer->domains, kind, operations[i]));
}
if(kind == ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)
RETURN_STATUS_ON_FAIL(rocprofiler::page_migration::init());
return ROCPROFILER_STATUS_SUCCESS;
}
@@ -233,6 +238,10 @@ rocprofiler_query_buffer_tracing_kind_operation_name(rocprofiler_buffer_tracing_
break;
}
case ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION:
{
val = rocprofiler::page_migration::name_by_id(operation);
break;
}
case ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT:
{
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
@@ -340,6 +349,10 @@ rocprofiler_iterate_buffer_tracing_kind_operations(
break;
}
case ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION:
{
ops = rocprofiler::page_migration::get_ids();
break;
}
case ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT:
{
return ROCPROFILER_STATUS_ERROR_NOT_IMPLEMENTED;
+9
Zobrazit soubor
@@ -0,0 +1,9 @@
#
#
set(ROCPROFILER_LIB_UVM_SOURCES page_migration.cpp)
set(ROCPROFILER_LIB_UVM_HEADERS defines.hpp page_migration.hpp utils.hpp)
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_UVM_SOURCES}
${ROCPROFILER_LIB_UVM_HEADERS})
add_subdirectory(details)
+87
Zobrazit soubor
@@ -0,0 +1,87 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#define KFD_EVENT_PARSE_EVENTS(X, HANDLER) \
do \
{ \
const auto find_newline = [&](auto b) { return std::find(b, X.cend(), '\n'); }; \
\
const auto* cursor = X.cbegin(); \
\
for(const auto* pos = find_newline(cursor); pos != X.cend(); pos = find_newline(cursor)) \
{ \
size_t char_count = pos - cursor; \
assert(char_count > 0); \
std::string_view event_str{cursor, char_count}; \
\
LOG(INFO) << fmt::format("KFD event: [{}]", event_str); \
HANDLER(event_str); \
\
cursor = pos + 1; \
} \
} while(0)
#define APPEND_UVM_EVENT(X) ROCPROFILER_UVM_EVENT_##X
#define APPEND_1(X) APPEND_UVM_EVENT(X)
#define CONCAT(X, Y) X##Y
#define APPEND_2(A1, A2) APPEND_1(A1), APPEND_1(A2)
#define APPEND_3(A1, A2, A3) APPEND_2(A1, A2), APPEND_1(A3)
#define APPEND_4(A1, A2, A3, A4) APPEND_3(A1, A2, A3), APPEND_1(A4)
#define APPEND_5(A1, A2, A3, A4, A5) APPEND_4(A1, A2, A3, A4), APPEND_1(A5)
#define MACRO_N(MACRO, N, ...) CONCAT(MACRO, N)(__VA_ARGS__)
#define APPLY_N(MACRO, ...) MACRO_N(MACRO, IMPL_DETAIL_FOR_EACH_NARG(__VA_ARGS__), __VA_ARGS__)
#define GET_UVM_ENUMS(...) APPLY_N(APPEND_, __VA_ARGS__)
// static constexpr size_t uvm_event = UVM_ENUM;
#define SPECIALIZE_UVM_KFD_EVENT(UVM_ENUM, KFD_ENUM, FORMAT_STRING) \
template <> \
struct uvm_event_info<UVM_ENUM> \
{ \
static constexpr size_t kfd_event = KFD_ENUM; \
static constexpr std::string_view format_str{FORMAT_STRING}; \
};
#define SPECIALIZE_PAGE_MIGRATION_INFO(TYPE, ...) \
template <> \
struct page_migration_info<ROCPROFILER_PAGE_MIGRATION_##TYPE> \
{ \
static constexpr auto operation_idx = ROCPROFILER_PAGE_MIGRATION_##TYPE; \
static constexpr auto name = #TYPE; \
static constexpr size_t uvm_bitmask = \
bitmask(std::index_sequence<GET_UVM_ENUMS(__VA_ARGS__)>()); \
static constexpr size_t kfd_bitmask = \
to_kfd_bitmask(std::index_sequence<GET_UVM_ENUMS(__VA_ARGS__)>()); \
}
#define COPY_FROM_START_1(MEMBER) end.MEMBER = start.MEMBER;
#define COPY_FROM_START_2(UNION_TYPE, MEMBER) end.UNION_TYPE.MEMBER = start.UNION_TYPE.MEMBER;
#define SPECIALIZE_KFD_IOC_IOCTL(STRUCT, ARG_IOC) \
template <> \
struct IOC_event<STRUCT> \
{ \
static constexpr auto value = ARG_IOC; \
}
+7
Zobrazit soubor
@@ -0,0 +1,7 @@
#
#
set(ROCPROFILER_LIB_UVM_DETAILS_SOURCES)
set(ROCPROFILER_LIB_UVM_DETAILS_HEADERS kfd_ioctl.h)
target_sources(rocprofiler-object-library PRIVATE ${ROCPROFILER_LIB_UVM_DETAILS_SOURCES}
${ROCPROFILER_LIB_UVM_DETAILS_HEADERS})
Rozdílový obsah nebyl zobrazen, protože je příliš veliký Načíst rozdílové porovnání
Rozdílový obsah nebyl zobrazen, protože je příliš veliký Načíst rozdílové porovnání
+74
Zobrazit soubor
@@ -0,0 +1,74 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "lib/rocprofiler-sdk/page_migration/defines.hpp"
#include "lib/rocprofiler-sdk/page_migration/page_migration.hpp"
#if defined(ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL) && \
ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL == 1
namespace rocprofiler
{
namespace page_migration
{
// clang-format off
// Map ROCPROF UVM enums to KFD enums
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_NONE, KFD_SMI_EVENT_NONE, "Error: Invalid UVM event from KFD" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_MIGRATE_START, KFD_SMI_EVENT_MIGRATE_START, "%x %ld -%d @%lx(%lx) %x->%x %x:%x %d\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_MIGRATE_END, KFD_SMI_EVENT_MIGRATE_END, "%x %ld -%d @%lx(%lx) %x->%x %d\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, KFD_SMI_EVENT_PAGE_FAULT_START, "%x %ld -%d @%lx(%x) %c\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END, KFD_SMI_EVENT_PAGE_FAULT_END, "%x %ld -%d @%lx(%x) %c\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION, KFD_SMI_EVENT_QUEUE_EVICTION, "%x %ld -%d %x %d\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE, KFD_SMI_EVENT_QUEUE_RESTORE, "%x %ld -%d %x\n" );
SPECIALIZE_UVM_KFD_EVENT(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU, KFD_SMI_EVENT_UNMAP_FROM_GPU, "%x %ld -%d @%lx(%lx) %x %d\n" );
// clang-format on
# undef SPECIALIZE_UVM_KFD_EVENT
SPECIALIZE_PAGE_MIGRATION_INFO(NONE, NONE);
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_MIGRATE, MIGRATE_START, MIGRATE_END);
SPECIALIZE_PAGE_MIGRATION_INFO(PAGE_FAULT, PAGE_FAULT_START, PAGE_FAULT_END);
SPECIALIZE_PAGE_MIGRATION_INFO(QUEUE_SUSPEND, QUEUE_EVICTION, QUEUE_RESTORE);
SPECIALIZE_PAGE_MIGRATION_INFO(UNMAP_FROM_GPU, UNMAP_FROM_GPU);
template <size_t UvmOpInx, size_t... OpInxs>
constexpr size_t to_rocprof_op_impl(std::index_sequence<OpInxs...>)
{
return ((((bitmask(UvmOpInx) & page_migration_info<OpInxs>::uvm_bitmask) != 0) * OpInxs) + ...);
}
template <size_t... OpInxs>
constexpr auto _to_rocprof_op_impl(std::index_sequence<OpInxs...>)
{
return std::array{
to_rocprof_op_impl<OpInxs>(std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>{})...};
}
constexpr auto
to_rocprof_op(size_t pos)
{
using rop = rocprofiler_page_migration_operation_t;
return static_cast<rop>(
_to_rocprof_op_impl(std::make_index_sequence<ROCPROFILER_UVM_EVENT_LAST>{})[pos]);
}
} // namespace page_migration
} // namespace rocprofiler
#endif
+45
Zobrazit soubor
@@ -0,0 +1,45 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include "lib/common/container/small_vector.hpp"
#include <rocprofiler-sdk/rocprofiler.h>
namespace rocprofiler
{
namespace page_migration
{
const char*
name_by_id(uint32_t id);
std::vector<uint32_t>
get_ids();
rocprofiler_status_t
init();
void
finalize();
} // namespace page_migration
} // namespace rocprofiler
+218
Zobrazit soubor
@@ -0,0 +1,218 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#pragma once
#include "lib/rocprofiler-sdk/page_migration/details/kfd_ioctl.h"
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/fwd.h>
#include <cstdint>
#include <unordered_map>
#include <utility>
namespace rocprofiler
{
namespace page_migration
{
// serves as an overview of what events we capture and report
enum fault_type_t
{
NONE,
READ,
WRITE,
};
struct uvm_event_page_fault_start_t
{
int kind;
uint64_t start_timestamp;
int pid;
int node_id;
uint64_t address;
fault_type_t fault;
};
struct uvm_event_page_fault_end_t
{
int kind;
uint64_t end_timestamp;
uint32_t pid;
int node_id;
uint64_t address;
bool migrated;
};
struct uvm_event_migrate_start_t
{
int kind;
uint64_t start_timestamp;
uint32_t pid;
uint64_t start;
uint64_t end_offset;
uint32_t from;
uint32_t to;
uint32_t prefetch_node; // last prefetch location, 0 for CPU, or GPU id
uint32_t preferred_node; // perferred location, 0 for CPU, or GPU id
uint32_t trigger;
};
struct uvm_event_migrate_end_t
{
int kind;
uint64_t end_timestamp;
uint32_t pid;
uint64_t start;
uint64_t end_offset;
uint32_t from;
uint32_t to;
uint32_t trigger;
};
struct uvm_event_queue_eviction_t
{
int kind;
uint64_t start_timestamp;
uint32_t pid;
int node_id;
uint32_t trigger;
};
struct uvm_event_queue_restore_t
{
int kind;
uint64_t end_timestamp;
uint32_t pid;
int node_id;
bool rescheduled;
};
struct uvm_event_unmap_from_gpu_t
{
int kind;
uint64_t timestamp;
uint32_t pid;
uint64_t address;
uint64_t size;
int node_id;
uint32_t trigger;
};
template <size_t e>
struct uvm_event_info;
template <size_t>
struct page_migration_info;
namespace kfd
{
template <typename T>
struct IOC_event;
} // namespace kfd
constexpr size_t
bitmask(size_t num)
{
if(num == 0)
return 0;
else
return (1ULL << (num - 1));
}
template <size_t... Args>
constexpr size_t bitmask(std::index_sequence<Args...>)
{
return (bitmask(Args) | ...);
}
enum uvm_event_id_t
{
ROCPROFILER_UVM_EVENT_NONE,
ROCPROFILER_UVM_EVENT_MIGRATE_START,
ROCPROFILER_UVM_EVENT_MIGRATE_END,
ROCPROFILER_UVM_EVENT_PAGE_FAULT_START,
ROCPROFILER_UVM_EVENT_PAGE_FAULT_END,
ROCPROFILER_UVM_EVENT_QUEUE_EVICTION,
ROCPROFILER_UVM_EVENT_QUEUE_RESTORE,
ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU,
ROCPROFILER_UVM_EVENT_LAST,
};
static_assert(KFD_SMI_EVENT_NONE == 0);
static_assert(KFD_SMI_EVENT_MIGRATE_START == 5);
static_assert(KFD_SMI_EVENT_MIGRATE_END == 6);
static_assert(KFD_SMI_EVENT_PAGE_FAULT_START == 7);
static_assert(KFD_SMI_EVENT_PAGE_FAULT_END == 8);
static_assert(KFD_SMI_EVENT_QUEUE_EVICTION == 9);
static_assert(KFD_SMI_EVENT_QUEUE_RESTORE == 10);
static_assert(KFD_SMI_EVENT_UNMAP_FROM_GPU == 11);
static_assert(KFD_SMI_EVENT_ALL_PROCESS == 64);
using rocprof_buffer_op_t = rocprofiler_page_migration_operation_t;
using node_fd_t = int;
using event_map_t =
std::unordered_map<uint64_t, rocprofiler_buffer_tracing_page_migration_record_t>;
using events_cache_t = std::array<event_map_t, ROCPROFILER_PAGE_MIGRATION_LAST>;
template <size_t... Ints>
constexpr size_t to_kfd_bitmask(std::index_sequence<Ints...>)
{
return bitmask(std::index_sequence<uvm_event_info<Ints>::kfd_event...>());
}
template <rocprofiler_page_migration_operation_t... Ops>
constexpr size_t to_uvm_bitmask(std::index_sequence<Ops...>)
{
return bitmask(std::index_sequence<static_cast<uint32_t>(Ops)...>());
}
template <size_t RocprofOpIdx, size_t UvmOpIdx>
constexpr bool
is_rocprof_uvm_map()
{
return page_migration_info<RocprofOpIdx>::uvm_bitmask & bitmask(UvmOpIdx);
}
template <size_t RocprofOpIdx, size_t OpInx, size_t... OpInxs>
constexpr bool
_is_rocprof_uvm_map(size_t uvm_event, std::index_sequence<OpInx, OpInxs...>)
{
if(OpInx == uvm_event)
return is_rocprof_uvm_map<RocprofOpIdx, OpInx>();
else if constexpr(sizeof...(OpInxs) > 0)
return _is_rocprof_uvm_map<RocprofOpIdx>(uvm_event, std::index_sequence<OpInxs...>{});
else
return false;
}
template <size_t RocprofOpIdx>
constexpr bool
is_rocprof_uvm_map(size_t uvm_event)
{
return _is_rocprof_uvm_map<RocprofOpIdx>(
uvm_event, std::make_index_sequence<ROCPROFILER_UVM_EVENT_LAST>{});
}
} // namespace page_migration
} // namespace rocprofiler
+2
Zobrazit soubor
@@ -38,6 +38,7 @@
#include "lib/rocprofiler-sdk/intercept_table.hpp"
#include "lib/rocprofiler-sdk/internal_threading.hpp"
#include "lib/rocprofiler-sdk/marker/marker.hpp"
#include "lib/rocprofiler-sdk/page_migration/page_migration.hpp"
#include <rocprofiler-sdk/context.h>
#include <rocprofiler-sdk/fwd.h>
@@ -600,6 +601,7 @@ finalize()
set_fini_status(-1);
hsa::async_copy_fini();
hsa::queue_controller_fini();
page_migration::finalize();
hsa::code_object_shutdown();
if(get_init_status() > 0)
{
+2
Zobrazit soubor
@@ -87,6 +87,8 @@ ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_AST_NOT_FOUND, "AST was not f
ROCPROFILER_STATUS_STRING(
ROCPROFILER_STATUS_ERROR_AQL_NO_EVENT_COORD,
"AQL Profiler was not able to find event coordinates for defined counters")
ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL,
"A service depends on a newer version of KFD (amdgpu kernel driver)")
template <size_t Idx, size_t... Tail>
const char*
get_status_name(rocprofiler_status_t status, std::index_sequence<Idx, Tail...>)
+3 -2
Zobrazit soubor
@@ -36,8 +36,9 @@ set_tests_properties(${lib_TESTS} PROPERTIES TIMEOUT 30 LABELS "unittests")
#
# -------------------------------------------------------------------------------------- #
set(rocprofiler_shared_lib_sources external_correlation.cpp intercept_table.cpp
registration.cpp roctx.cpp status.cpp)
set(rocprofiler_shared_lib_sources
external_correlation.cpp intercept_table.cpp page_migration.cpp registration.cpp
roctx.cpp status.cpp)
add_executable(rocprofiler-lib-tests-shared)
target_sources(rocprofiler-lib-tests-shared PRIVATE ${rocprofiler_shared_lib_sources})
+177
Zobrazit soubor
@@ -0,0 +1,177 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "lib/common/defines.hpp"
#include "lib/rocprofiler-sdk/page_migration/details/kfd_ioctl.h"
#include "lib/rocprofiler-sdk/page_migration/utils.hpp"
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <fmt/format.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <sstream>
#include <string_view>
#define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1
#include "lib/rocprofiler-sdk/page_migration/page_migration.def.cpp"
#undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL
#define ASSERT_SAME(A, B) static_assert(static_cast<size_t>(A) == static_cast<size_t>(B))
namespace
{
constexpr std::string_view MULTILINE_STRING = "This is 0 Line 0\n"
"This is 10 Line 1\n"
"This is 20 Line 2\n"
"This is 30 Line 3\n"
"This is 40 Line 4\n";
}
void
return_line(const std::string_view line)
{
static int line_no = 0;
std::stringstream strs{};
strs << fmt::format("This is {} Line {}", line_no * 10, line_no);
EXPECT_EQ(strs.str(), line);
line_no++;
}
auto
parse_lines()
{
KFD_EVENT_PARSE_EVENTS(MULTILINE_STRING, return_line);
}
TEST(page_migration, readlines)
{
// Ensure all lines are read
parse_lines();
}
TEST(page_migration, parse_kvm_events)
{
// Ensure all lines are read
parse_lines();
}
TEST(page_migtation, rocprof_kfd_map)
{
using namespace ::rocprofiler::page_migration;
// clang-format off
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
static_assert( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
EXPECT_TRUE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_MIGRATE_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_MIGRATE_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_PAGE_FAULT_START >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_PAGE_FAULT_END >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_QUEUE_EVICTION >() );
static_assert( ! is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU, ROCPROFILER_UVM_EVENT_QUEUE_RESTORE >() );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND >(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_MIGRATE_START ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_MIGRATE_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_START) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION ) );
EXPECT_FALSE( is_rocprof_uvm_map < ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU >(ROCPROFILER_UVM_EVENT_QUEUE_RESTORE ) );
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_MIGRATE_START) == ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE );
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_PAGE_FAULT_END) == ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT );
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU) == ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU );
static_assert(to_rocprof_op(ROCPROFILER_UVM_EVENT_QUEUE_EVICTION) == ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PREFETCH, KFD_MIGRATE_TRIGGER_PREFETCH );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_GPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_PAGEFAULT_CPU, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_TRIGGER_TTM_EVICTION, KFD_MIGRATE_TRIGGER_TTM_EVICTION );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SVM, KFD_QUEUE_EVICTION_TRIGGER_SVM );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_USERPTR, KFD_QUEUE_EVICTION_TRIGGER_USERPTR );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_TTM, KFD_QUEUE_EVICTION_TRIGGER_TTM );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_SUSPEND, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_CHECKPOINT, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND_TRIGGER_CRIU_RESTORE, KFD_QUEUE_EVICTION_CRIU_RESTORE );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY );
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_MMU_NOTIFY_MIGRATE, KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE);
ASSERT_SAME(ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU_TRIGGER_UNMAP_FROM_CPU, KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU );
static_assert(to_kfd_bitmask(std::index_sequence<
ROCPROFILER_UVM_EVENT_PAGE_FAULT_START, ROCPROFILER_UVM_EVENT_UNMAP_FROM_GPU>()) ==
(KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_PAGE_FAULT_START)
| KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_UNMAP_FROM_GPU)));
// clang-format on
}
Normální soubor → Spustitelný soubor
+2
Zobrazit soubor
@@ -1,3 +1,5 @@
#!/usr/bin/env python3
import argparse
import os
import sys
+1
Zobrazit soubor
@@ -51,6 +51,7 @@ add_subdirectory(kernel-tracing)
add_subdirectory(async-copy-tracing)
add_subdirectory(scratch-memory-tracing)
add_subdirectory(c-tool)
add_subdirectory(page-migration)
# rocprofv3 validation tests
add_subdirectory(rocprofv3)
+1
Zobrazit soubor
@@ -21,4 +21,5 @@ add_subdirectory(multistream)
add_subdirectory(vector-operations)
add_subdirectory(hip-in-libraries)
add_subdirectory(scratch-memory)
add_subdirectory(page-migration)
add_subdirectory(hsa-queue-dependency)
+47
Zobrazit soubor
@@ -0,0 +1,47 @@
#
#
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
if(NOT CMAKE_HIP_COMPILER)
find_program(
amdclangpp_EXECUTABLE
NAMES amdclang++
HINTS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATHS ${ROCM_PATH} ENV ROCM_PATH /opt/rocm
PATH_SUFFIXES bin llvm/bin NO_CACHE)
mark_as_advanced(amdclangpp_EXECUTABLE)
if(amdclangpp_EXECUTABLE)
set(CMAKE_HIP_COMPILER "${amdclangpp_EXECUTABLE}")
endif()
endif()
project(rocprofiler-tests-bin-page-migration LANGUAGES CXX HIP)
foreach(_TYPE DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
if("${CMAKE_HIP_FLAGS_${_TYPE}}" STREQUAL "")
set(CMAKE_HIP_FLAGS_${_TYPE} "${CMAKE_CXX_FLAGS_${_TYPE}}")
endif()
endforeach()
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_HIP_STANDARD 17)
set(CMAKE_HIP_EXTENSIONS OFF)
set(CMAKE_HIP_STANDARD_REQUIRED ON)
set_source_files_properties(page-migration.cpp PROPERTIES LANGUAGE HIP)
add_executable(page-migration)
target_sources(page-migration PRIVATE page-migration.cpp)
target_compile_options(page-migration PRIVATE -W -Wall -Wextra -Wpedantic -Wshadow
-Werror)
find_package(Threads REQUIRED)
target_link_libraries(page-migration PRIVATE Threads::Threads)
install(
TARGETS page-migration
DESTINATION bin
COMPONENT tests)
+222
Zobrazit soubor
@@ -0,0 +1,222 @@
// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <hip/hip_runtime.h>
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#define HIP_API_CALL(CALL) \
{ \
hipError_t error_ = (CALL); \
if(error_ != hipSuccess) \
{ \
auto _hip_api_print_lk = auto_lock_t{print_lock}; \
fprintf(stderr, \
"%s:%d :: HIP error : %s\n", \
__FILE__, \
__LINE__, \
hipGetErrorString(error_)); \
throw std::runtime_error("hip_api_call"); \
} \
}
using auto_lock_t = std::unique_lock<std::mutex>;
auto print_lock = std::mutex{};
__global__ void
kernel(size_t* __restrict__ data, int size)
{
int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
int stride = hipBlockDim_x * hipGridDim_x;
for(int i = x; i < size; i += stride)
{
data[i] *= 2;
}
}
struct mmap_allocator
{
explicit mmap_allocator(size_t num_pages)
{
m_size = num_pages * sysconf(_SC_PAGE_SIZE);
void* ret = mmap(nullptr, // addr: null. Kernel gives us page-aligned memory
m_size, // length: num bytes to allocate
PROT_WRITE | PROT_READ, // mem_prot: Allow read/write
MAP_ANONYMOUS | MAP_PRIVATE, // flags: No file handle
-1, // no fd, use memory "MAP_ANONYMOUS"
0); // offset into fd
if(ret == ((void*) -1)) // NOLINT performance-no-int-to-ptr
{
auto ecode = errno;
fprintf(stderr, "mmap error %d: %s", ecode, strerror(ecode));
throw std::runtime_error("mmap failed");
}
else
{
m_addr = ret;
::memset(m_addr, 0, m_size);
}
}
~mmap_allocator()
{
auto ret = munmap(m_addr, m_size);
if(ret != 0) perror("munmap failed");
}
mmap_allocator(const mmap_allocator&) = delete;
mmap_allocator(mmap_allocator&&) noexcept = default;
mmap_allocator& operator=(const mmap_allocator&) = delete;
mmap_allocator& operator=(mmap_allocator&&) noexcept = default;
template <typename Up>
Up* get() const
{
static_assert(!std::is_pointer<Up>::value, "must not be pointer type");
return static_cast<Up*>(m_addr);
}
private:
size_t m_size = 0;
void* m_addr = nullptr;
};
int
main()
{
using namespace std::chrono_literals;
static constexpr auto NUM_PAGES = 16;
const auto PAGE_SIZE_BYTES = ::sysconf(_SC_PAGE_SIZE);
size_t elem_count = (NUM_PAGES * PAGE_SIZE_BYTES) / sizeof(size_t); // one page?
auto alloc = mmap_allocator(NUM_PAGES);
void* data_v = alloc.get<void>();
auto* data = alloc.get<size_t>();
for(size_t i = 0; i < elem_count; ++i)
if(data[i] != 0) throw std::runtime_error{"bad init"};
printf("Allocated size: %lu bytes (%lu KB), (%lu MB), %zu elements @ %p\n",
sizeof(size_t) * elem_count,
sizeof(size_t) * elem_count / 1024,
sizeof(size_t) * elem_count / 1024 / 1024,
elem_count,
data_v);
HIP_API_CALL(hipHostRegister(data, elem_count * sizeof(size_t), hipHostRegisterDefault));
char maps[1024 * 1024];
std::memset(maps, '\0', 1024 * 1024);
auto fd = open("/proc/self/maps", O_RDONLY | O_CLOEXEC);
if(fd == -1)
{
auto ecode = errno;
fprintf(stderr, "mmap error %d: %s", ecode, strerror(ecode));
exit(-1);
}
auto bytes = read(fd, maps, 1024 * 1024 - 1);
if(bytes == -1)
{
auto ecode = errno;
fprintf(stderr, "mmap error %d: %s", ecode, strerror(ecode));
exit(-1);
}
close(fd);
std::string_view maps_data{maps, static_cast<size_t>(bytes)};
std::cout << "------------\n";
std::cout << maps_data;
std::cout << "------------\n";
std::istringstream maps_stream{maps_data.data()};
std::string line(1024, '\0');
while(std::getline(maps_stream, line))
{
char __[1024];
int _{};
void* start{};
void* end{};
auto ret =
std::sscanf(line.data(), "%p-%p %s %d %d:%d %d\n", &start, &end, __, &_, &_, &_, &_);
if(ret > 0 && (start == data_v))
{
size_t ptr_diff = ((size_t) end - (size_t) start);
printf("Found match: %zu %zu KB, %zu 4K > %s\n",
ptr_diff,
ptr_diff / 1024,
ptr_diff / 4096,
line.data());
}
}
for(int iter = 0; iter < 1000; ++iter)
{
for(size_t i = 0; i < elem_count; ++i)
data[i] = i;
// std::cout << "launching..." << std::endl;
hipLaunchKernelGGL(kernel, 128, 64, 0, 0, data, elem_count);
// std::cout << "syncing..." << std::endl;
HIP_API_CALL(hipDeviceSynchronize());
// std::cout << "checking..." << std::endl;
for(size_t i = 0; i < elem_count; ++i)
{
if(data[i] != (i * 2))
{
auto msg = std::stringstream{};
msg << "GPU computed value at " << i << " in iteration " << iter
<< " is incorrect. Expected " << (i * 2) << ", found " << data[i];
throw std::runtime_error{msg.str()};
}
}
std::cout << "Iteration " << std::setw(2) << iter << ": correct\n" << std::flush;
}
HIP_API_CALL(hipDeviceSynchronize());
return 0;
}
+2 -2
Zobrazit soubor
@@ -33,8 +33,8 @@
<< " failed with error code " << status_name << " (" << CHECKSTATUS \
<< "): " << status_msg << std::endl; \
std::stringstream errmsg{}; \
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \
<< status_name << ": " << status_msg << ")"; \
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \
<< " failure (" << status_name << ": " << status_msg << ")"; \
throw std::runtime_error(errmsg.str()); \
} \
}
+84
Zobrazit soubor
@@ -391,6 +391,90 @@ save(ArchiveT& ar, rocprofiler_buffer_tracing_memory_copy_record_t data)
SAVE_DATA_FIELD(src_agent_id);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_record_t& data)
{
SAVE_DATA_FIELD(size);
SAVE_DATA_FIELD(kind);
SAVE_DATA_FIELD(operation);
SAVE_DATA_FIELD(start_timestamp);
SAVE_DATA_FIELD(end_timestamp);
SAVE_DATA_FIELD(pid);
switch(data.operation)
{
case ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT:
{
ar(make_nvp("page_fault", data.page_fault));
break;
}
case ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE:
{
ar(make_nvp("page_migrate", data.page_migrate));
break;
}
case ROCPROFILER_PAGE_MIGRATION_QUEUE_SUSPEND:
{
ar(make_nvp("queue_suspend", data.queue_suspend));
break;
}
case ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU:
{
ar(make_nvp("unmap_from_gpu", data.unmap_from_gpu));
break;
}
case ROCPROFILER_PAGE_MIGRATION_NONE:
case ROCPROFILER_PAGE_MIGRATION_LAST:
{
throw std::runtime_error{"unsupported page migration operation type"};
break;
}
}
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_page_fault_record_t& data)
{
SAVE_DATA_FIELD(node_id);
SAVE_DATA_FIELD(address);
SAVE_DATA_FIELD(read_fault);
SAVE_DATA_FIELD(migrated);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_page_migrate_record_t& data)
{
SAVE_DATA_FIELD(start_addr);
SAVE_DATA_FIELD(end_addr);
SAVE_DATA_FIELD(from_node);
SAVE_DATA_FIELD(to_node);
SAVE_DATA_FIELD(prefetch_node);
SAVE_DATA_FIELD(preferred_node);
SAVE_DATA_FIELD(trigger);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_queue_suspend_record_t& data)
{
SAVE_DATA_FIELD(node_id);
SAVE_DATA_FIELD(trigger);
SAVE_DATA_FIELD(rescheduled);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, const rocprofiler_buffer_tracing_page_migration_unmap_from_gpu_record_t& data)
{
SAVE_DATA_FIELD(node_id);
SAVE_DATA_FIELD(start_addr);
SAVE_DATA_FIELD(end_addr);
SAVE_DATA_FIELD(trigger);
}
template <typename ArchiveT>
void
save(ArchiveT& ar, rocprofiler_buffer_tracing_scratch_memory_record_t data)
+65
Zobrazit soubor
@@ -0,0 +1,65 @@
#
#
#
cmake_minimum_required(VERSION 3.21.0 FATAL_ERROR)
project(
rocprofiler-tests-page-migration
LANGUAGES CXX
VERSION 0.0.0)
find_package(rocprofiler-sdk REQUIRED)
if(ROCPROFILER_MEMCHECK_PRELOAD_ENV)
set(PRELOAD_ENV
"${ROCPROFILER_MEMCHECK_PRELOAD_ENV}:$<TARGET_FILE:rocprofiler-sdk-json-tool>")
else()
set(PRELOAD_ENV "LD_PRELOAD=$<TARGET_FILE:rocprofiler-sdk-json-tool>")
endif()
add_test(NAME test-page-migration-execute COMMAND $<TARGET_FILE:page-migration>)
set(page-migration-env
"${PRELOAD_ENV}"
"ROCPROFILER_TOOL_OUTPUT_FILE=page-migration-test.json"
"LD_LIBRARY_PATH=$<TARGET_FILE_DIR:rocprofiler::rocprofiler-shared-library>:$ENV{LD_LIBRARY_PATH}"
)
set_tests_properties(
test-page-migration-execute
PROPERTIES TIMEOUT
45
LABELS
"integration-tests"
ENVIRONMENT
"${page-migration-env}"
FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
SKIP_REGULAR_EXPRESSION
"KFD does not support SVM event reporting"
WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR})
foreach(FILENAME validate.py pytest.ini conftest.py)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}
${CMAKE_CURRENT_BINARY_DIR}/${FILENAME} COPYONLY)
endforeach()
add_test(NAME test-page-migration-validate
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/validate.py --input
${CMAKE_CURRENT_BINARY_DIR}/page-migration-test.json)
set_tests_properties(
test-page-migration-validate
PROPERTIES TIMEOUT
45
LABELS
"integration-tests"
DEPENDS
test-page-migration-execute
FAIL_REGULAR_EXPRESSION
"${ROCPROFILER_DEFAULT_FAIL_REGEX}"
SKIP_REGULAR_EXPRESSION
"KFD does not support SVM event reporting"
WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR})
+27
Zobrazit soubor
@@ -0,0 +1,27 @@
#!/usr/bin/env python3
import json
import pytest
def pytest_addoption(parser):
parser.addoption(
"--input",
action="store",
default="page-migration-test.json",
help="Input JSON",
)
@pytest.fixture
def input_data(request):
filename = request.config.getoption("--input")
data = None
with open(filename, "r") as inp:
data = json.load(inp)
if data["rocprofiler-sdk-json-tool"]["metadata"]["validate_page_migration"] is False:
return pytest.skip(
"Skipping test because KFD does not support SVM event reporting"
)
return data
+4
Zobrazit soubor
@@ -0,0 +1,4 @@
[pytest]
addopts = --durations=20 -rA -s -vv
testpaths = validate.py
+344
Zobrazit soubor
@@ -0,0 +1,344 @@
#!/usr/bin/env python3
from collections import defaultdict
import os
import sys
import pytest
# helper function
def node_exists(name, data, min_len=1):
assert name in data
assert data[name] is not None
if isinstance(data[name], (list, tuple, dict, set)):
assert len(data[name]) >= min_len
def to_dict(key_values):
a = defaultdict()
for kv in key_values:
a[kv["key"]] = kv["value"]
return a
def op_name(op_name, record):
found_op = False
op_key = None
for kind_node in record["names"]["kind_names"]:
if kind_node["value"] == op_name:
op_key = kind_node["key"]
for op_node in record["names"]["operation_names"]:
if op_node["key"] == op_key:
return op_key, to_dict(op_node["value"])
def dict_from_value_key(d):
ret_d = defaultdict()
for k, v in d.items():
assert v not in ret_d
ret_d[v] = k
return ret_d
def sort_by_timestamp(lines):
timestamp_line_map = {}
for log_line in lines:
timestamp = log_line.split(" ")[1]
timestamp_line_map[timestamp] = log_line
timestamps_sorted = sorted([l.split(" ")[1] for l in lines])
return timestamps_sorted, timestamp_line_map
# ------------------------------ Tests ------------------------------ #
def test_data_structure(input_data):
"""verify minimum amount of expected data is present"""
data = input_data
node_exists("rocprofiler-sdk-json-tool", data)
sdk_data = data["rocprofiler-sdk-json-tool"]
node_exists("metadata", sdk_data)
node_exists("pid", sdk_data["metadata"])
node_exists("main_tid", sdk_data["metadata"])
node_exists("init_time", sdk_data["metadata"])
node_exists("fini_time", sdk_data["metadata"])
node_exists("validate_page_migration", sdk_data["metadata"])
assert sdk_data["metadata"]["validate_page_migration"] is True
node_exists("agents", sdk_data)
node_exists("call_stack", sdk_data)
node_exists("callback_records", sdk_data)
node_exists("buffer_records", sdk_data)
node_exists("names", sdk_data["callback_records"])
node_exists("code_objects", sdk_data["callback_records"])
node_exists("kernel_symbols", sdk_data["callback_records"])
node_exists("hsa_api_traces", sdk_data["callback_records"])
node_exists("hip_api_traces", sdk_data["callback_records"], 0)
node_exists("marker_api_traces", sdk_data["callback_records"], 0)
node_exists("names", sdk_data["buffer_records"])
node_exists("kernel_dispatches", sdk_data["buffer_records"])
node_exists("memory_copies", sdk_data["buffer_records"], 0)
node_exists("hsa_api_traces", sdk_data["buffer_records"])
node_exists("hip_api_traces", sdk_data["buffer_records"], 0)
node_exists("marker_api_traces", sdk_data["buffer_records"], 0)
node_exists("retired_correlation_ids", sdk_data["buffer_records"])
node_exists("page_migration", sdk_data["buffer_records"])
def test_timestamps(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
cb_start = {}
cb_end = {}
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
for itr in sdk_data["callback_records"][titr]:
cid = itr["record"]["correlation_id"]["internal"]
phase = itr["record"]["phase"]
if phase == 1:
cb_start[cid] = itr["timestamp"]
elif phase == 2:
cb_end[cid] = itr["timestamp"]
assert cb_start[cid] <= itr["timestamp"]
else:
assert phase == 1 or phase == 2
for itr in sdk_data["buffer_records"][titr]:
assert itr["start_timestamp"] <= itr["end_timestamp"]
for titr in ["kernel_dispatches", "memory_copies"]:
for itr in sdk_data["buffer_records"][titr]:
assert itr["start_timestamp"] < itr["end_timestamp"]
assert itr["correlation_id"]["internal"] > 0
assert itr["correlation_id"]["external"] > 0
assert sdk_data["metadata"]["init_time"] < itr["start_timestamp"]
assert sdk_data["metadata"]["init_time"] < itr["end_timestamp"]
assert sdk_data["metadata"]["fini_time"] > itr["start_timestamp"]
assert sdk_data["metadata"]["fini_time"] > itr["end_timestamp"]
# api_start = cb_start[itr["correlation_id"]["internal"]]
# api_end = cb_end[itr["correlation_id"]["internal"]]
# assert api_start < itr["start_timestamp"]
# assert api_end <= itr["end_timestamp"]
def test_internal_correlation_ids(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
api_corr_ids = []
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
for itr in sdk_data["callback_records"][titr]:
api_corr_ids.append(itr["record"]["correlation_id"]["internal"])
for itr in sdk_data["buffer_records"][titr]:
api_corr_ids.append(itr["correlation_id"]["internal"])
api_corr_ids_sorted = sorted(api_corr_ids)
api_corr_ids_unique = list(set(api_corr_ids))
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
assert itr["correlation_id"]["internal"] in api_corr_ids_unique
for itr in sdk_data["buffer_records"]["memory_copies"]:
assert itr["correlation_id"]["internal"] in api_corr_ids_unique
len_corr_id_unq = len(api_corr_ids_unique)
assert len(api_corr_ids) != len_corr_id_unq
assert max(api_corr_ids_sorted) == len_corr_id_unq
def test_external_correlation_ids(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
extern_corr_ids = []
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
for itr in sdk_data["callback_records"][titr]:
assert itr["record"]["correlation_id"]["external"] > 0
assert (
itr["record"]["thread_id"] == itr["record"]["correlation_id"]["external"]
)
extern_corr_ids.append(itr["record"]["correlation_id"]["external"])
extern_corr_ids = list(set(sorted(extern_corr_ids)))
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
for itr in sdk_data["buffer_records"][titr]:
assert itr["correlation_id"]["external"] > 0
assert itr["thread_id"] == itr["correlation_id"]["external"]
assert itr["thread_id"] in extern_corr_ids
assert itr["correlation_id"]["external"] in extern_corr_ids
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
assert itr["correlation_id"]["external"] > 0
assert itr["correlation_id"]["external"] in extern_corr_ids
for itr in sdk_data["buffer_records"]["memory_copies"]:
assert itr["correlation_id"]["external"] > 0
assert itr["correlation_id"]["external"] in extern_corr_ids
def test_kernel_ids(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
symbol_info = {}
for itr in sdk_data["callback_records"]["kernel_symbols"]:
phase = itr["record"]["phase"]
payload = itr["payload"]
kern_id = payload["kernel_id"]
assert phase == 1 or phase == 2
assert kern_id > 0
if phase == 1:
assert len(payload["kernel_name"]) > 0
symbol_info[kern_id] = payload
elif phase == 2:
assert payload["kernel_id"] in symbol_info.keys()
assert payload["kernel_name"] == symbol_info[kern_id]["kernel_name"]
for itr in sdk_data["buffer_records"]["kernel_dispatches"]:
assert itr["kernel_id"] in symbol_info.keys()
def test_retired_correlation_ids(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
def _sort_dict(inp):
return dict(sorted(inp.items()))
api_corr_ids = {}
for titr in ["hsa_api_traces", "marker_api_traces", "hip_api_traces"]:
for itr in sdk_data["buffer_records"][titr]:
corr_id = itr["correlation_id"]["internal"]
assert corr_id not in api_corr_ids.keys()
api_corr_ids[corr_id] = itr
async_corr_ids = {}
for titr in ["kernel_dispatches", "memory_copies"]:
for itr in sdk_data["buffer_records"][titr]:
corr_id = itr["correlation_id"]["internal"]
assert corr_id not in async_corr_ids.keys()
async_corr_ids[corr_id] = itr
retired_corr_ids = {}
for itr in sdk_data["buffer_records"]["retired_correlation_ids"]:
corr_id = itr["internal_correlation_id"]
assert corr_id not in retired_corr_ids.keys()
retired_corr_ids[corr_id] = itr
api_corr_ids = _sort_dict(api_corr_ids)
async_corr_ids = _sort_dict(async_corr_ids)
retired_corr_ids = _sort_dict(retired_corr_ids)
for cid, itr in async_corr_ids.items():
assert cid in retired_corr_ids.keys()
ts = retired_corr_ids[cid]["timestamp"]
assert (ts - itr["end_timestamp"]) > 0, f"correlation-id: {cid}, data: {itr}"
for cid, itr in api_corr_ids.items():
assert cid in retired_corr_ids.keys()
ts = retired_corr_ids[cid]["timestamp"]
assert (ts - itr["end_timestamp"]) > 0, f"correlation-id: {cid}, data: {itr}"
assert len(api_corr_ids.keys()) == (len(retired_corr_ids.keys()))
def get_allocated_pages(callback_records):
# Get how many pages we allocated
hip_api_traces = callback_records["hip_api_traces"]
_, op_dict = op_name("HIP_RUNTIME_API", callback_records)
op_key = [k for k, v in op_dict.items() if v == "hipHostRegister"][0]
host_register_record = []
for r in hip_api_traces:
if (
r["record"]["operation"] == op_key
and "sizeBytes" in r["args"]
and "hostPtr" in r["args"]
):
host_register_record.append(r)
assert len(host_register_record) == 1
alloc_size = int(host_register_record[0]["args"]["sizeBytes"], 10)
start_addr = int(host_register_record[0]["args"]["hostPtr"], 16)
end_addr = start_addr + alloc_size
return start_addr, end_addr, alloc_size
def test_page_migration_data(input_data):
data = input_data
sdk_data = data["rocprofiler-sdk-json-tool"]
buffer_records = sdk_data["buffer_records"]
callback_records = sdk_data["callback_records"]
page_migtation_buffers = buffer_records["page_migration"]
bf_op_id, bf_op_names = op_name("PAGE_MIGRATION", buffer_records)
assert bf_op_names[0] == "NONE"
assert "PAGE_MIGRATE" in str(bf_op_names)
assert len(bf_op_names) == 5
node_ids = set(x["gpu_id"] for x in sdk_data["agents"])
start_addr, end_addr, alloc_size = get_allocated_pages(callback_records)
assert start_addr < end_addr and start_addr + alloc_size == end_addr
assert int(alloc_size) == 16 * 4096 # We allocated 16 pages in the test
# PID must be same
assert len(set(r["pid"] for r in page_migtation_buffers)) == 1
for r in page_migtation_buffers:
op = r["operation"]
assert r["size"] == 136
assert r["kind"] == bf_op_id
assert op != 0 and bf_op_names[op] != "NONE"
assert bf_op_names[op].lower() in r
if "page_migrate" in r:
assert r["page_migrate"]["from_node"] in node_ids
assert r["page_migrate"]["to_node"] in node_ids
assert r["page_migrate"]["prefetch_node"] in node_ids
assert r["page_migrate"]["preferred_node"] in node_ids
assert r["page_migrate"]["trigger"] >= 0
if "queue_suspend" in r:
assert r["queue_suspend"]["trigger"] >= 0
assert r["queue_suspend"]["node_id"] in node_ids
if "unmap_from_gpu" in r:
assert r["unmap_from_gpu"]["trigger"] >= 0
# unmap is "instantaneous"
assert 0 < r["start_timestamp"] == r["end_timestamp"]
else:
assert 0 < r["start_timestamp"] < r["end_timestamp"]
# Check for events with our page
for r in page_migtation_buffers:
if "page_migrate" in r and r["page_migrate"]["start_addr"] == start_addr:
assert end_addr == r["page_migrate"]["end_addr"]
if "unmap_from_gpu" in r and r["unmap_from_gpu"]["start_addr"] == start_addr:
assert end_addr == r["unmap_from_gpu"]["end_addr"]
# TODO: Check if a migrate a->b is paired up with b->a
# It may not always be reported towards app finalization
if __name__ == "__main__":
exit_code = pytest.main(["-x", __file__] + sys.argv[1:])
sys.exit(exit_code)
+49 -4
Zobrazit soubor
@@ -312,6 +312,7 @@ get_buffer_tracing_names()
ROCPROFILER_BUFFER_TRACING_MARKER_NAME_API,
ROCPROFILER_BUFFER_TRACING_MEMORY_COPY,
ROCPROFILER_BUFFER_TRACING_SCRATCH_MEMORY,
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION,
};
auto cb_name_info = buffer_name_info{};
@@ -758,6 +759,7 @@ auto hip_api_bf_records = std::deque<rocprofiler_buffer_tracing_hip_api_
auto kernel_dispatch_bf_records = std::deque<rocprofiler_buffer_tracing_kernel_dispatch_record_t>{};
auto memory_copy_records = std::deque<rocprofiler_buffer_tracing_memory_copy_record_t>{};
auto scratch_memory_records = std::deque<rocprofiler_buffer_tracing_scratch_memory_record_t>{};
auto page_migration_records = std::deque<rocprofiler_buffer_tracing_page_migration_record_t>{};
auto corr_id_retire_records =
std::deque<rocprofiler_buffer_tracing_correlation_id_retirement_record_t>{};
@@ -842,6 +844,13 @@ tool_tracing_buffered(rocprofiler_context_id_t /*context*/,
scratch_memory_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION)
{
auto* record = static_cast<rocprofiler_buffer_tracing_page_migration_record_t*>(
header->payload);
page_migration_records.emplace_back(*record);
}
else if(header->kind == ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT)
{
auto* record =
@@ -927,12 +936,14 @@ rocprofiler_context_id_t scratch_memory_ctx = {};
rocprofiler_context_id_t corr_id_retire_ctx = {};
rocprofiler_context_id_t kernel_dispatch_callback_ctx = {};
rocprofiler_context_id_t kernel_dispatch_buffered_ctx = {};
rocprofiler_context_id_t page_migration_ctx = {};
// buffers
rocprofiler_buffer_id_t hsa_api_buffered_buffer = {};
rocprofiler_buffer_id_t hip_api_buffered_buffer = {};
rocprofiler_buffer_id_t marker_api_buffered_buffer = {};
rocprofiler_buffer_id_t kernel_dispatch_buffer = {};
rocprofiler_buffer_id_t memory_copy_buffer = {};
rocprofiler_buffer_id_t page_migration_buffer = {};
rocprofiler_buffer_id_t counter_collection_buffer = {};
rocprofiler_buffer_id_t scratch_memory_buffer = {};
rocprofiler_buffer_id_t corr_id_retire_buffer = {};
@@ -948,25 +959,28 @@ auto contexts = std::unordered_map<std::string_view, rocprofiler_context_id_t*>{
{"MARKER_API_BUFFERED", &marker_api_buffered_ctx},
{"KERNEL_DISPATCH_BUFFERED", &kernel_dispatch_buffered_ctx},
{"MEMORY_COPY", &memory_copy_ctx},
{"PAGE_MIGRATION", &page_migration_ctx},
{"COUNTER_COLLECTION", &counter_collection_ctx},
{"SCRATCH_MEMORY", &scratch_memory_ctx},
{"CORRELATION_ID_RETIREMENT", &corr_id_retire_ctx},
};
auto buffers = std::array<rocprofiler_buffer_id_t*, 8>{&hsa_api_buffered_buffer,
auto buffers = std::array<rocprofiler_buffer_id_t*, 9>{&hsa_api_buffered_buffer,
&hip_api_buffered_buffer,
&marker_api_buffered_buffer,
&kernel_dispatch_buffer,
&memory_copy_buffer,
&scratch_memory_buffer,
&page_migration_buffer,
&counter_collection_buffer,
&corr_id_retire_buffer};
auto agents = std::vector<rocprofiler_agent_t>{};
rocprofiler_timestamp_t init_time = 0;
rocprofiler_timestamp_t fini_time = 0;
rocprofiler_thread_id_t main_tid = 0;
rocprofiler_timestamp_t init_time = 0;
rocprofiler_timestamp_t fini_time = 0;
rocprofiler_thread_id_t main_tid = 0;
auto page_migration_status = ROCPROFILER_STATUS_SUCCESS;
int
tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
@@ -1142,6 +1156,15 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
&scratch_memory_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(page_migration_ctx,
buffer_size,
watermark,
ROCPROFILER_BUFFER_POLICY_LOSSLESS,
tool_tracing_buffered,
tool_data,
&page_migration_buffer),
"buffer creation");
ROCPROFILER_CALL(rocprofiler_create_buffer(corr_id_retire_ctx,
buffer_size,
watermark,
@@ -1226,6 +1249,23 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
scratch_memory_buffer),
"buffer tracing service for scratch memory configure");
{
page_migration_status =
rocprofiler_configure_buffer_tracing_service(page_migration_ctx,
ROCPROFILER_BUFFER_TRACING_PAGE_MIGRATION,
nullptr,
0,
page_migration_buffer);
constexpr auto message = "page migration service for memory copy configure";
if(page_migration_status == ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL)
std::cerr << message
<< " failed: " << rocprofiler_get_status_string(page_migration_status)
<< std::endl;
else
ROCPROFILER_CALL(page_migration_status, message);
}
ROCPROFILER_CALL(rocprofiler_configure_buffer_tracing_service(
corr_id_retire_ctx,
ROCPROFILER_BUFFER_TRACING_CORRELATION_ID_RETIREMENT,
@@ -1382,6 +1422,7 @@ tool_fini(void* tool_data)
<< ", kernel_dispatch_bf_records=" << kernel_dispatch_bf_records.size()
<< ", memory_copy_records=" << memory_copy_records.size()
<< ", scratch_memory_records=" << scratch_memory_records.size()
<< ", page_migration=" << page_migration_records.size()
<< ", hsa_api_bf_records=" << hsa_api_bf_records.size()
<< ", hip_api_bf_records=" << hip_api_bf_records.size()
<< ", marker_api_bf_records=" << marker_api_bf_records.size()
@@ -1444,6 +1485,8 @@ write_json(call_stack_t* _call_stack)
auto json_ar = JSONOutputArchive{*ofs, json_opts};
auto buffer_name_info = get_buffer_tracing_names();
auto callback_name_info = get_callback_tracing_names();
auto validate_page_migration =
(page_migration_status != ROCPROFILER_STATUS_ERROR_INCOMPATIBLE_KERNEL);
json_ar.setNextName("rocprofiler-sdk-json-tool");
json_ar.startNode();
@@ -1454,6 +1497,7 @@ write_json(call_stack_t* _call_stack)
json_ar(cereal::make_nvp("main_tid", main_tid));
json_ar(cereal::make_nvp("init_time", init_time));
json_ar(cereal::make_nvp("fini_time", fini_time));
json_ar(cereal::make_nvp("validate_page_migration", validate_page_migration));
json_ar.finishNode();
json_ar(cereal::make_nvp("agents", agents));
@@ -1487,6 +1531,7 @@ write_json(call_stack_t* _call_stack)
json_ar(cereal::make_nvp("kernel_dispatches", kernel_dispatch_bf_records));
json_ar(cereal::make_nvp("memory_copies", memory_copy_records));
json_ar(cereal::make_nvp("scratch_memory_traces", scratch_memory_records));
json_ar(cereal::make_nvp("page_migration", page_migration_records));
json_ar(cereal::make_nvp("hsa_api_traces", hsa_api_bf_records));
json_ar(cereal::make_nvp("hip_api_traces", hip_api_bf_records));
json_ar(cereal::make_nvp("marker_api_traces", marker_api_bf_records));