Files
rocm-systems/source/lib/rocprofiler-sdk/tests/page_migration.cpp
T
Mythreya 363f85dc72 Report page migration events as start/end (#793)
* Squashed commit of the following:

commit b76f2635f4b65599f03812a73d0cf410f5ada213
Author: Mythreya <mythreya.kuricheti@amd.com>
Date:   Fri Apr 26 00:29:09 2024 +0000

    Changed for PR feedback

commit bedb8ad566ff42fbf117b19202c26c507abcf8ac
Author: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
Date:   Thu Apr 25 19:20:06 2024 -0500

    Fix installation

commit a98f8a69459a1450a1be9c98e20b3c1e7f2568c2
Author: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
Date:   Thu Apr 25 19:16:35 2024 -0500

    Restructure the headers

commit 46489a020ffafdd5f4ce3f580469ff233ef67fe1
Author: Mythreya <mythreya.kuricheti@amd.com>
Date:   Tue Apr 23 23:31:10 2024 +0000

    Update hsa include

commit 8e795282cce348fc6aa736b7857b21aeb32aa20a
Author: Mythreya <mythreya.kuricheti@amd.com>
Date:   Tue Apr 23 23:02:32 2024 +0000

    Report page migration events as start/end

    * Updated tests accordingly
    * Page migration events are reported independently

commit 8784e5ad4895a626a2a8e4ac12f8021b34172bd4
Author: Mythreya <mythreya.kuricheti@amd.com>
Date:   Tue Apr 16 17:01:57 2024 +0000

    Update handling of dropped page migration events

    Previously, we dropped all locally buffered events when we detected that
    KFD had dropped some events. This could drop too many pending events too eagerly.

    When we receive an end event and cannot find the corresponding start,
    we can be sure that KFD has dropped some events in the immediate past.

    When this happens, we look through all locally buffered events and report
    the start events that are older than 10s as partial events --- they have
    no "end" information (we expect that the end events have been dropped).

    We also set the polling timeout to 10s to prevent the local buffer from
    getting too large with events waiting to be paired up.

    Updated tests

commit 2e8e0b07eeda9b5990e1ae8d28dcd3a035ce38e1
Author: Mythreya <mythreya.kuricheti@amd.com>
Date:   Tue Apr 16 17:01:31 2024 +0000

    Docs for triggers

* Fix page migration sample

* Fix hasher, kfd install

* Add hsa include
* Install KFD include dir

* Updates from code review

- single timestamp field
- node_id -> agent_id
- from_node -> from_agent
- to_node -> to_agent

* Misc revisions

* Remove page-migration install target

* Update page-migration pytest

* Tweak to serialization

* Address PR comments

* Update page-migration test

* Add cli args, update iterations

* Address PR comments

* Add abi.cpp for static_asserts
* Update page_migration gtest with only runtime tests
* Moved helpers into utils.hpp

---------

Co-authored-by: Jonathan R. Madsen <jonathanrmadsen@gmail.com>
2024-11-11 11:08:47 -06:00

106 lines
4.3 KiB
C++

// MIT License
//
// Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "lib/common/container/small_vector.hpp"
#include "lib/common/defines.hpp"
#include "lib/common/mpl.hpp"
#include "lib/rocprofiler-sdk/page_migration/utils.hpp"
#include <rocprofiler-sdk/buffer_tracing.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <fmt/core.h>
#include <gtest/gtest.h>
#include <sstream>
#include <string_view>
#include <utility>
#define ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL 1
#include "lib/rocprofiler-sdk/page_migration/page_migration.def.cpp"
#undef ROCPROFILER_LIB_ROCPROFILER_SDK_PAGE_MIGRATION_PAGE_MIGRATION_CPP_IMPL
namespace
{
// Fixture text for the line-reading test: five newline-terminated lines of the
// form "This is <10 * i> Line <i>" for i = 0..4.
constexpr std::string_view MULTILINE_STRING{"This is 0 Line 0\n"
                                            "This is 10 Line 1\n"
                                            "This is 20 Line 2\n"
                                            "This is 30 Line 3\n"
                                            "This is 40 Line 4\n"};
}  // namespace
namespace
{
// Number of newline-terminated lines contained in MULTILINE_STRING.
constexpr int EXPECTED_LINE_COUNT = 5;

// Number of lines handed to return_line() since the last parse_lines() call.
// Kept at file scope (rather than as a function-local static) so that it can be
// reset before every parse and inspected by the test afterwards.
int g_lines_seen = 0;
}  // namespace

// Line handler passed to kfd_readlines(). Checks that the i-th line received
// (counting from zero) is exactly "This is <10 * i> Line <i>" and then advances
// the running line counter.
void
return_line(const std::string_view line)
{
    const auto expected = fmt::format("This is {} Line {}", g_lines_seen * 10, g_lines_seen);
    EXPECT_EQ(expected, line);
    ++g_lines_seen;
}

// Runs kfd_readlines() over MULTILINE_STRING with return_line() as the handler.
// The line counter is reset first so that repeated invocations (for example with
// --gtest_repeat) start again from line zero instead of continuing the old count.
auto
parse_lines()
{
    g_lines_seen = 0;
    rocprofiler::page_migration::kfd_readlines(MULTILINE_STRING, return_line);
}

TEST(page_migration, readlines)
{
    // Ensure all lines are read: every line's content is checked by the handler,
    // and the handler must have been invoked once per line in the fixture.
    // NOTE(review): assumes kfd_readlines reports each newline-terminated line
    // exactly once -- confirm against its implementation.
    parse_lines();
    EXPECT_EQ(g_lines_seen, EXPECTED_LINE_COUNT);
}
// Checks the mapping between rocprofiler page-migration operations and KFD SMI
// events in both directions:
//   1. the KFD bitmask computed for a list of operations equals the bitwise OR of
//      the per-operation bitmasks, and
//   2. a KFD event id, rendered the way this test feeds it to get_rocprof_op(),
//      maps back to the matching rocprofiler operation.
// The suite name is spelled "page_migration" so this test lives in the same suite
// as the other test in this file and is matched by --gtest_filter=page_migration.*
TEST(page_migration, rocprof_kfd_map)
{
    using namespace rocprofiler::page_migration;
    using namespace rocprofiler::common::container;

    using rocprofiler_page_migration_seq_t =
        std::make_index_sequence<ROCPROFILER_PAGE_MIGRATION_LAST>;

    const small_vector<size_t> vec{ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END,
                                   ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION,
                                   ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU};

    // The combined mask for the list must match OR-ing each operation's own mask
    EXPECT_EQ((page_migration_info<ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END>::kfd_bitmask |
               page_migration_info<ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION>::kfd_bitmask |
               page_migration_info<ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU>::kfd_bitmask),
              kfd_bitmask(vec, rocprofiler_page_migration_seq_t{}));

    // Translates a KFD SMI event into the rocprofiler operation reported for it.
    // The event id is formatted as lowercase hex followed by a single space;
    // presumably this mirrors the leading field of a KFD event line -- TODO confirm.
    const auto to_rocprof_op = [](kfd_smi_event e) {
        std::string str = fmt::format("{:x} ", static_cast<size_t>(e));
        return rocprofiler::page_migration::get_rocprof_op({str});
    };

    // clang-format off
    EXPECT_EQ(to_rocprof_op(KFD_SMI_EVENT_MIGRATE_START),    ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_START);
    EXPECT_EQ(to_rocprof_op(KFD_SMI_EVENT_MIGRATE_END),      ROCPROFILER_PAGE_MIGRATION_PAGE_MIGRATE_END);
    EXPECT_EQ(to_rocprof_op(KFD_SMI_EVENT_PAGE_FAULT_START), ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_START);
    EXPECT_EQ(to_rocprof_op(KFD_SMI_EVENT_PAGE_FAULT_END),   ROCPROFILER_PAGE_MIGRATION_PAGE_FAULT_END);
    EXPECT_EQ(to_rocprof_op(KFD_SMI_EVENT_QUEUE_EVICTION),   ROCPROFILER_PAGE_MIGRATION_QUEUE_EVICTION);
    EXPECT_EQ(to_rocprof_op(KFD_SMI_EVENT_QUEUE_RESTORE),    ROCPROFILER_PAGE_MIGRATION_QUEUE_RESTORE);
    EXPECT_EQ(to_rocprof_op(KFD_SMI_EVENT_UNMAP_FROM_GPU),   ROCPROFILER_PAGE_MIGRATION_UNMAP_FROM_GPU);
    // clang-format on
}