15c82d6da8
## Motivation Enable UCX communication tracing and communication metadata ## Technical Details Implement UCX API wrappers to trace transport-layer communication. This adds communication data tracking and exposes “UCX Comm Send/Recv” timelines, enabling detailed analysis of MPI, OpenSHMEM, and other UCX-based runtime communication patterns. - Implements function interception for UCX functions across multiple categories using gotcha component. - Extended comm_data component to track UCX send/recv operations - Added ucx_send and ucx_recv labels for Perfetto counter tracks. Integrated UCX data tracking with existing MPI/RCCL tracking infrastructure. - Added ROCPROFSYS_USE_UCX configuration option (enabled by default). - Created FindUCX.cmake module for UCX header detection. Falls back to internal UCX headers if system headers not found. - Updated all Dockerfiles to include UCX dependencies.
375 line
17 KiB
C++
375 line
17 KiB
C++
// MIT License
|
|
//
|
|
// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All Rights Reserved.
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
// in the Software without restriction, including without limitation the rights
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in all
|
|
// copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
// SOFTWARE.
|
|
|
|
#include "library/components/ucx_gotcha.hpp"
|
|
#include "core/common.hpp"
|
|
#include "core/config.hpp"
|
|
#include "core/state.hpp"
|
|
#include "core/timemory.hpp"
|
|
#include "library/components/category_region.hpp"
|
|
#include "library/runtime.hpp"
|
|
|
|
#include <timemory/backends/threading.hpp>
|
|
#include <timemory/components/macros.hpp>
|
|
#include <timemory/mpl/concepts.hpp>
|
|
#include <timemory/utility/types.hpp>
|
|
|
|
#include <cstddef>
|
|
#include <cstdlib>
|
|
|
|
namespace rocprofsys
|
|
{
|
|
namespace component
|
|
{
|
|
namespace
|
|
{
|
|
auto&
|
|
get_ucx_gotcha()
|
|
{
|
|
static auto _v = tim::lightweight_tuple<ucx_gotcha_t>{};
|
|
return _v;
|
|
}
|
|
} // namespace
|
|
|
|
void
|
|
ucx_gotcha::configure()
|
|
{
|
|
// don't emit warnings for missing UCX functions unless debug or verbosity >= 3
|
|
if(get_verbose_env() < 3 && !get_debug_env())
|
|
{
|
|
for(size_t i = 0; i < ucx_gotcha_t::capacity(); ++i)
|
|
{
|
|
auto* itr = ucx_gotcha_t::at(i);
|
|
if(itr) itr->verbose = -1;
|
|
}
|
|
}
|
|
|
|
ucx_gotcha_t::get_initializer() = []() {
|
|
// Active Message
|
|
ucx_gotcha_t::configure<0, void*, void*, unsigned, void*, size_t, void*, size_t,
|
|
unsigned, void*>("ucp_am_send_nb");
|
|
ucx_gotcha_t::configure<1, void*, void*, unsigned, const void*, size_t,
|
|
const void*, size_t, const void*>("ucp_am_send_nbx");
|
|
ucx_gotcha_t::configure<2, void*, void*, void*, size_t, void*>(
|
|
"ucp_am_recv_data_nbx");
|
|
ucx_gotcha_t::configure<3, void, void*, void*>("ucp_am_data_release");
|
|
|
|
// Atomic operations
|
|
ucx_gotcha_t::configure<4, void*, void*, uint32_t, uint64_t, void*>(
|
|
"ucp_atomic_add32");
|
|
ucx_gotcha_t::configure<5, void*, void*, uint64_t, uint64_t, void*>(
|
|
"ucp_atomic_add64");
|
|
ucx_gotcha_t::configure<6, void*, void*, uint32_t, uint32_t, uint64_t, void*>(
|
|
"ucp_atomic_cswap32");
|
|
ucx_gotcha_t::configure<7, void*, void*, uint64_t, uint64_t, uint64_t, void*>(
|
|
"ucp_atomic_cswap64");
|
|
ucx_gotcha_t::configure<8, void*, void*, uint32_t, uint64_t, void*, void*>(
|
|
"ucp_atomic_fadd32");
|
|
ucx_gotcha_t::configure<9, void*, void*, uint64_t, uint64_t, void*, void*>(
|
|
"ucp_atomic_fadd64");
|
|
ucx_gotcha_t::configure<10, void*, void*, uint32_t, uint64_t, void*, void*>(
|
|
"ucp_atomic_swap32");
|
|
ucx_gotcha_t::configure<11, void*, void*, uint64_t, uint64_t, void*, void*>(
|
|
"ucp_atomic_swap64");
|
|
ucx_gotcha_t::configure<12, int, void*, int, uint64_t, const void*, size_t,
|
|
void*>("ucp_atomic_post");
|
|
ucx_gotcha_t::configure<13, void*, void*, int, uint64_t, void*, size_t, void*,
|
|
void*>("ucp_atomic_fetch_nb");
|
|
ucx_gotcha_t::configure<14, void*, void*, unsigned, void*, void*, size_t,
|
|
uint64_t, void*>("ucp_atomic_op_nbx");
|
|
|
|
// Cleanup and config
|
|
ucx_gotcha_t::configure<15, void, void*>("ucp_cleanup");
|
|
ucx_gotcha_t::configure<16, int, void*, const char*, const char*, const char*>(
|
|
"ucp_config_modify");
|
|
ucx_gotcha_t::configure<17, int, const char*, const char*, void**>(
|
|
"ucp_config_read");
|
|
ucx_gotcha_t::configure<18, void, void*>("ucp_config_release");
|
|
|
|
// Connection management
|
|
ucx_gotcha_t::configure<19, void*, void*, unsigned>("ucp_disconnect_nb");
|
|
|
|
// Datatype
|
|
ucx_gotcha_t::configure<20, int, void*, void**>("ucp_dt_create_generic");
|
|
ucx_gotcha_t::configure<21, void, void*>("ucp_dt_destroy");
|
|
|
|
// Endpoint
|
|
ucx_gotcha_t::configure<22, int, void*, const void*, void**>("ucp_ep_create");
|
|
ucx_gotcha_t::configure<23, void, void*>("ucp_ep_destroy");
|
|
ucx_gotcha_t::configure<24, void*, void*, const void*>("ucp_ep_modify_nb");
|
|
ucx_gotcha_t::configure<25, void*, void*, const void*>("ucp_ep_close_nbx");
|
|
ucx_gotcha_t::configure<26, int, void*>("ucp_ep_flush");
|
|
ucx_gotcha_t::configure<27, void*, void*, unsigned, void*>("ucp_ep_flush_nb");
|
|
ucx_gotcha_t::configure<28, void*, void*, const void*>("ucp_ep_flush_nbx");
|
|
|
|
// Listener
|
|
ucx_gotcha_t::configure<29, int, void*, const void*, void**>(
|
|
"ucp_listener_create");
|
|
ucx_gotcha_t::configure<30, void, void*>("ucp_listener_destroy");
|
|
ucx_gotcha_t::configure<31, int, void*, void*>("ucp_listener_query");
|
|
ucx_gotcha_t::configure<32, int, void*, void*>("ucp_listener_reject");
|
|
|
|
// Memory
|
|
ucx_gotcha_t::configure<33, int, void*, void*, size_t, int>("ucp_mem_advise");
|
|
ucx_gotcha_t::configure<34, int, void*, const void*, void**>("ucp_mem_map");
|
|
ucx_gotcha_t::configure<35, int, void*, void*>("ucp_mem_unmap");
|
|
ucx_gotcha_t::configure<36, int, void*, void*>("ucp_mem_query");
|
|
|
|
// Put/Get operations
|
|
ucx_gotcha_t::configure<37, int, void*, const void*, size_t, uint64_t, void*>(
|
|
"ucp_put");
|
|
ucx_gotcha_t::configure<38, int, void*, void*, size_t, uint64_t, void*>(
|
|
"ucp_get");
|
|
ucx_gotcha_t::configure<39, int, void*, const void*, size_t, uint64_t, void*>(
|
|
"ucp_put_nbi");
|
|
ucx_gotcha_t::configure<40, int, void*, void*, size_t, uint64_t, void*>(
|
|
"ucp_get_nbi");
|
|
ucx_gotcha_t::configure<41, void*, void*, const void*, size_t, uint64_t, void*,
|
|
void*>("ucp_put_nb");
|
|
ucx_gotcha_t::configure<42, void*, void*, void*, size_t, uint64_t, void*, void*>(
|
|
"ucp_get_nb");
|
|
ucx_gotcha_t::configure<43, void*, void*, const void*, size_t, uint64_t, void*,
|
|
const void*>("ucp_put_nbx");
|
|
ucx_gotcha_t::configure<44, void*, void*, void*, size_t, uint64_t, void*,
|
|
const void*>("ucp_get_nbx");
|
|
|
|
// Request
|
|
ucx_gotcha_t::configure<45, void*, void*>("ucp_request_alloc");
|
|
ucx_gotcha_t::configure<46, void, void*, void*>("ucp_request_cancel");
|
|
ucx_gotcha_t::configure<47, int, void*>("ucp_request_is_completed");
|
|
|
|
// Remote key
|
|
ucx_gotcha_t::configure<48, void, void*>("ucp_rkey_buffer_release");
|
|
ucx_gotcha_t::configure<49, void, void*>("ucp_rkey_destroy");
|
|
ucx_gotcha_t::configure<50, int, void*, void*, void**, size_t*>("ucp_rkey_pack");
|
|
ucx_gotcha_t::configure<51, int, void*, void*, void**>("ucp_rkey_ptr");
|
|
|
|
// Stream
|
|
ucx_gotcha_t::configure<52, void, void*, void*>("ucp_stream_data_release");
|
|
ucx_gotcha_t::configure<53, void*, void*, void*, size_t, size_t*, unsigned,
|
|
void*>("ucp_stream_recv_data_nb");
|
|
ucx_gotcha_t::configure<54, void*, void*, const void*, size_t, void*>(
|
|
"ucp_stream_send_nb");
|
|
ucx_gotcha_t::configure<55, void*, void*, void*, size_t, size_t*, void*>(
|
|
"ucp_stream_recv_nb");
|
|
ucx_gotcha_t::configure<56, void*, void*, const void*, size_t, const void*>(
|
|
"ucp_stream_send_nbx");
|
|
ucx_gotcha_t::configure<57, void*, void*, void*, size_t, size_t*, const void*>(
|
|
"ucp_stream_recv_nbx");
|
|
ucx_gotcha_t::configure<58, void*, void*>("ucp_stream_worker_poll");
|
|
|
|
// Tag matching
|
|
ucx_gotcha_t::configure<59, void*, void*, void*, void*, size_t, void*, void*>(
|
|
"ucp_tag_msg_recv_nb");
|
|
ucx_gotcha_t::configure<60, void*, void*, void*, void*, size_t, const void*>(
|
|
"ucp_tag_msg_recv_nbx");
|
|
ucx_gotcha_t::configure<61, void*, void*, const void*, size_t, void*, void*>(
|
|
"ucp_tag_send_nbr");
|
|
ucx_gotcha_t::configure<62, void*, void*, void*, size_t, void*, void*, void*>(
|
|
"ucp_tag_recv_nbr");
|
|
ucx_gotcha_t::configure<63, void*, void*, const void*, size_t, void*, void*>(
|
|
"ucp_tag_send_nb");
|
|
ucx_gotcha_t::configure<64, void*, void*, void*, size_t, void*, void*, void*>(
|
|
"ucp_tag_recv_nb");
|
|
ucx_gotcha_t::configure<65, void*, void*, const void*, size_t, uint64_t,
|
|
const void*>("ucp_tag_send_nbx");
|
|
ucx_gotcha_t::configure<66, void*, void*, void*, size_t, uint64_t, uint64_t,
|
|
const void*>("ucp_tag_recv_nbx");
|
|
ucx_gotcha_t::configure<67, void*, void*, const void*, size_t, uint64_t, void*>(
|
|
"ucp_tag_send_sync_nb");
|
|
ucx_gotcha_t::configure<68, void*, void*, const void*, size_t, uint64_t,
|
|
const void*>("ucp_tag_send_sync_nbx");
|
|
|
|
// Worker
|
|
ucx_gotcha_t::configure<69, int, void*, const void*, void**>("ucp_worker_create");
|
|
ucx_gotcha_t::configure<70, void, void*>("ucp_worker_destroy");
|
|
ucx_gotcha_t::configure<71, int, void*, void**, size_t*>(
|
|
"ucp_worker_get_address");
|
|
ucx_gotcha_t::configure<72, int, void*, int*>("ucp_worker_get_efd");
|
|
ucx_gotcha_t::configure<73, int, void*>("ucp_worker_arm");
|
|
ucx_gotcha_t::configure<74, int, void*>("ucp_worker_fence");
|
|
ucx_gotcha_t::configure<75, int, void*>("ucp_worker_wait");
|
|
ucx_gotcha_t::configure<76, int, void*>("ucp_worker_signal");
|
|
ucx_gotcha_t::configure<77, int, void*, void*, size_t, void*>(
|
|
"ucp_worker_wait_mem");
|
|
ucx_gotcha_t::configure<78, int, void*>("ucp_worker_flush");
|
|
ucx_gotcha_t::configure<79, void*, void*, unsigned, void*>("ucp_worker_flush_nb");
|
|
ucx_gotcha_t::configure<80, void*, void*, unsigned, void*>(
|
|
"ucp_worker_flush_nbx");
|
|
ucx_gotcha_t::configure<81, int, void*, unsigned, void*, void*, void*>(
|
|
"ucp_worker_set_am_handler");
|
|
ucx_gotcha_t::configure<82, int, void*, const void*>(
|
|
"ucp_worker_set_am_recv_handler");
|
|
ucx_gotcha_t::configure<83, unsigned, void*>("ucp_worker_progress");
|
|
|
|
// UCT Active Message (low-level transport)
|
|
ucx_gotcha_t::configure<84, ssize_t, void*, unsigned, void*, void*>(
|
|
"uct_ep_am_bcopy");
|
|
ucx_gotcha_t::configure<85, ssize_t, void*, unsigned, const void*, unsigned,
|
|
const void*, size_t, void*>("uct_ep_am_zcopy");
|
|
ucx_gotcha_t::configure<86, ssize_t, void*, unsigned, uint64_t, const void*,
|
|
unsigned>("uct_ep_am_short");
|
|
ucx_gotcha_t::configure<87, unsigned, void*>("uct_iface_progress");
|
|
ucx_gotcha_t::configure<88, int, void*, unsigned, void*, void*, unsigned>(
|
|
"uct_iface_set_am_handler");
|
|
|
|
// Legacy UCX function variants that might be used on older systems
|
|
ucx_gotcha_t::configure<89, void*, void*, const void*, size_t, void*>(
|
|
"ucp_tag_send");
|
|
ucx_gotcha_t::configure<90, void*, void*, void*, size_t, void*, void*>(
|
|
"ucp_tag_recv");
|
|
ucx_gotcha_t::configure<91, void*, void*, const void*, size_t, int, int, void*>(
|
|
"ucp_send");
|
|
ucx_gotcha_t::configure<92, void*, void*, void*, size_t, int, int, void*>(
|
|
"ucp_recv");
|
|
};
|
|
}
|
|
|
|
void
|
|
ucx_gotcha::shutdown()
|
|
{
|
|
ucx_gotcha_t::disable();
|
|
}
|
|
|
|
void
|
|
ucx_gotcha::start()
|
|
{
|
|
if(!get_ucx_gotcha().get<ucx_gotcha_t>()->get_is_running())
|
|
{
|
|
configure();
|
|
get_ucx_gotcha().start();
|
|
}
|
|
}
|
|
|
|
void
|
|
ucx_gotcha::stop()
|
|
{}
|
|
|
|
// Generic audit functions now handled by template in header
|
|
|
|
// Specific audit functions for tag operations
|
|
void
|
|
ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, const void* arg2,
|
|
size_t arg3, uint64_t arg4, const void* arg5)
|
|
{
|
|
category_region<category::ucx>::start(std::string_view{ _data.tool_id }, "ep", arg1,
|
|
"buffer", arg2, "count", arg3, "tag", arg4,
|
|
"param", arg5);
|
|
|
|
// Also trigger communication data tracking
|
|
comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5);
|
|
}
|
|
|
|
void
|
|
ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, void* arg2,
|
|
size_t arg3, uint64_t arg4, uint64_t arg5, const void* arg6)
|
|
{
|
|
category_region<category::ucx>::start(std::string_view{ _data.tool_id }, "worker",
|
|
arg1, "buffer", arg2, "count", arg3, "tag",
|
|
arg4, "tag_mask", arg5, "param", arg6);
|
|
|
|
// Also trigger communication data tracking
|
|
comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5, arg6);
|
|
}
|
|
|
|
// RMA operations
|
|
void
|
|
ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, const void* arg2,
|
|
size_t arg3, uint64_t arg4, void* arg5, const void* arg6)
|
|
{
|
|
category_region<category::ucx>::start(std::string_view{ _data.tool_id }, "ep", arg1,
|
|
"buffer", arg2, "count", arg3, "remote_addr",
|
|
arg4, "rkey", arg5, "param", arg6);
|
|
|
|
// Also trigger communication data tracking
|
|
comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5, arg6);
|
|
}
|
|
|
|
void
|
|
ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, void* arg2,
|
|
size_t arg3, uint64_t arg4, void* arg5, const void* arg6)
|
|
{
|
|
category_region<category::ucx>::start(std::string_view{ _data.tool_id }, "ep", arg1,
|
|
"buffer", arg2, "count", arg3, "remote_addr",
|
|
arg4, "rkey", arg5, "param", arg6);
|
|
|
|
// Also trigger communication data tracking
|
|
comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5, arg6);
|
|
}
|
|
|
|
// Active message send
|
|
void
|
|
ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, unsigned arg2,
|
|
const void* arg3, size_t arg4, const void* arg5, size_t arg6,
|
|
const void* arg7)
|
|
{
|
|
category_region<category::ucx>::start(
|
|
std::string_view{ _data.tool_id }, "ep", arg1, "id", arg2, "header", arg3,
|
|
"header_length", arg4, "buffer", arg5, "count", arg6, "param", arg7);
|
|
|
|
// Also trigger communication data tracking
|
|
comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
|
|
}
|
|
|
|
// Stream operations
|
|
void
|
|
ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, const void* arg2,
|
|
size_t arg3, const void* arg4)
|
|
{
|
|
category_region<category::ucx>::start(std::string_view{ _data.tool_id }, "ep", arg1,
|
|
"buffer", arg2, "count", arg3, "param", arg4);
|
|
|
|
// Also trigger communication data tracking
|
|
comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4);
|
|
}
|
|
|
|
void
|
|
ucx_gotcha::audit(const gotcha_data& _data, audit::incoming, void* arg1, void* arg2,
|
|
size_t arg3, size_t* arg4, const void* arg5)
|
|
{
|
|
category_region<category::ucx>::start(std::string_view{ _data.tool_id }, "ep", arg1,
|
|
"buffer", arg2, "count", arg3, "length", arg4,
|
|
"param", arg5);
|
|
|
|
// Also trigger communication data tracking
|
|
comm_data::audit(_data, audit::incoming{}, arg1, arg2, arg3, arg4, arg5);
|
|
}
|
|
|
|
void
|
|
ucx_gotcha::audit(const gotcha_data& _data, audit::outgoing, void* ret)
|
|
{
|
|
category_region<category::ucx>::stop(std::string_view{ _data.tool_id }, "return",
|
|
ret);
|
|
}
|
|
|
|
void
|
|
ucx_gotcha::audit(const gotcha_data& _data, audit::outgoing, int ret)
|
|
{
|
|
category_region<category::ucx>::stop(std::string_view{ _data.tool_id }, "return",
|
|
ret);
|
|
}
|
|
|
|
} // namespace component
|
|
} // namespace rocprofsys
|
|
|
|
// Invokes timemory's storage-initializer macro for the ucx_gotcha component at
// global scope, outside the rocprofsys namespaces.
// NOTE(review): the macro body is not visible here; presumably it sets up the
// component's timemory storage -- confirm against timemory's macro definition.
TIMEMORY_STORAGE_INITIALIZER(rocprofsys::component::ucx_gotcha)
|