Files
rocm-systems/projects/rocprofiler-systems/src/libmisc.cpp
T
Jonathan R. Madsen 6825578603 Improved analysis of functions to instrument + MPI support + timemory support (#2)
* various tweaks
* build updates + cleanup + overlap guard + min addr range
* Library source reorg + miscellaneous tweaks
* Removed unnecessary fwd decls
* Print address range in --print-X pair mode

- hosttrace modifications
  - disable instrumenting functions with overlapping sections or multiple entry points by default (control via --allow-overlapping option)
  - disable instrumenting functions whose address range < 512 bytes unless a loop is present by default (control via --min-address-range option)
  - disable instrumenting functions w/ loops whose address range < 64 bytes (control via --min-loop-address-range)
- Support for wrapping MPI function calls even in binary rewrite mode
  - e.g. use gotcha to wrap MPI functions with hosttrace_push_trace and hosttrace_pop_trace
- New timemory only mode --> HOSTTRACE_USE_TIMEMORY=ON
- New timemory + perfetto mode --> HOSTTRACE_USE_PERFETTO=ON + HOSTTRACE_USE_TIMEMORY=ON
- Full support for all timemory components
- parallel-overhead example for measuring the overhead in a MT-parallelized application with very small instrumentation functions
- improvements to output directories for hosttrace exe
- improvements to output directories for hosttrace library
- new hosttrace options
  - --print-instrumented <type> prints out the instrumented entities and exits
  - --print-available <type> prints out the available instrumentation entities and exits
  - --print-overlapping <type> prints out the overlapping entities and exits
  - NOTE: <type> above refers to the information printed out, e.g. module name vs. function name vs. module and function name, etc.

[ROCm/rocprofiler-systems commit: 1f15b3070f]
2021-09-02 11:38:39 -05:00

125 строки
3.9 KiB
C++

#include "library.hpp"
//
// This file contains miscellaneous function definitions related to timemory.
// They are placed in a separate file so that, during development, the long
// compile times arising from compiling timemory's gotcha wrappers are reduced.
//
namespace
{
// Handle assigned from comp::activate_mpip elsewhere in this file. It holds the
// maximum uint64_t value for as long as the MPI wrappers have not been activated.
uint64_t mpip_index = std::numeric_limits<uint64_t>::max();

// Creates a communicator keyval whose delete callback shuts hosttrace down and
// attaches it to MPI_COMM_SELF. The intent is that hosttrace_trace_finalize is
// called before MPI_Finalize completes.
//
// The body is compiled out entirely unless TIMEMORY_USE_MPI is defined.
void
hosttrace_mpi_set_attr()
{
#if defined(TIMEMORY_USE_MPI)
    using delete_fn_t = int (*)(MPI_Comm, int, void*, void*);

    // Passed below as the second argument of PMPI_Comm_create_keyval, i.e. the
    // attribute-delete callback.
    static auto _on_attr_delete = [](MPI_Comm, int, void*, void*) {
        const bool _wrappers_active =
            (mpip_index != std::numeric_limits<uint64_t>::max());
        if(_wrappers_active)
        {
            comp::deactivate_mpip<tim::component_tuple<hosttrace_component>, hosttrace>(
                mpip_index);
        }
        hosttrace_pop_trace("MPI_Finalize()");
        hosttrace_trace_finalize();
        return MPI_SUCCESS;
    };

    int       _keyval = -1;
    const int _status = PMPI_Comm_create_keyval(
        nullptr, static_cast<delete_fn_t>(_on_attr_delete), &_keyval, nullptr);

    // only attach the attribute when the keyval was created successfully
    if(_status == MPI_SUCCESS)
    {
        PMPI_Comm_set_attr(MPI_COMM_SELF, _keyval, nullptr);
    }
#endif
}
}  // namespace
// audit::incoming overload for the fork wrapper: logs a warning through
// HOSTTRACE_DEBUG and requests a demangled backtrace conditioned on get_debug().
// Neither argument is inspected.
void
fork_gotcha::audit(const gotcha_data_t& /*unused*/, audit::incoming /*unused*/)
{
    HOSTTRACE_DEBUG(
        "Warning! Calling fork() within an OpenMPI application using libfabric "
        "may result is segmentation fault\n");

    // NOTE(review): 16 is presumably the backtrace depth -- confirm against timemory
    TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(get_debug(), 16);
}
// audit::outgoing overload for the fork wrapper: logs the wrapped function's
// identifier (_data.tool_id) together with the PID value it returned.
void
fork_gotcha::audit(const gotcha_data_t& _data, audit::outgoing /*unused*/, pid_t _pid)
{
    HOSTTRACE_DEBUG("%s() return PID %i\n", _data.tool_id.c_str(),
                    static_cast<int>(_pid));
}
// audit::incoming overload matching an (int*, char***) signature. Logs the call
// and moves the global state from DelayedInit to PreInit; any other state is
// left unchanged. The int* / char*** arguments are not inspected.
void
mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming /*unused*/,
                  int* /*unused*/, char*** /*unused*/)
{
    HOSTTRACE_DEBUG("[%s] %s(int*, char***)\n", __FUNCTION__, _data.tool_id.c_str());

    if(get_state() == ::State::DelayedInit)
    {
        get_state() = ::State::PreInit;
    }
}
// audit::incoming overload matching an (int*, char***, int, int*) signature.
// Logs the call and moves the global state from DelayedInit to PreInit; any
// other state is left unchanged. The forwarded arguments are not inspected.
void
mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming /*unused*/,
                  int* /*unused*/, char*** /*unused*/, int /*unused*/,
                  int* /*unused*/)
{
    HOSTTRACE_DEBUG("[%s] %s(int*, char***, int, int*)\n", __FUNCTION__,
                    _data.tool_id.c_str());

    if(get_state() == ::State::DelayedInit)
    {
        get_state() = ::State::PreInit;
    }
}
// audit::outgoing overload: logs the wrapped call's return value and, when that
// value equals tim::mpi::success_v while the state is PreInit, performs the
// MPI-dependent setup:
//   1. registers the finalize hook via hosttrace_mpi_set_attr()
//   2. optionally configures + activates the MPI wrappers (HOSTTRACE_USE_MPIP)
//   3. pushes a trace entry named after the wrapped function
void
mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing /*unused*/, int _retval)
{
    HOSTTRACE_DEBUG("[%s] %s() returned %i\n", __FUNCTION__, _data.tool_id.c_str(),
                    static_cast<int>(_retval));

    // nothing further to do unless the call succeeded while in the PreInit state
    if(_retval != tim::mpi::success_v || get_state() != ::State::PreInit)
        return;

    hosttrace_mpi_set_attr();

    // hosttrace will set this environment variable to true in binary rewrite mode
    // when it detects MPI. This env variable is hidden from the user to avoid it
    // being activated unwittingly during runtime instrumentation, because that
    // would result in double instrumenting the MPI functions (unless the MPI
    // functions were excluded via a regex expression)
    const bool _use_mpip = tim::get_env("HOSTTRACE_USE_MPIP", false, false);
    if(_use_mpip)
    {
        HOSTTRACE_DEBUG("[%s] Activating MPI wrappers...\n", __FUNCTION__);
        comp::configure_mpip<tim::component_tuple<hosttrace_component>, hosttrace>();
        // remember the handle so the wrappers can be deactivated later
        mpip_index =
            comp::activate_mpip<tim::component_tuple<hosttrace_component>, hosttrace>();
    }

    hosttrace_push_trace(_data.tool_id.c_str());
}
// Pushes a trace entry labelled with the stored prefix; does nothing when no
// prefix has been set.
void
hosttrace_component::start()
{
    if(!m_prefix)
        return;
    hosttrace_push_trace(m_prefix);
}
// Pops the trace entry labelled with the stored prefix; does nothing when no
// prefix has been set.
void
hosttrace_component::stop()
{
    if(!m_prefix)
        return;
    hosttrace_pop_trace(m_prefix);
}
void
hosttrace_component::set_prefix(const char* _prefix)
{
m_prefix = _prefix;
}
// Returns a reference to the single function-local static instance array,
// which is value-initialized on first call.
hosttrace_timemory_data::instance_array_t&
hosttrace_timemory_data::instances()
{
    static instance_array_t _instances{};
    return _instances;
}
// Static storage definitions required by the perfetto and timemory macros.
//
// NOTE(review): per the Perfetto SDK documentation, the track-event static
// storage macro is expected to appear in exactly one translation unit --
// confirm this file is the only place it is invoked.
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
// Presumably instantiates timemory storage for the listed components in this
// translation unit -- verify against the timemory macro definition.
TIMEMORY_INITIALIZE_STORAGE(fork_gotcha, mpi_gotcha, comp::wall_clock,
comp::user_global_bundle)
// Static members for the custom perfetto data source are only defined when
// CUSTOM_DATA_SOURCE is set at compile time.
#if defined(CUSTOM_DATA_SOURCE)
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
#endif