Improved analysis of functions to instrument + MPI support + timemory support (#2)
* various tweaks
* build updates + cleanup + overlap guard + min addr range
* Library source reorg + miscellaneous tweaks
* Removed unnecessary fwd decls
* Print address range in --print-X pair mode
- hosttrace modifications
- disable instrumenting functions with overlapping sections or multiple entry points by default (control via --allow-overlapping option)
- disable instrumenting functions whose address range < 512 bytes unless a loop is present by default (control via --min-address-range option)
- disable instrumenting functions w/ loops whose address range < 64 bytes (control via --min-loop-address-range)
- Support for wrapping MPI function calls even in binary rewrite mode
- e.g. use gotcha to wrap MPI functions with hosttrace_push_trace and hosttrace_pop_trace
- New timemory only mode --> HOSTTRACE_USE_TIMEMORY=ON
- New timemory + perfetto mode --> HOSTTRACE_USE_PERFETTO=ON + HOSTTRACE_USE_TIMEMORY=ON
- Full support for all timemory components
- parallel-overhead example for measuring the overhead in a MT-parallelized application with very small instrumentation functions
- improvements to output directories for hosttrace exe
- improvements to output directories for hosttrace library
- new hosttrace options
- --print-instrumented <type> prints out the instrumented entities and exits
- --print-available <type> prints out the available instrumentation entities and exits
- --print-overlapping <type> prints out the overlapping entities and exits
- NOTE: <type> above refers to the information printed out, e.g. module name vs. function name vs. module and function name, etc.
[ROCm/rocprofiler-systems commit: 1f15b3070f]
Este cometimento está contido em:
cometido por
GitHub
ascendente
1ff2dfed88
cometimento
6825578603
@@ -30,4 +30,6 @@
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
|
||||
/build*
|
||||
/.vscode
|
||||
|
||||
@@ -29,6 +29,7 @@ include(BuildSettings) # compiler flags
|
||||
set(CMAKE_CXX_STANDARD 17 CACHE STRING "CXX language standard")
|
||||
add_option(CMAKE_CXX_STANDARD_REQUIRED "Require C++ language standard" ON)
|
||||
add_option(CMAKE_CXX_EXTENSIONS "Compiler specific language extensions" OFF)
|
||||
add_option(CMAKE_INSTALL_RPATH_USE_LINK_PATH "Enable rpath to linked libraries" ON)
|
||||
add_option(HOSTTRACE_USE_CLANG_TIDY "Enable clang-tidy" OFF)
|
||||
|
||||
include(Packages) # finds third-party libraries
|
||||
@@ -45,6 +46,8 @@ option(HOSTTRACE_CUSTOM_DATA_SOURCE "Enable custom data source" OFF)
|
||||
|
||||
add_library(hosttrace-library SHARED
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/library.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/src/libmisc.cpp
|
||||
${CMAKE_CURRENT_LIST_DIR}/include/library.hpp
|
||||
${perfetto_DIR}/sdk/perfetto.cc)
|
||||
|
||||
target_include_directories(hosttrace-library PRIVATE
|
||||
@@ -55,15 +58,23 @@ target_include_directories(hosttrace-library SYSTEM PRIVATE
|
||||
|
||||
target_compile_definitions(hosttrace-library PRIVATE
|
||||
$<IF:$<BOOL:${HOSTTRACE_CUSTOM_DATA_SOURCE}>,CUSTOM_DATA_SOURCE,>)
|
||||
|
||||
|
||||
target_link_libraries(hosttrace-library PRIVATE
|
||||
hosttrace::hosttrace-threading
|
||||
$<BUILD_INTERFACE:timemory::timemory-headers>
|
||||
$<BUILD_INTERFACE:timemory::timemory-gotcha>
|
||||
$<BUILD_INTERFACE:timemory::timemory-cxx-shared>
|
||||
$<BUILD_INTERFACE:timemory::timemory-threading>
|
||||
$<BUILD_INTERFACE:timemory::timemory-compile-options>
|
||||
$<IF:$<BOOL:${hosttrace_USE_SANITIZER}>,hosttrace::hosttrace-sanitizer,>)
|
||||
|
||||
if(DYNINST_API_RT)
|
||||
get_filename_component(DYNINST_API_RT_DIR "${DYNINST_API_RT}" DIRECTORY)
|
||||
endif()
|
||||
|
||||
set_target_properties(hosttrace-library PROPERTIES
|
||||
OUTPUT_NAME hosttrace)
|
||||
OUTPUT_NAME hosttrace
|
||||
INSTALL_RPATH "\$ORIGIN:${DYNINST_API_RT_DIR}:${CMAKE_INSTALL_RPATH}")
|
||||
|
||||
install(
|
||||
TARGETS hosttrace-library
|
||||
@@ -87,7 +98,8 @@ target_include_directories(hosttrace-exe PRIVATE
|
||||
target_link_libraries(hosttrace-exe PRIVATE
|
||||
$<BUILD_INTERFACE:timemory::timemory-headers>
|
||||
hosttrace::hosttrace-dyninst
|
||||
hosttrace::hosttrace-compile-options)
|
||||
hosttrace::hosttrace-compile-options
|
||||
$<IF:$<BOOL:${hosttrace_USE_SANITIZER}>,hosttrace::hosttrace-sanitizer,>)
|
||||
|
||||
set_target_properties(hosttrace-exe PROPERTIES
|
||||
OUTPUT_NAME hosttrace
|
||||
|
||||
@@ -204,6 +204,8 @@ set(TIMEMORY_USE_GOTCHA ON CACHE BOOL "Enable GOTCHA support in tim
|
||||
set(TIMEMORY_USE_PERFETTO OFF CACHE BOOL "Disable perfetto support in timemory")
|
||||
# timemory feature build settings
|
||||
set(TIMEMORY_BUILD_GOTCHA ON CACHE BOOL "Enable building GOTCHA library from submodule")
|
||||
# timemory build settings
|
||||
set(TIMEMORY_TLS_MODEL "global-dynamic" CACHE STRING "Thread-local static model" FORCE)
|
||||
|
||||
checkout_git_submodule(
|
||||
RELATIVE_PATH external/timemory
|
||||
@@ -211,4 +213,19 @@ checkout_git_submodule(
|
||||
REPO_URL https://github.com/NERSC/timemory.git
|
||||
REPO_BRANCH develop)
|
||||
|
||||
hosttrace_save_variables(BUILD_CONFIG
|
||||
BUILD_SHARED_LIBS
|
||||
BUILD_STATIC_LIBS
|
||||
CMAKE_POSITION_INDEPENDENT_CODE)
|
||||
|
||||
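# build timemory as position-independent shared libs only (static libs are disabled) while it is added as a subdirectory; the saved values are restored afterwards. NOTE(review): this comment previously said "PIC static libs" -- confirm that shared-only is the intended configuration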
|
||||
set(BUILD_SHARED_LIBS ON)
|
||||
set(BUILD_STATIC_LIBS OFF)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
add_subdirectory(external/timemory)
|
||||
|
||||
hosttrace_restore_variables(BUILD_CONFIG
|
||||
BUILD_SHARED_LIBS
|
||||
BUILD_STATIC_LIBS
|
||||
CMAKE_POSITION_INDEPENDENT_CODE)
|
||||
|
||||
@@ -4,3 +4,4 @@ project(hosttrace-dyninst-examples
|
||||
LANGUAGES CXX)
|
||||
|
||||
add_subdirectory(transpose)
|
||||
add_subdirectory(parallel-overhead)
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
|
||||
# Example used to measure instrumentation overhead in a multithreaded application.
# The build type is pinned to Release for this directory.
set(CMAKE_BUILD_TYPE "Release")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")

# Threads::Threads is only defined after FindThreads has run; calling find_package
# here is harmless if a parent scope already did so and keeps this directory
# self-contained otherwise.
find_package(Threads REQUIRED)

add_executable(parallel-overhead parallel-overhead.cpp)
target_link_libraries(parallel-overhead PRIVATE Threads::Threads)
|
||||
@@ -0,0 +1,52 @@
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
std::atomic<long> total{ 0 };
|
||||
long
|
||||
fib(long n) __attribute__((noinline));
|
||||
void
|
||||
run(size_t nitr, long) __attribute__((noinline));
|
||||
|
||||
// Naive doubly-recursive Fibonacci: returns n itself for n < 2, otherwise
// fib(n - 1) + fib(n - 2). Intentionally left unoptimized (no memoization or
// iteration) -- presumably so that the example produces a very large number of calls
// to a tiny function.
long
fib(long n)
{
    if(n < 2)
        return n;
    return fib(n - 1) + fib(n - 2);
}
|
||||
|
||||
void
|
||||
run(size_t nitr, long n)
|
||||
{
|
||||
long local = 0;
|
||||
for(size_t i = 0; i < nitr; ++i)
|
||||
local += fib(n);
|
||||
total += local;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
size_t nthread = 16;
|
||||
size_t nitr = 50000;
|
||||
long nfib = 10;
|
||||
if(argc > 1)
|
||||
nfib = atol(argv[1]);
|
||||
if(argc > 2)
|
||||
nthread = atol(argv[2]);
|
||||
if(argc > 3)
|
||||
nitr = atol(argv[3]);
|
||||
|
||||
std::vector<std::thread> threads{};
|
||||
for(size_t i = 0; i < nthread; ++i)
|
||||
threads.emplace_back(&run, nitr, nfib);
|
||||
|
||||
for(auto& itr : threads)
|
||||
itr.join();
|
||||
|
||||
printf("fibonacci(%li) x %lu = %li\n", nfib, nthread, total.load());
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -45,21 +45,33 @@ if(TARGET MPI::MPI_C)
|
||||
get_target_property(INCLUDE_DIRS MPI::MPI_C INTERFACE_INCLUDE_DIRECTORIES)
|
||||
foreach(_IDIR ${INCLUDE_DIRS})
|
||||
set(transpose_CXX_FLAGS "${transpose_CXX_FLAGS} -I${_IDIR}")
|
||||
endforeach()
|
||||
if(MPI_C_LINK_FLAGS)
|
||||
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} ${MPI_C_LINK_FLAGS}")
|
||||
endif()
|
||||
set(_LINK_LIBS "")
|
||||
foreach(_LIB ${MPI_C_LIB_NAMES})
|
||||
string(APPEND _LINK_LIBS "-l${_LIB} ")
|
||||
endforeach()
|
||||
foreach(_IDIR ${INCLUDE_DIRS} ${MPI_mpich_LIBRARY} ${MPI_mpi_LIBRARY} ${MPI_LIBRARY_DIRS})
|
||||
get_filename_component(_LIBDIR "${_IDIR}" DIRECTORY)
|
||||
if(EXISTS "${_IDIR}/libmpi${CMAKE_SHARED_LIBRARY_SUFFIX}")
|
||||
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} -L${_IDIR} ${_LINK_LIBS}")
|
||||
endif()
|
||||
if(EXISTS "${_LIBDIR}/libmpi${CMAKE_SHARED_LIBRARY_SUFFIX}")
|
||||
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} -L${_LIBDIR} ${_LINK_LIBS}")
|
||||
endif()
|
||||
foreach(_LDIR lib lib64)
|
||||
set(_LIBDIR_SAVE "${_LIBDIR}")
|
||||
if(NOT EXISTS "${_LIBDIR}/${_LDIR}")
|
||||
get_filename_component(_LIBDIR "${_LIBDIR}" DIRECTORY)
|
||||
endif()
|
||||
if(EXISTS "${_LIBDIR}/${_LDIR}")
|
||||
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} -L${_LIBDIR}/${_LDIR} -lmpi")
|
||||
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} -L${_LIBDIR}/${_LDIR} ${_LINK_LIBS}")
|
||||
endif()
|
||||
set(_LIBDIR "${_LIBDIR_SAVE}")
|
||||
endforeach()
|
||||
endforeach()
|
||||
if(MPI_C_LINK_FLAGS)
|
||||
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} ${MPI_C_LINK_FLAGS}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# remove generator expressions
|
||||
|
||||
@@ -155,11 +155,15 @@ run(int argc, char** argv)
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
int rank = 0;
|
||||
#if defined(USE_MPI)
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
#endif
|
||||
run(argc, argv);
|
||||
if(rank == 0)
|
||||
run(argc, argv);
|
||||
#if defined(USE_MPI)
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
return 0;
|
||||
|
||||
+1
-1
Submódulo projects/rocprofiler-systems/external/timemory modificado: 7542f48e65...aa4a0ed7b2
@@ -140,24 +140,25 @@ static int verbose_level = tim::get_env<int>("TIMEMORY_RUN_VERBOSE", 0);
|
||||
// string settings
|
||||
//
|
||||
static string_t main_fname = "main";
|
||||
static string_t argv0 = "";
|
||||
static string_t cmdv0 = "";
|
||||
static string_t argv0 = {};
|
||||
static string_t cmdv0 = {};
|
||||
static string_t default_components = "wall_clock";
|
||||
static string_t prefer_library = "";
|
||||
static string_t prefer_library = {};
|
||||
//
|
||||
// global variables
|
||||
//
|
||||
static patch_pointer_t bpatch;
|
||||
static call_expr_t* initialize_expr = nullptr;
|
||||
static call_expr_t* terminate_expr = nullptr;
|
||||
static snippet_vec_t init_names;
|
||||
static snippet_vec_t fini_names;
|
||||
static fmodset_t available_module_functions;
|
||||
static fmodset_t instrumented_module_functions;
|
||||
static regexvec_t func_include;
|
||||
static regexvec_t func_exclude;
|
||||
static regexvec_t file_include;
|
||||
static regexvec_t file_exclude;
|
||||
static patch_pointer_t bpatch = {};
|
||||
static call_expr_t* initialize_expr = nullptr;
|
||||
static call_expr_t* terminate_expr = nullptr;
|
||||
static snippet_vec_t init_names = {};
|
||||
static snippet_vec_t fini_names = {};
|
||||
static fmodset_t available_module_functions = {};
|
||||
static fmodset_t instrumented_module_functions = {};
|
||||
static fmodset_t overlapping_module_functions = {};
|
||||
static regexvec_t func_include = {};
|
||||
static regexvec_t func_exclude = {};
|
||||
static regexvec_t file_include = {};
|
||||
static regexvec_t file_exclude = {};
|
||||
static auto regex_opts = std::regex_constants::egrep | std::regex_constants::optimize;
|
||||
//
|
||||
//======================================================================================//
|
||||
@@ -219,17 +220,6 @@ error_func_real(error_level_t level, int num, const char* const* params);
|
||||
void
|
||||
error_func_fake(error_level_t level, int num, const char* const* params);
|
||||
|
||||
bool
|
||||
find_func_or_calls(std::vector<const char*> names, bpvector_t<point_t*>& points,
|
||||
image_t* appImage, procedure_loc_t loc = BPatch_locEntry);
|
||||
|
||||
bool
|
||||
find_func_or_calls(const char* name, bpvector_t<point_t*>& points, image_t* image,
|
||||
procedure_loc_t loc = BPatch_locEntry);
|
||||
|
||||
bool
|
||||
load_dependent_libraries(address_space_t* bedit, char* bindings);
|
||||
|
||||
bool
|
||||
c_stdlib_module_constraint(const string_t& file);
|
||||
|
||||
@@ -283,10 +273,10 @@ struct function_signature
|
||||
location_t m_row = { 0, 0 };
|
||||
location_t m_col = { 0, 0 };
|
||||
string_t m_return = "void";
|
||||
string_t m_name = "";
|
||||
string_t m_name = {};
|
||||
string_t m_params = "()";
|
||||
string_t m_file = "";
|
||||
mutable string_t m_signature = "";
|
||||
string_t m_file = {};
|
||||
mutable string_t m_signature = {};
|
||||
|
||||
TIMEMORY_DEFAULT_OBJECT(function_signature)
|
||||
|
||||
@@ -360,7 +350,10 @@ struct function_signature
|
||||
//
|
||||
struct module_function
|
||||
{
|
||||
using width_t = std::array<size_t, 3>;
|
||||
using width_t = std::array<size_t, 3>;
|
||||
using address_t = Dyninst::Address;
|
||||
|
||||
static constexpr size_t absolute_max_width = 80;
|
||||
|
||||
static auto& get_width()
|
||||
{
|
||||
@@ -399,6 +392,13 @@ struct module_function
|
||||
module = modname;
|
||||
function = fname;
|
||||
signature = get_func_file_line_info(mod, proc);
|
||||
assert(proc->isInstrumentable() == true);
|
||||
std::pair<address_t, address_t> _range{};
|
||||
if(proc->getAddressRange(_range.first, _range.second))
|
||||
address_range = _range.second - _range.first;
|
||||
auto _instructions = proc->findPoint(BPatch_locInstruction);
|
||||
if(_instructions)
|
||||
instr_count = _instructions->size();
|
||||
}
|
||||
|
||||
friend bool operator<(const module_function& lhs, const module_function& rhs)
|
||||
@@ -410,56 +410,85 @@ struct module_function
|
||||
: (lhs.module < rhs.module);
|
||||
}
|
||||
|
||||
static void write_header(std::ostream& os)
|
||||
{
|
||||
auto w0 = std::min<size_t>(get_width()[0], absolute_max_width);
|
||||
auto w1 = std::min<size_t>(get_width()[1], absolute_max_width);
|
||||
auto w2 = std::min<size_t>(get_width()[2], absolute_max_width);
|
||||
|
||||
std::stringstream ss;
|
||||
ss << std::setw(14) << "AddressRange"
|
||||
<< " " << std::setw(14) << "InstrCount"
|
||||
<< " " << std::setw(w0 + 8) << std::left << "Module"
|
||||
<< " " << std::setw(w1 + 8) << std::left << "Function"
|
||||
<< " " << std::setw(w2 + 8) << std::left << "FunctionSignature"
|
||||
<< "\n";
|
||||
os << ss.str();
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const module_function& rhs)
|
||||
{
|
||||
std::stringstream ss;
|
||||
|
||||
static size_t absolute_max = 80;
|
||||
auto w0 = std::min<size_t>(get_width()[0], absolute_max);
|
||||
auto w1 = std::min<size_t>(get_width()[1], absolute_max);
|
||||
auto w2 = std::min<size_t>(get_width()[2], absolute_max);
|
||||
auto w0 = std::min<size_t>(get_width()[0], absolute_max_width);
|
||||
auto w1 = std::min<size_t>(get_width()[1], absolute_max_width);
|
||||
auto w2 = std::min<size_t>(get_width()[2], absolute_max_width);
|
||||
|
||||
auto _get_str = [](const std::string& _inc) {
|
||||
if(_inc.length() > absolute_max)
|
||||
return _inc.substr(0, absolute_max - 3) + "...";
|
||||
if(_inc.length() > absolute_max_width)
|
||||
return _inc.substr(0, absolute_max_width - 3) + "...";
|
||||
return _inc;
|
||||
};
|
||||
|
||||
ss << std::setw(w0 + 8) << std::left << _get_str(rhs.module) << " "
|
||||
// clang-format off
|
||||
ss << std::setw(14) << rhs.address_range << " "
|
||||
<< std::setw(14) << rhs.instr_count << " "
|
||||
<< std::setw(w0 + 8) << std::left << _get_str(rhs.module) << " "
|
||||
<< std::setw(w1 + 8) << std::left << _get_str(rhs.function) << " "
|
||||
<< std::setw(w2 + 8) << std::left << _get_str(rhs.signature.get());
|
||||
// clang-format on
|
||||
|
||||
os << ss.str();
|
||||
return os;
|
||||
}
|
||||
|
||||
string_t module = "";
|
||||
string_t function = "";
|
||||
size_t address_range = 0;
|
||||
size_t instr_count = 0;
|
||||
string_t module = {};
|
||||
string_t function = {};
|
||||
function_signature signature;
|
||||
};
|
||||
//
|
||||
//======================================================================================//
|
||||
//
|
||||
static inline void
|
||||
dump_info(const string_t& _oname, const fmodset_t& _data, int level)
|
||||
dump_info(std::ostream& _os, const fmodset_t& _data)
|
||||
{
|
||||
if(!debug_print && verbose_level < level)
|
||||
return;
|
||||
|
||||
module_function::reset_width();
|
||||
for(const auto& itr : _data)
|
||||
module_function::update_width(itr);
|
||||
|
||||
module_function::write_header(_os);
|
||||
for(const auto& itr : _data)
|
||||
_os << itr << '\n';
|
||||
|
||||
module_function::reset_width();
|
||||
}
|
||||
//
|
||||
static inline void
|
||||
dump_info(const string_t& _oname, const fmodset_t& _data, int _level)
|
||||
{
|
||||
if(!debug_print && verbose_level < _level)
|
||||
return;
|
||||
|
||||
std::ofstream ofs(_oname);
|
||||
if(ofs)
|
||||
{
|
||||
verbprintf(level, "Dumping '%s'... ", _oname.c_str());
|
||||
for(const auto& itr : _data)
|
||||
ofs << itr << '\n';
|
||||
verbprintf(level, "Done\n");
|
||||
verbprintf(_level, "Dumping '%s'... ", _oname.c_str());
|
||||
dump_info(ofs, _data);
|
||||
verbprintf(_level, "Done\n");
|
||||
}
|
||||
ofs.close();
|
||||
|
||||
module_function::reset_width();
|
||||
}
|
||||
//
|
||||
//======================================================================================//
|
||||
@@ -554,7 +583,7 @@ private:
|
||||
//
|
||||
static inline address_space_t*
|
||||
hosttrace_get_address_space(patch_pointer_t _bpatch, int _cmdc, char** _cmdv,
|
||||
bool _rewrite, int _pid = -1, string_t _name = "")
|
||||
bool _rewrite, int _pid = -1, string_t _name = {})
|
||||
{
|
||||
address_space_t* mutatee = nullptr;
|
||||
|
||||
|
||||
@@ -0,0 +1,209 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#if !defined(TIMEMORY_USE_PERFETTO)
|
||||
# include <perfetto.h>
|
||||
# define PERFETTO_CATEGORIES \
|
||||
perfetto::Category("hosttrace").SetDescription("Function trace")
|
||||
#else
|
||||
# define PERFETTO_CATEGORIES \
|
||||
perfetto::Category("hosttrace").SetDescription("Function trace"), \
|
||||
perfetto::Category("timemory") \
|
||||
.SetDescription("Events from the timemory API")
|
||||
# define TIMEMORY_PERFETTO_CATEGORIES PERFETTO_CATEGORIES
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <sys/types.h>
|
||||
#include <thread>
|
||||
#include <unistd.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "timemory/api.hpp"
|
||||
#include "timemory/backends/mpi.hpp"
|
||||
#include "timemory/backends/process.hpp"
|
||||
#include "timemory/backends/threading.hpp"
|
||||
#include "timemory/components.hpp"
|
||||
#include "timemory/components/gotcha/mpip.hpp"
|
||||
#include "timemory/config.hpp"
|
||||
#include "timemory/environment.hpp"
|
||||
#include "timemory/manager.hpp"
|
||||
#include "timemory/mpl/apply.hpp"
|
||||
#include "timemory/operations.hpp"
|
||||
#include "timemory/runtime.hpp"
|
||||
#include "timemory/settings.hpp"
|
||||
#include "timemory/storage.hpp"
|
||||
#include "timemory/variadic.hpp"
|
||||
|
||||
// forward decl of the API
|
||||
extern "C"
|
||||
{
|
||||
void hosttrace_push_trace(const char* name) TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_pop_trace(const char* name) TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_trace_init(const char*, bool, const char*)
|
||||
TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_trace_finalize(void) TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_trace_set_env(const char* env_name, const char* env_val)
|
||||
TIMEMORY_VISIBILITY("default");
|
||||
void hosttrace_trace_set_mpi(bool use, bool attached) TIMEMORY_VISIBILITY("default");
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
// same sort of functionality as python's " ".join([...])
|
||||
#if !defined(JOIN)
|
||||
# define JOIN(...) tim::mpl::apply<std::string>::join(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
// Writes a printf-style message to stderr when get_debug() returns true.
// The body is wrapped in do { ... } while(0) so the macro expands to exactly one
// statement: a bare `if { }` expansion followed by the caller's semicolon breaks
// unbraced if/else chains such as
//   if(cond) HOSTTRACE_DEBUG("..."); else other();
// Callers must terminate the invocation with a semicolon.
#define HOSTTRACE_DEBUG(...)                                                             \
    do                                                                                   \
    {                                                                                    \
        if(get_debug())                                                                  \
        {                                                                                \
            fprintf(stderr, __VA_ARGS__);                                                \
        }                                                                                \
    } while(0)
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
namespace audit = tim::audit;
|
||||
namespace comp = tim::component;
|
||||
namespace quirk = tim::quirk;
|
||||
namespace threading = tim::threading;
|
||||
|
||||
// this is used to wrap fork()
|
||||
struct fork_gotcha : comp::base<fork_gotcha, void>
|
||||
{
|
||||
using gotcha_data_t = comp::gotcha_data;
|
||||
|
||||
TIMEMORY_DEFAULT_OBJECT(fork_gotcha)
|
||||
|
||||
// this will get called right before fork
|
||||
void audit(const gotcha_data_t& _data, audit::incoming);
|
||||
|
||||
// this will get called right after fork with the return value
|
||||
void audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid);
|
||||
};
|
||||
|
||||
// this is used to wrap MPI_Init and MPI_Init_thread
|
||||
struct mpi_gotcha : comp::base<mpi_gotcha, void>
|
||||
{
|
||||
using gotcha_data_t = comp::gotcha_data;
|
||||
|
||||
TIMEMORY_DEFAULT_OBJECT(mpi_gotcha)
|
||||
|
||||
// this will get called right before MPI_Init with that function's arguments
|
||||
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***);
|
||||
|
||||
// this will get called right before MPI_Init_thread with that function's arguments
|
||||
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***, int, int*);
|
||||
|
||||
// this will get called right after MPI_Init and MPI_Init_thread with the return value
|
||||
void audit(const gotcha_data_t& _data, audit::outgoing, int _retval);
|
||||
};
|
||||
|
||||
// timemory api struct
|
||||
struct hosttrace : tim::concepts::api
|
||||
{};
|
||||
|
||||
// timemory component which calls hosttrace functions
|
||||
// (used in gotcha wrappers)
|
||||
struct hosttrace_component : tim::component::base<hosttrace_component, void>
|
||||
{
|
||||
void start();
|
||||
void stop();
|
||||
void set_prefix(const char*);
|
||||
|
||||
private:
|
||||
const char* m_prefix = nullptr;
|
||||
};
|
||||
|
||||
using fork_gotcha_t = comp::gotcha<4, tim::component_tuple<fork_gotcha>, hosttrace>;
|
||||
using mpi_gotcha_t = comp::gotcha<4, tim::component_tuple<mpi_gotcha>, hosttrace>;
|
||||
using hosttrace_bundle_t =
|
||||
tim::lightweight_tuple<comp::wall_clock, comp::peak_rss, comp::cpu_clock,
|
||||
comp::cpu_util, comp::user_global_bundle, fork_gotcha_t,
|
||||
mpi_gotcha_t>;
|
||||
using bundle_t =
|
||||
tim::component_bundle<hosttrace, comp::wall_clock*, comp::user_global_bundle*>;
|
||||
using bundle_allocator_t = tim::data::ring_buffer_allocator<bundle_t>;
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
#if !defined(TIMEMORY_USE_PERFETTO)
|
||||
PERFETTO_DEFINE_CATEGORIES(PERFETTO_CATEGORIES);
|
||||
#endif
|
||||
|
||||
#if defined(CUSTOM_DATA_SOURCE)
|
||||
class CustomDataSource : public perfetto::DataSource<CustomDataSource>
|
||||
{
|
||||
public:
|
||||
void OnSetup(const SetupArgs&) override
|
||||
{
|
||||
// Use this callback to apply any custom configuration to your data source
|
||||
// based on the TraceConfig in SetupArgs.
|
||||
PRINT_HERE("%s", "setup");
|
||||
}
|
||||
|
||||
void OnStart(const StartArgs&) override
|
||||
{
|
||||
// This notification can be used to initialize the GPU driver, enable
|
||||
// counters, etc. StartArgs will contain the DataSourceDescriptor,
|
||||
// which can be extended.
|
||||
PRINT_HERE("%s", "start");
|
||||
}
|
||||
|
||||
void OnStop(const StopArgs&) override
|
||||
{
|
||||
// Undo any initialization done in OnStart.
|
||||
PRINT_HERE("%s", "stop");
|
||||
}
|
||||
|
||||
// Data sources can also have per-instance state.
|
||||
int my_custom_state = 0;
|
||||
};
|
||||
|
||||
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
#endif
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
// used for specifying the state of hosttrace
|
||||
enum class State : unsigned short
|
||||
{
|
||||
DelayedInit = 0,
|
||||
PreInit,
|
||||
Active,
|
||||
Finalized
|
||||
};
|
||||
|
||||
bool
|
||||
get_debug();
|
||||
|
||||
State&
|
||||
get_state();
|
||||
|
||||
std::unique_ptr<hosttrace_bundle_t>&
|
||||
get_main_bundle();
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
// there are currently some strange things that happen with vector<bundle_t> so using
|
||||
// vector<bundle_t*> and timemory's ring_buffer_allocator to create contiguous memory-page
|
||||
// aligned instances of the bundle
|
||||
struct hosttrace_timemory_data
|
||||
{
|
||||
static constexpr size_t max_supported_threads = 1024;
|
||||
using instance_array_t = std::array<hosttrace_timemory_data, max_supported_threads>;
|
||||
|
||||
bundle_allocator_t allocator{};
|
||||
std::vector<bundle_t*> bundles{};
|
||||
|
||||
static instance_array_t& instances();
|
||||
};
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
@@ -323,77 +323,6 @@ error_func_fake(error_level_t level, int num, const char* const* params)
|
||||
// It does nothing.
|
||||
}
|
||||
|
||||
//======================================================================================//
|
||||
//
|
||||
bool
|
||||
find_func_or_calls(std::vector<const char*> names, bpvector_t<point_t*>& points,
|
||||
image_t* app_image, procedure_loc_t loc)
|
||||
{
|
||||
using function_t = procedure_t;
|
||||
using function_vec_t = bpvector_t<function_t*>;
|
||||
using point_vec_t = bpvector_t<point_t*>;
|
||||
|
||||
function_t* func = nullptr;
|
||||
for(auto nitr = names.begin(); nitr != names.end(); ++nitr)
|
||||
{
|
||||
function_t* f = find_function(app_image, *nitr);
|
||||
if(f && f->getModule()->isSharedLib())
|
||||
{
|
||||
func = f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(func)
|
||||
{
|
||||
point_vec_t* fpoints = func->findPoint(loc);
|
||||
if(fpoints && fpoints->size())
|
||||
{
|
||||
for(auto pitr = fpoints->begin(); pitr != fpoints->end(); ++pitr)
|
||||
points.push_back(*pitr);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Moderately expensive loop here. Perhaps we should make a name->point map first
|
||||
// and just do lookups through that.
|
||||
function_vec_t* all_funcs = app_image->getProcedures();
|
||||
auto initial_points_size = points.size();
|
||||
for(auto nitr = names.begin(); nitr != names.end(); ++nitr)
|
||||
{
|
||||
for(auto fitr = all_funcs->begin(); fitr != all_funcs->end(); ++fitr)
|
||||
{
|
||||
function_t* f = *fitr;
|
||||
if(f->getModule()->isSharedLib())
|
||||
continue;
|
||||
point_vec_t* fpoints = f->findPoint(BPatch_locSubroutine);
|
||||
if(!fpoints || fpoints->empty())
|
||||
continue;
|
||||
for(auto pitr = fpoints->begin(); pitr != fpoints->end(); pitr++)
|
||||
{
|
||||
std::string callee = (*pitr)->getCalledFunctionName();
|
||||
if(callee == std::string(*nitr))
|
||||
points.push_back(*pitr);
|
||||
}
|
||||
}
|
||||
if(points.size() != initial_points_size)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
//======================================================================================//
|
||||
//
|
||||
bool
|
||||
find_func_or_calls(const char* name, bpvector_t<point_t*>& points, image_t* image,
|
||||
procedure_loc_t loc)
|
||||
{
|
||||
std::vector<const char*> v;
|
||||
v.push_back(name);
|
||||
return find_func_or_calls(v, points, image, loc);
|
||||
}
|
||||
|
||||
//======================================================================================//
|
||||
//
|
||||
bool
|
||||
@@ -516,11 +445,11 @@ c_stdlib_function_constraint(const std::string& _func)
|
||||
"compat|vfork_|elision_init|cr_|cri_|aio_|mq_|sem_init|waitpid$|sigcancel_"
|
||||
"handler|sighandler_setxid|start_thread$|clock$|semctl$|shm_open$|shm_unlink$|"
|
||||
"printf|dprintf|walker$|clear_once_control$|libcr_|sem_wait$|sem_trywait$|vfork|"
|
||||
"pause$|wait$|msgrcv$|sigwait$|sigsuspend$|recvmsg$|sendmsg$|ftrylockfile$|"
|
||||
"funlockfile$|tee$|setbuf$|setbuffer$|enlarge_userbuf$|convert_and_print$|"
|
||||
"feraise|lio_|atomic_|err$|errx$|print_errno_message$|error_tail$|clntunix_|"
|
||||
"sem_destroy|setxid_mark_thread|feupdate|send$|connect$|longjmp|pwrite|accept$|"
|
||||
"stpncpy$|writeunix$|xflowf$|mbrlen$)",
|
||||
"pause$|wait$|waitid$|msgrcv$|sigwait$|sigsuspend$|recvmsg$|sendmsg$|"
|
||||
"ftrylockfile$|funlockfile$|tee$|setbuf$|setbuffer$|enlarge_userbuf$|convert_and_"
|
||||
"print$|feraise|lio_|atomic_|err$|errx$|print_errno_message$|error_tail$|"
|
||||
"clntunix_|sem_destroy|setxid_mark_thread|feupdate|send$|connect$|longjmp|pwrite|"
|
||||
"accept$|stpncpy$|writeunix$|xflowf$|mbrlen$)",
|
||||
regex_opts);
|
||||
|
||||
return std::regex_search(_func, _pattern);
|
||||
|
||||
@@ -28,25 +28,32 @@
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
static bool is_driver = false;
|
||||
static size_t batch_size = 50;
|
||||
static strset_t extra_libs = {};
|
||||
static std::vector<std::pair<uint64_t, string_t>> hash_ids;
|
||||
static std::map<string_t, bool> use_stubs;
|
||||
static std::map<string_t, procedure_t*> beg_stubs;
|
||||
static std::map<string_t, procedure_t*> end_stubs;
|
||||
static strvec_t init_stub_names;
|
||||
static strvec_t fini_stub_names;
|
||||
static strset_t used_stub_names;
|
||||
static std::vector<call_expr_pointer_t> env_variables;
|
||||
static std::map<string_t, call_expr_pointer_t> beg_expr;
|
||||
static std::map<string_t, call_expr_pointer_t> end_expr;
|
||||
static const auto npos_v = string_t::npos;
|
||||
static string_t instr_mode = "trace";
|
||||
static string_t instr_push_func = "hosttrace_push_trace";
|
||||
static string_t instr_pop_func = "hosttrace_pop_trace";
|
||||
static string_t instr_push_hash = "hosttrace_push_trace_hash";
|
||||
static string_t instr_pop_hash = "hosttrace_pop_trace_hash";
|
||||
static bool is_driver = false;
|
||||
static bool allow_overlapping = false;
|
||||
static size_t batch_size = 50;
|
||||
static strset_t extra_libs = {};
|
||||
static size_t min_address_range = (1 << 9); // 512
|
||||
static size_t min_loop_address_range = (1 << 6); // 64
|
||||
static std::vector<std::pair<uint64_t, string_t>> hash_ids = {};
|
||||
static std::map<string_t, bool> use_stubs = {};
|
||||
static std::map<string_t, procedure_t*> beg_stubs = {};
|
||||
static std::map<string_t, procedure_t*> end_stubs = {};
|
||||
static strvec_t init_stub_names = {};
|
||||
static strvec_t fini_stub_names = {};
|
||||
static strset_t used_stub_names = {};
|
||||
static std::vector<call_expr_pointer_t> env_variables = {};
|
||||
static std::map<string_t, call_expr_pointer_t> beg_expr = {};
|
||||
static std::map<string_t, call_expr_pointer_t> end_expr = {};
|
||||
static const auto npos_v = string_t::npos;
|
||||
static string_t instr_mode = "trace";
|
||||
static string_t instr_push_func = "hosttrace_push_trace";
|
||||
static string_t instr_pop_func = "hosttrace_pop_trace";
|
||||
static string_t instr_push_hash = "hosttrace_push_trace_hash";
|
||||
static string_t instr_pop_hash = "hosttrace_pop_trace_hash";
|
||||
static string_t print_instrumented = {};
|
||||
static string_t print_available = {};
|
||||
static string_t print_overlapping = {};
|
||||
static std::string modfunc_dump_dir = "hosttrace-module-functions";
|
||||
|
||||
std::string
|
||||
get_absolute_exe_filepath(std::string exe_name);
|
||||
@@ -228,7 +235,8 @@ main(int argc, char** argv)
|
||||
.count(1);
|
||||
parser.add_argument()
|
||||
.names({ "-d", "--default-components" })
|
||||
.description("Default components to instrument");
|
||||
.description("Default components to instrument (only useful when timemory is "
|
||||
"enabled in hosttrace library)");
|
||||
parser.add_argument()
|
||||
.names({ "-M", "--mode" })
|
||||
.description("Instrumentation mode. 'trace' mode is immutable, 'region' mode is "
|
||||
@@ -237,8 +245,9 @@ main(int argc, char** argv)
|
||||
.count(1);
|
||||
parser.add_argument()
|
||||
.names({ "--env" })
|
||||
.description(
|
||||
"Environment variables to add to the runtime in form VARIABLE=VALUE");
|
||||
.description("Environment variables to add to the runtime in form "
|
||||
"VARIABLE=VALUE. E.g. use '--env HOSTTRACE_USE_TIMEMORY=ON' to "
|
||||
"default to using timemory instead of perfetto");
|
||||
parser.add_argument()
|
||||
.names({ "--prefer" })
|
||||
.description("Prefer this library types when available")
|
||||
@@ -250,12 +259,12 @@ main(int argc, char** argv)
|
||||
parser
|
||||
.add_argument({ "--mpi" },
|
||||
"Enable MPI support (requires hosttrace built w/ MPI and GOTCHA "
|
||||
"support)")
|
||||
"support). NOTE: this will automatically be activated if "
|
||||
"MPI_Init/MPI_Init_thread and MPI_Finalize are found in the symbol "
|
||||
"table of target")
|
||||
.count(0);
|
||||
parser.add_argument({ "--label" }, "Labeling info for functions")
|
||||
.choices({ "file", "line", "return", "args" });
|
||||
parser.add_argument({ "--mpip" }, "Enable MPI profiling via GOTCHA").count(0);
|
||||
parser.add_argument({ "--ompt" }, "Enable OpenMP profiling via OMPT").count(0);
|
||||
parser.add_argument({ "--load" },
|
||||
"Supplemental instrumentation library names w/o extension (e.g. "
|
||||
"'libinstr' for 'libinstr.so' or 'libinstr.a')");
|
||||
@@ -271,7 +280,75 @@ main(int argc, char** argv)
|
||||
"Dyninst supports batch insertion of multiple points. If one large batch "
|
||||
"insertion fails, this value will be used to create smaller batches")
|
||||
.count(1)
|
||||
.dtype("size_t")
|
||||
.action([](parser_t& p) { batch_size = p.get<size_t>("batch-size"); });
|
||||
parser
|
||||
.add_argument({ "-r", "--min-address-range" },
|
||||
"If the address range of a function is less than this value, "
|
||||
"exclude it from instrumentation")
|
||||
.count(1)
|
||||
.dtype("size_t")
|
||||
.set_default(min_address_range)
|
||||
.action(
|
||||
[](parser_t& p) { min_address_range = p.get<size_t>("min-address-range"); });
|
||||
parser
|
||||
.add_argument({ "--min-address-range-loop" },
|
||||
"If the address range of a function containing a loop is less than "
|
||||
"this value, "
|
||||
"exclude it from instrumentation")
|
||||
.count(1)
|
||||
.dtype("size_t")
|
||||
.set_default(min_loop_address_range)
|
||||
.action([](parser_t& p) {
|
||||
min_loop_address_range = p.get<size_t>("min-address-range-loop");
|
||||
});
|
||||
parser.add_argument()
|
||||
.names({ "--allow-overlapping" })
|
||||
.description(
|
||||
"Allow dyninst to instrument either multiple functions which overlap (share "
|
||||
"part of same function body) or single functions with multiple entry points. "
|
||||
"For more info, see Section 2 of the DyninstAPI documentation.")
|
||||
.count(0)
|
||||
.action([](parser_t&) { allow_overlapping = true; });
|
||||
parser
|
||||
.add_argument(
|
||||
{ "--print-dir" },
|
||||
"Output directory for diagnostic available/instrumented/overlapping module "
|
||||
"function lists, e.g. {print-dir}/available.txt")
|
||||
.count(1)
|
||||
.dtype("string")
|
||||
.set_default(modfunc_dump_dir)
|
||||
.action([](parser_t& p) { modfunc_dump_dir = p.get<std::string>("print-dir"); });
|
||||
parser
|
||||
.add_argument(
|
||||
{ "--print-instrumented" },
|
||||
"Print the instrumented entities (functions, modules, or module-function "
|
||||
"pair) to stdout after applying regular expressions and exit")
|
||||
.count(1)
|
||||
.choices({ "functions", "modules", "functions+", "pair", "pair+" })
|
||||
.action([](parser_t& p) {
|
||||
print_instrumented = p.get<std::string>("print-instrumented");
|
||||
});
|
||||
parser
|
||||
.add_argument(
|
||||
{ "--print-available" },
|
||||
"Print the available entities for instrumentation (functions, modules, or "
|
||||
"module-function pair) to stdout applying regular expressions and exit")
|
||||
.count(1)
|
||||
.choices({ "functions", "modules", "functions+", "pair", "pair+" })
|
||||
.action(
|
||||
[](parser_t& p) { print_available = p.get<std::string>("print-available"); });
|
||||
parser
|
||||
.add_argument(
|
||||
{ "--print-overlapping" },
|
||||
"Print the entities for instrumentation (functions, modules, or "
|
||||
"module-function pair) which overlap other function calls or have multiple "
|
||||
"entry points to stdout applying regular expressions and exit")
|
||||
.count(1)
|
||||
.choices({ "functions", "modules", "functions+", "pair", "pair+" })
|
||||
.action([](parser_t& p) {
|
||||
print_overlapping = p.get<std::string>("print-overlapping");
|
||||
});
|
||||
|
||||
if(_cmdc == 0)
|
||||
{
|
||||
@@ -380,23 +457,13 @@ main(int argc, char** argv)
|
||||
if(parser.exists("mpi"))
|
||||
use_mpi = true;
|
||||
|
||||
if(parser.exists("mpip"))
|
||||
use_stubs["mpip"] = true;
|
||||
else
|
||||
use_stubs["mpip"] = false;
|
||||
|
||||
if(parser.exists("ompt"))
|
||||
use_stubs["ompt"] = true;
|
||||
else
|
||||
use_stubs["ompt"] = false;
|
||||
|
||||
if(parser.exists("p"))
|
||||
_pid = parser.get<int>("p");
|
||||
|
||||
if(parser.exists("d"))
|
||||
{
|
||||
auto _components = parser.get<strvec_t>("default-components");
|
||||
default_components = "";
|
||||
default_components = {};
|
||||
for(size_t i = 0; i < _components.size(); ++i)
|
||||
{
|
||||
if(_components.at(i) == "none")
|
||||
@@ -409,7 +476,7 @@ main(int argc, char** argv)
|
||||
default_components += ",";
|
||||
}
|
||||
if(default_components == "none")
|
||||
default_components = "";
|
||||
default_components = {};
|
||||
else
|
||||
{
|
||||
auto _strcomp = parser.get<std::string>("d");
|
||||
@@ -460,6 +527,9 @@ main(int argc, char** argv)
|
||||
fini_stub_names = parser.get<strvec_t>("fini-functions");
|
||||
auto env_vars = parser.get<strvec_t>("env");
|
||||
|
||||
if(verbose_level >= 0)
|
||||
tim::makedir(modfunc_dump_dir);
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
//
|
||||
// REGEX OPTIONS
|
||||
@@ -603,6 +673,19 @@ main(int argc, char** argv)
|
||||
//----------------------------------------------------------------------------------//
|
||||
std::set<std::string> module_names;
|
||||
|
||||
auto _add_overlapping = [](module_t* mitr, procedure_t* pitr) {
|
||||
std::vector<procedure_t*> _overlapping{};
|
||||
if(pitr->findOverlapping(_overlapping))
|
||||
{
|
||||
overlapping_module_functions.insert(module_function{ mitr, pitr });
|
||||
for(auto oitr : _overlapping)
|
||||
{
|
||||
overlapping_module_functions.insert(
|
||||
module_function{ oitr->getModule(), oitr });
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if(app_modules && !app_modules->empty())
|
||||
{
|
||||
modules = *app_modules;
|
||||
@@ -616,6 +699,7 @@ main(int argc, char** argv)
|
||||
auto _modfn = module_function(itr, pitr);
|
||||
module_names.insert(_modfn.module);
|
||||
available_module_functions.insert(std::move(_modfn));
|
||||
_add_overlapping(itr, pitr);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -636,6 +720,7 @@ main(int argc, char** argv)
|
||||
auto _modfn = module_function(mod, itr);
|
||||
module_names.insert(_modfn.module);
|
||||
available_module_functions.insert(std::move(_modfn));
|
||||
_add_overlapping(mod, itr);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -667,7 +752,10 @@ main(int argc, char** argv)
|
||||
std::cout << '\n' << std::endl;
|
||||
}
|
||||
|
||||
dump_info("available_module_functions.txt", available_module_functions, 1);
|
||||
dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, "available.txt"),
|
||||
available_module_functions, 1);
|
||||
dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, "overlapping.txt"),
|
||||
overlapping_module_functions, 1);
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
//
|
||||
@@ -756,12 +844,6 @@ main(int argc, char** argv)
|
||||
|
||||
load_library(get_library_ext(libname));
|
||||
|
||||
if(use_stubs["mpip"] && !is_static_exe)
|
||||
load_library({ "libhosttrace-mpip.so" });
|
||||
|
||||
if(use_stubs["ompt"])
|
||||
load_library(get_library_ext({ "libhosttrace-ompt" }));
|
||||
|
||||
for(const auto& itr : extra_libs)
|
||||
load_library(get_library_ext({ itr }));
|
||||
|
||||
@@ -795,6 +877,10 @@ main(int argc, char** argv)
|
||||
if(mpi_init_func && mpi_fini_func)
|
||||
use_mpi = true;
|
||||
|
||||
bool use_mpip = false;
|
||||
if(use_mpi && binary_rewrite)
|
||||
use_mpip = true;
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
//
|
||||
// Handle supplemental instrumentation library functions
|
||||
@@ -824,11 +910,6 @@ main(int argc, char** argv)
|
||||
return false;
|
||||
};
|
||||
|
||||
if(use_stubs["mpip"])
|
||||
add_instr_library("mpip", "hosttrace_register_mpip", "hosttrace_deregister_mpip");
|
||||
if(use_stubs["ompt"])
|
||||
add_instr_library("ompt", "hosttrace_register_ompt", "hosttrace_deregister_ompt");
|
||||
|
||||
if(!extra_libs.empty())
|
||||
{
|
||||
verbprintf(2, "Adding extra libraries...\n");
|
||||
@@ -963,20 +1044,6 @@ main(int argc, char** argv)
|
||||
"with MPI and GOTCHA support");
|
||||
}
|
||||
|
||||
if(use_stubs["mpip"] &&
|
||||
!(beg_stubs["mpip"] != nullptr || end_stubs["mpip"] != nullptr))
|
||||
{
|
||||
throw std::runtime_error("MPIP support was requested but could not find "
|
||||
"hosttrace_{register,deregister}_mpip functions");
|
||||
}
|
||||
|
||||
if(use_stubs["ompt"] &&
|
||||
!(beg_stubs["ompt"] != nullptr || end_stubs["ompt"] != nullptr))
|
||||
{
|
||||
throw std::runtime_error("OMPT support was requested but could not find "
|
||||
"hosttrace_{register,deregister}_ompt functions");
|
||||
}
|
||||
|
||||
auto check_for_debug_info = [](bool& _has_debug_info, auto* _func) {
|
||||
// This heuristic guesses that debugging info is available if function
|
||||
// is not defined in the DEFAULT_MODULE
|
||||
@@ -1063,12 +1130,10 @@ main(int argc, char** argv)
|
||||
auto mpie_init_args = hosttrace_call_expr("HOSTTRACE_MPI_INIT", "OFF");
|
||||
auto mpie_fini_args = hosttrace_call_expr("HOSTTRACE_MPI_FINALIZE", "OFF");
|
||||
auto trace_call_args =
|
||||
hosttrace_call_expr("HOSTTRACE_TRACE_COMPONENTS", default_components);
|
||||
auto mpip_call_args =
|
||||
hosttrace_call_expr("HOSTTRACE_MPIP_COMPONENTS", default_components);
|
||||
auto ompt_call_args =
|
||||
hosttrace_call_expr("HOSTTRACE_OMPT_COMPONENTS", default_components);
|
||||
auto none_call_args = hosttrace_call_expr();
|
||||
hosttrace_call_expr("HOSTTRACE_COMPONENTS", default_components);
|
||||
auto use_mpi_call_args = hosttrace_call_expr("HOSTTRACE_USE_MPI", "ON");
|
||||
auto use_mpip_call_args = hosttrace_call_expr("HOSTTRACE_USE_MPIP", "ON");
|
||||
auto none_call_args = hosttrace_call_expr();
|
||||
|
||||
verbprintf(2, "Done\n");
|
||||
verbprintf(2, "Getting call snippets... ");
|
||||
@@ -1080,12 +1145,12 @@ main(int argc, char** argv)
|
||||
auto main_beg_call = main_call_args.get(entr_trace);
|
||||
auto main_end_call = main_call_args.get(exit_trace);
|
||||
|
||||
auto trace_env_call = trace_call_args.get(env_func);
|
||||
auto mode_env_call = mode_call_args.get(env_func);
|
||||
auto mpip_env_call = mpip_call_args.get(env_func);
|
||||
auto ompt_env_call = ompt_call_args.get(env_func);
|
||||
auto mpii_env_call = mpie_init_args.get(env_func);
|
||||
auto mpif_env_call = mpie_fini_args.get(env_func);
|
||||
auto trace_env_call = trace_call_args.get(env_func);
|
||||
auto mode_env_call = mode_call_args.get(env_func);
|
||||
auto mpii_env_call = mpie_init_args.get(env_func);
|
||||
auto mpif_env_call = mpie_fini_args.get(env_func);
|
||||
auto use_mpi_env_call = use_mpi_call_args.get(env_func);
|
||||
auto use_mpip_env_call = use_mpip_call_args.get(env_func);
|
||||
|
||||
verbprintf(2, "Done\n");
|
||||
|
||||
@@ -1125,10 +1190,10 @@ main(int argc, char** argv)
|
||||
init_names.push_back(mpii_env_call.get());
|
||||
if(mpif_env_call)
|
||||
init_names.push_back(mpif_env_call.get());
|
||||
if(use_stubs["mpip"] && mpip_env_call)
|
||||
init_names.push_back(mpip_env_call.get());
|
||||
if(use_stubs["ompt"] && ompt_env_call)
|
||||
init_names.push_back(ompt_env_call.get());
|
||||
if(use_mpi && use_mpi_env_call)
|
||||
init_names.push_back(use_mpi_env_call.get());
|
||||
if(use_mpip && use_mpip_env_call)
|
||||
init_names.push_back(use_mpip_env_call.get());
|
||||
|
||||
for(const auto& itr : env_variables)
|
||||
{
|
||||
@@ -1229,7 +1294,13 @@ main(int argc, char** argv)
|
||||
else
|
||||
itr->getModuleName(modname, MUTNAMELEN);
|
||||
|
||||
if(strstr(modname, "libdyninst") != nullptr)
|
||||
if(!itr->isInstrumentable())
|
||||
{
|
||||
verbprintf(2, "Skipping uninstrumentable function: %s\n", fname);
|
||||
continue;
|
||||
}
|
||||
|
||||
if(std::string{ modname }.find("libdyninst") != std::string::npos)
|
||||
continue;
|
||||
|
||||
if(module_constraint(modname) || !process_file_for_instrumentation(modname))
|
||||
@@ -1240,12 +1311,6 @@ main(int argc, char** argv)
|
||||
|
||||
itr->getName(fname, FUNCNAMELEN);
|
||||
|
||||
if(!itr->isInstrumentable())
|
||||
{
|
||||
verbprintf(1, "Skipping uninstrumentable function: %s\n", fname);
|
||||
continue;
|
||||
}
|
||||
|
||||
auto name = get_func_file_line_info(mod, itr);
|
||||
|
||||
if(name.get().empty())
|
||||
@@ -1268,13 +1333,93 @@ main(int argc, char** argv)
|
||||
continue;
|
||||
}
|
||||
|
||||
if(is_static_exe && has_debug_info && strcmp(fname, "_fini") != 0 &&
|
||||
strcmp(modname, "DEFAULT_MODULE") == 0)
|
||||
if(is_static_exe && has_debug_info && string_t{ fname } == "_fini" &&
|
||||
string_t{ modname } == "DEFAULT_MODULE")
|
||||
{
|
||||
verbprintf(1, "Skipping function [DEFAULT_MODULE]: %s\n", fname);
|
||||
continue;
|
||||
}
|
||||
|
||||
_add_overlapping(mod, itr);
|
||||
|
||||
if(!allow_overlapping &&
|
||||
overlapping_module_functions.find(module_function{ mod, itr }) !=
|
||||
overlapping_module_functions.end())
|
||||
{
|
||||
verbprintf(1, "Skipping function [overlapping]: %s / %s\n",
|
||||
name.m_name.c_str(), name.get().c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
// directly try to get loop entry points
|
||||
const std::vector<point_t*>* _loop_entries =
|
||||
itr->findPoint(BPatch_locLoopEntry);
|
||||
|
||||
// try to get loops via the control flow graph
|
||||
flow_graph_t* cfg = itr->getCFG();
|
||||
basic_loop_vec_t basic_loop{};
|
||||
if(cfg)
|
||||
cfg->getOuterLoops(basic_loop);
|
||||
|
||||
// if the function has dynamic callsites and we are in binary rewrite mode,
|
||||
// force the instrumentation
|
||||
bool _force_instr = false;
|
||||
if(cfg && binary_rewrite)
|
||||
_force_instr = cfg->containsDynamicCallsites();
|
||||
|
||||
auto _address_range = module_function{ mod, itr }.address_range;
|
||||
auto _num_loop_entries =
|
||||
(_loop_entries)
|
||||
? std::max<size_t>(_loop_entries->size(), basic_loop.size())
|
||||
: basic_loop.size();
|
||||
auto _has_loop_entries = (_num_loop_entries > 0);
|
||||
|
||||
if(_address_range < min_address_range && !_has_loop_entries && !_force_instr)
|
||||
{
|
||||
verbprintf(1,
|
||||
"Skipping function [min-address-range]: %s / %s (address "
|
||||
"range = %lu, minimum = %lu)\n",
|
||||
name.m_name.c_str(), name.get().c_str(),
|
||||
(unsigned long) _address_range,
|
||||
(unsigned long) min_address_range);
|
||||
continue;
|
||||
}
|
||||
else if(_address_range < min_loop_address_range && _has_loop_entries &&
|
||||
!_force_instr)
|
||||
{
|
||||
verbprintf(1,
|
||||
"Skipping function [min-loop-address-range]: %s / %s (address "
|
||||
"range = %lu, minimum = %lu)\n",
|
||||
name.m_name.c_str(), name.get().c_str(),
|
||||
(unsigned long) _address_range,
|
||||
(unsigned long) min_loop_address_range);
|
||||
continue;
|
||||
}
|
||||
else if(_address_range >= min_loop_address_range &&
|
||||
_address_range < min_address_range && _has_loop_entries)
|
||||
{
|
||||
verbprintf(
|
||||
1,
|
||||
"Enabling function [min-loop-address-range]: %s / %s despite not "
|
||||
"satisfy minimum loop address range (address range = %lu, minimum "
|
||||
"= %lu) because it has at least one loop (found: %lu)\n",
|
||||
name.m_name.c_str(), name.get().c_str(),
|
||||
(unsigned long) _address_range,
|
||||
(unsigned long) min_loop_address_range,
|
||||
(unsigned long) _num_loop_entries);
|
||||
}
|
||||
else if(_address_range < min_address_range && _force_instr)
|
||||
{
|
||||
verbprintf(1,
|
||||
"Enabling function [min-address-range]: %s / %s despite not "
|
||||
"satisfy minimum address range (address range = %lu, minimum "
|
||||
"= %lu) because contains dynamic callsites which may not be "
|
||||
"instrumented in binary rewrite mode\n",
|
||||
name.m_name.c_str(), name.get().c_str(),
|
||||
(unsigned long) _address_range,
|
||||
(unsigned long) min_address_range);
|
||||
}
|
||||
|
||||
hash_ids.emplace_back(std::hash<string_t>()(name.get()), name.get());
|
||||
available_module_functions.insert(module_function(mod, itr));
|
||||
instrumented_module_functions.insert(module_function(mod, itr));
|
||||
@@ -1302,13 +1447,9 @@ main(int argc, char** argv)
|
||||
verbprintf(1, "Instrumenting at the loop level: %s\n",
|
||||
name.m_name.c_str());
|
||||
|
||||
flow_graph_t* flow = itr->getCFG();
|
||||
basic_loop_vec_t basic_loop;
|
||||
if(flow)
|
||||
flow->getOuterLoops(basic_loop);
|
||||
for(auto* litr : basic_loop)
|
||||
{
|
||||
auto lname = get_loop_file_line_info(mod, itr, flow, litr);
|
||||
auto lname = get_loop_file_line_info(mod, itr, cfg, litr);
|
||||
auto _lname = lname.get();
|
||||
auto _lhash = std::hash<string_t>()(_lname);
|
||||
hash_ids.emplace_back(_lhash, _lname);
|
||||
@@ -1324,8 +1465,8 @@ main(int argc, char** argv)
|
||||
auto _lexit =
|
||||
_ltrace_exit.get((exit_hash) ? exit_hash : exit_trace);
|
||||
|
||||
insert_instr(addr_space, itr, _lentr, BPatch_entry, flow, litr);
|
||||
insert_instr(addr_space, itr, _lexit, BPatch_exit, flow, litr);
|
||||
insert_instr(addr_space, itr, _lentr, BPatch_entry, cfg, litr);
|
||||
insert_instr(addr_space, itr, _lexit, BPatch_exit, cfg, litr);
|
||||
};
|
||||
instr_procedure_functions.emplace_back(_lf);
|
||||
}
|
||||
@@ -1333,24 +1474,6 @@ main(int argc, char** argv)
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
//
|
||||
// Load the dependent libraries (currently unused)
|
||||
//
|
||||
//----------------------------------------------------------------------------------//
|
||||
|
||||
if(is_static_exe && false)
|
||||
{
|
||||
char* bindings = new char[MUTNAMELEN];
|
||||
bool loadResult = load_dependent_libraries(addr_space, bindings);
|
||||
delete[] bindings;
|
||||
if(!loadResult)
|
||||
{
|
||||
fprintf(stderr, "Failed to load dependent libraries\n");
|
||||
throw std::runtime_error("Failed to load dependent libraries");
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
//
|
||||
// Do a first pass through all procedures to generate the hash ids
|
||||
@@ -1489,8 +1612,87 @@ main(int argc, char** argv)
|
||||
//
|
||||
//----------------------------------------------------------------------------------//
|
||||
|
||||
dump_info("available_module_functions.txt", available_module_functions, 0);
|
||||
dump_info("instrumented_module_functions.txt", instrumented_module_functions, 0);
|
||||
bool _dump_and_exit = ((print_available.length() + print_instrumented.length() +
|
||||
print_overlapping.length()) > 0);
|
||||
|
||||
dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, "available.txt"),
|
||||
available_module_functions, 0);
|
||||
dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, "instrumented.txt"),
|
||||
instrumented_module_functions, 0);
|
||||
dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, "overlapping.txt"),
|
||||
overlapping_module_functions, 0);
|
||||
|
||||
// Prints the entries of `_modset` to stdout, grouped by module and with duplicate
// entries within a module removed.
//
// `_mode` selects what is printed for each entry:
//   "modules"     the module name (no per-module header is printed in this mode)
//   "functions"   the function name
//   "functions+"  the function signature
//   "pair"        "[module] --> [ address-range ][function]"
//   "pair+"       "[module] --> [ address-range ][signature]"
// Any other value throws std::runtime_error.
auto _dump_info = [](string_t _mode, const fmodset_t& _modset) {
    const bool _want_modules    = (_mode == "modules");
    const bool _want_functions  = (_mode == "functions");
    const bool _want_signatures = (_mode == "functions+");
    const bool _want_pair       = (_mode == "pair");
    const bool _want_pair_sig   = (_mode == "pair+");

    // reject unknown modes up front, before anything is collected or printed
    if(!(_want_modules || _want_functions || _want_signatures || _want_pair ||
         _want_pair_sig))
    {
        throw std::runtime_error("Unknown mode " + _mode);
    }

    // module name -> entries in first-seen order
    std::map<std::string, std::vector<std::string>> _entries{};
    // module name -> entries already recorded (for de-duplication)
    std::unordered_map<std::string, std::unordered_set<std::string>> _seen{};

    for(const auto& itr : _modset)
    {
        std::string _value{};
        if(_want_modules)
        {
            _value = itr.module;
        }
        else if(_want_functions)
        {
            _value = itr.function;
        }
        else if(_want_signatures)
        {
            _value = itr.signature.get();
        }
        else
        {
            std::stringstream _ss{};
            _ss << std::boolalpha;
            _ss << "[" << itr.module << "] --> [ " << itr.address_range << " ][";
            if(_want_pair)
                _ss << itr.function;
            else
                _ss << itr.signature.get();
            _ss << "]";
            _value = _ss.str();
        }

        // emplace() reports whether the value was newly added for this module
        const std::string _key = itr.module;
        if(_seen[_key].emplace(_value).second)
            _entries[_key].emplace_back(_value);
    }

    for(const auto& _group : _entries)
    {
        if(!_want_modules)
            std::cout << "\n" << _group.first << ":\n";
        for(const auto& _line : _group.second)
            std::cout << " " << _line << "\n";
    }
};
|
||||
|
||||
if(!print_available.empty())
|
||||
_dump_info(print_available, available_module_functions);
|
||||
if(!print_instrumented.empty())
|
||||
_dump_info(print_instrumented, instrumented_module_functions);
|
||||
if(!print_overlapping.empty())
|
||||
_dump_info(print_overlapping, overlapping_module_functions);
|
||||
|
||||
if(_dump_and_exit)
|
||||
exit(EXIT_SUCCESS);
|
||||
|
||||
//----------------------------------------------------------------------------------//
|
||||
//
|
||||
@@ -1650,7 +1852,7 @@ process_file_for_instrumentation(const string_t& file_name)
|
||||
return true;
|
||||
}
|
||||
|
||||
string_t ext_str = "\\.S$";
|
||||
string_t ext_str = "\\.(s|S)$";
|
||||
static std::regex ext_regex(ext_str, regex_opts);
|
||||
static std::regex sys_regex("^(s|k|e|w)_[A-Za-z_0-9\\-]+\\.(c|C)$", regex_opts);
|
||||
static std::regex userlib_regex(
|
||||
@@ -1660,14 +1862,11 @@ process_file_for_instrumentation(const string_t& file_name)
|
||||
static std::regex corelib_regex("^lib(rt-|dl-|util-|python)", regex_opts);
|
||||
// these are all due to TAU
|
||||
static std::regex prefix_regex(
|
||||
"^(RT|Tau|Profiler|Rts|Papi|Py|Comp_xl\\.cpp|Comp_gnu\\.cpp|"
|
||||
"^(_|\\.|RT|Tau|Profiler|Rts|Papi|Py|Comp_xl\\.cpp|Comp_gnu\\.cpp|"
|
||||
"UserEvent\\.cpp|FunctionInfo\\.cpp|PthreadLayer\\.cpp|"
|
||||
"Comp_intel[0-9]\\.cpp|Tracer\\.cpp|cxx11|locale|pmap_|rpc_|elf_|elf32_|elf64_|"
|
||||
"gelf_|reg-[a-z]+\\.c|sched_|io[a-z_]+\\.c|arg[zp]-|thrd_[a-z]+\\.c|pthread_|sem_"
|
||||
"|mtx_[a-z]+\\.c|cnd_[a-z]+\\.c|tss_[a-z]+\\.c|pt-[a-z]+\\.c|set[a-z]*gid\\.c|"
|
||||
"streams-[a-z]+\\.c|stat[a-z_]+\\.c|fstat[a-z_]+\\.c|epoll_[a-z_]+\\.c|ppoll|"
|
||||
"time[a-z_]+\\.c)",
|
||||
"Comp_intel[0-9]\\.cpp|Tracer\\.cpp)",
|
||||
regex_opts);
|
||||
/*
|
||||
static std::regex suffix_regex(
|
||||
"(printf|gettext|^sig[a-z]+|^exit|^setenv|on_exit|quick_exit|_crypt|^str[a-z_]+|"
|
||||
"mmap[0-9]+|^err|getu[a-z]+|^call_once|^sendto|^timer_[a-z]+|^read|^close|^recv|^"
|
||||
@@ -1677,14 +1876,14 @@ process_file_for_instrumentation(const string_t& file_name)
|
||||
"vscanf|memmove|uid|tsz|gid|cvt|cvt_r|^error|_r|[a-z]64|^f[a-z]+|^makecontext|^"
|
||||
"basename|^wcp[a-z]+|[a-z]+dir|^mb[a-z]+|^dir[a-z]+|euid[a-z]+|^c[36][24][a-z]+|^"
|
||||
"set[a-z_]+|^get[a-z_]+|^shm[a-z]+|^wc[a-z_]+|brk|^write[a-z]+)\\.c$",
|
||||
regex_opts);
|
||||
regex_opts);*/
|
||||
|
||||
if(!cstd_func_instr && c_stdlib_module_constraint(file_name))
|
||||
/*if(!cstd_func_instr && c_stdlib_module_constraint(file_name))
|
||||
{
|
||||
verbprintf(3, "Excluding instrumentation [c std library] : '%s'...\n",
|
||||
file_name.c_str());
|
||||
return false;
|
||||
}
|
||||
}*/
|
||||
|
||||
if(std::regex_search(file_name, ext_regex))
|
||||
{
|
||||
@@ -1721,12 +1920,12 @@ process_file_for_instrumentation(const string_t& file_name)
|
||||
return false;
|
||||
}
|
||||
|
||||
if(std::regex_search(file_name, suffix_regex))
|
||||
/*if(std::regex_search(file_name, suffix_regex))
|
||||
{
|
||||
verbprintf(3, "Excluding instrumentation [suffix match] : '%s'...\n",
|
||||
file_name.c_str());
|
||||
return false;
|
||||
}
|
||||
}*/
|
||||
|
||||
bool use = is_include(true) && !is_exclude();
|
||||
if(use)
|
||||
@@ -1787,9 +1986,14 @@ instrument_entity(const string_t& function_name)
|
||||
regex_opts);
|
||||
static std::regex exclude_cxx("(std::_Sp_counted_base|std::use_facet)", regex_opts);
|
||||
static std::regex leading(
|
||||
"^(_|frame_dummy|\\(|targ|new|delete|operator new|operator delete|std::allocat|"
|
||||
"nvtx|gcov|main\\.cold\\.|TAU|tau|Tau|dyn|RT|dl|sys|pthread|posix|clone|thunk)",
|
||||
"^(_|\\.|frame_dummy|\\(|targ|new|delete|operator new|operator "
|
||||
"delete|std::allocat|"
|
||||
"nvtx|gcov|main\\.cold|TAU|tau|Tau|dyn|RT|dl|sys|pthread|posix|clone|virtual "
|
||||
"thunk|non-virtual thunk|transaction "
|
||||
"clone|RtsLayer|DYNINST|PthreadLayer|threaded_func|targ8)",
|
||||
regex_opts);
|
||||
static std::regex trailing("(\\.part\\.[0-9]+|\\.constprop\\.[0-9]+|\\.|\\.[0-9]+)$",
|
||||
regex_opts);
|
||||
static std::regex stlfunc("^std::", regex_opts);
|
||||
strset_t whole = { "init", "fini", "_init", "_fini", "atexit" };
|
||||
|
||||
@@ -1799,11 +2003,11 @@ instrument_entity(const string_t& function_name)
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!cstd_func_instr && c_stdlib_function_constraint(function_name))
|
||||
/*if(!cstd_func_instr && c_stdlib_function_constraint(function_name))
|
||||
{
|
||||
verbprintf(3, "Excluding function [libc] : '%s'...\n", function_name.c_str());
|
||||
return false;
|
||||
}
|
||||
}*/
|
||||
|
||||
// don't instrument the functions when key is found anywhere in function name
|
||||
if(std::regex_search(function_name, exclude))
|
||||
@@ -1829,6 +2033,14 @@ instrument_entity(const string_t& function_name)
|
||||
return false;
|
||||
}
|
||||
|
||||
// don't instrument the functions when key is found at the end of the function name
|
||||
if(std::regex_search(function_name, trailing))
|
||||
{
|
||||
verbprintf(3, "Excluding function [critical, trailing match] : '%s'...\n",
|
||||
function_name.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if(whole.count(function_name) > 0)
|
||||
{
|
||||
verbprintf(3, "Excluding function [critical, whole match] : '%s'...\n",
|
||||
@@ -1918,31 +2130,28 @@ bool
|
||||
module_constraint(char* fname)
|
||||
{
|
||||
// fname is the name of module/file
|
||||
int len = strlen(fname);
|
||||
|
||||
string_t _fname = fname;
|
||||
if(_fname.find("hosttrace") != string_t::npos ||
|
||||
_fname.find("tim::") != string_t::npos)
|
||||
|
||||
// never instrument any module matching hosttrace
|
||||
if(_fname.find("hosttrace") != string_t::npos)
|
||||
return true;
|
||||
|
||||
if((strcmp(fname, "DEFAULT_MODULE") == 0) || (strcmp(fname, "LIBRARY_MODULE") == 0) ||
|
||||
((fname[len - 2] == '.') && (fname[len - 1] == 'c')) ||
|
||||
((fname[len - 2] == '.') && (fname[len - 1] == 'C')) ||
|
||||
((fname[len - 3] == '.') && (fname[len - 2] == 'c') && (fname[len - 1] == 'c')) ||
|
||||
((fname[len - 4] == '.') && (fname[len - 3] == 'c') && (fname[len - 2] == 'p') &&
|
||||
(fname[len - 1] == 'p')) ||
|
||||
((fname[len - 4] == '.') && (fname[len - 3] == 'f') && (fname[len - 2] == '9') &&
|
||||
(fname[len - 1] == '0')) ||
|
||||
((fname[len - 4] == '.') && (fname[len - 3] == 'F') && (fname[len - 2] == '9') &&
|
||||
(fname[len - 1] == '0')) ||
|
||||
((fname[len - 2] == '.') && (fname[len - 1] == 'F')) ||
|
||||
((fname[len - 2] == '.') && (fname[len - 1] == 'f')))
|
||||
{
|
||||
//((fname[len-3] == '.') && (fname[len-2] == 's') && (fname[len-1] == 'o'))||
|
||||
// always instrument these modules
|
||||
if(_fname == "DEFAULT_MODULE" || _fname == "LIBRARY_MODULE")
|
||||
return false;
|
||||
}
|
||||
|
||||
if(process_file_for_instrumentation(string_t(fname)))
|
||||
// auto _valid_file_extension = std::regex_search(
|
||||
// _fname, std::regex{ "\\.(a|c|f|o|cc|so|cxx|cpp|C|F|CC|f90|F90|so\\.[0-9\\.]+)$",
|
||||
// regex_opts });
|
||||
|
||||
auto _valid_file_regex = process_file_for_instrumentation(_fname);
|
||||
|
||||
// if module compiled from C, C++, or Fortran or a library
|
||||
// if(_valid_file_extension && _valid_file_regex)
|
||||
// return false;
|
||||
|
||||
// apply regex expressions
|
||||
if(_valid_file_regex)
|
||||
return false;
|
||||
|
||||
// do not instrument
|
||||
@@ -1959,19 +2168,10 @@ routine_constraint(const char* fname)
|
||||
if(_fname.find("hosttrace") != string_t::npos)
|
||||
return true;
|
||||
|
||||
if((strstr(fname, "FunctionInfo") != nullptr) ||
|
||||
(strncmp(fname, "RtsLayer", 8) == 0) || (strncmp(fname, "DYNINST", 7) == 0) ||
|
||||
(strncmp(fname, "PthreadLayer", 12) == 0) ||
|
||||
(strncmp(fname, "threaded_func", 13) == 0) || (strncmp(fname, "targ8", 5) == 0) ||
|
||||
(strncmp(fname, "__intel_", 8) == 0) || (strncmp(fname, "_intel_", 7) == 0) ||
|
||||
(strncmp(fname, "The", 3) == 0) ||
|
||||
// The following functions show up in static executables
|
||||
(strncmp(fname, "__mmap", 6) == 0) || (strncmp(fname, "_IO_printf", 10) == 0) ||
|
||||
(strncmp(fname, "__write", 7) == 0) || (strncmp(fname, "__munmap", 8) == 0) ||
|
||||
(strstr(fname, "_L_lock") != nullptr) || (strstr(fname, "_L_unlock") != nullptr))
|
||||
{
|
||||
auto npos = std::string::npos;
|
||||
if(_fname.find("FunctionInfo") != npos || _fname.find("_L_lock") != npos ||
|
||||
_fname.find("_L_unlock") != npos)
|
||||
return true; // Don't instrument
|
||||
}
|
||||
else
|
||||
{
|
||||
// Should the routine fname be instrumented?
|
||||
@@ -1988,53 +2188,6 @@ routine_constraint(const char* fname)
|
||||
}
|
||||
}
|
||||
|
||||
//======================================================================================//
|
||||
//
|
||||
bool
|
||||
load_dependent_libraries(address_space_t* bedit, char* bindings)
|
||||
{
|
||||
// Order of load matters, just like command line arguments to a standalone linker
|
||||
|
||||
char deplibs[1024];
|
||||
char bindir[] = TIMEMORY_BIN_DIR;
|
||||
char cmd[1024];
|
||||
verbprintf(0, "Inside load_dependent_libraries: bindings=%s\n", bindings);
|
||||
sprintf(cmd, "%s/hosttrace_show_libs %s/../lib/Makefile.hosttrace%s", bindir, bindir,
|
||||
bindings);
|
||||
verbprintf(0, "cmd = %s\n", cmd);
|
||||
FILE* fp = popen(cmd, "r");
|
||||
|
||||
if(fp == nullptr)
|
||||
{
|
||||
perror("hosttrace: Error launching hosttrace_show_libs to get list of "
|
||||
"dependent static libraries for static binary");
|
||||
return false;
|
||||
}
|
||||
|
||||
while((fgets(deplibs, 1024, fp)) != nullptr)
|
||||
{
|
||||
int len = strlen(deplibs);
|
||||
if(deplibs[len - 2] == ',' && deplibs[len - 3] == '"' && deplibs[0] == '"')
|
||||
{
|
||||
deplibs[len - 3] = '\0';
|
||||
verbprintf(0, "LOADING %s\n", &deplibs[1]);
|
||||
if(!bedit->loadLibrary(&deplibs[1]))
|
||||
{
|
||||
fprintf(stderr, "Failed to load dependent library: %s\n", &deplibs[1]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("WARNING: hosttrace_show_libs in hosttrace: Comma not found! "
|
||||
"deplibs = %s\n",
|
||||
deplibs);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//======================================================================================//
|
||||
//
|
||||
std::string
|
||||
|
||||
@@ -0,0 +1,124 @@
|
||||
#include "library.hpp"
|
||||
|
||||
//
|
||||
// This file contains miscellaneous function definitions related to timemory
|
||||
// placed in separate file so that, during development, the long compile-times
|
||||
// arising from compiling timemory's gotcha wrappers are reduced
|
||||
//
|
||||
|
||||
namespace
{
// Value returned by comp::activate_mpip; the maximum uint64_t is used as the
// "not activated" sentinel.
uint64_t mpip_index = std::numeric_limits<uint64_t>::max();

// Attaches an attribute to MPI_COMM_SELF whose callback finalizes hosttrace.
// This ensures hosttrace_trace_finalize is called before MPI_Finalize.
// Compiles to an empty function when TIMEMORY_USE_MPI is not defined.
void
hosttrace_mpi_set_attr()
{
#if defined(TIMEMORY_USE_MPI)
    using func_t = int (*)(MPI_Comm, int, void*, void*);

    static auto _finalize_cb = [](MPI_Comm, int, void*, void*) {
        const bool _mpip_active = (mpip_index != std::numeric_limits<uint64_t>::max());
        if(_mpip_active)
        {
            comp::deactivate_mpip<tim::component_tuple<hosttrace_component>, hosttrace>(
                mpip_index);
        }
        hosttrace_pop_trace("MPI_Finalize()");
        hosttrace_trace_finalize();
        return MPI_SUCCESS;
    };

    int       _keyval = -1;
    const int _created = PMPI_Comm_create_keyval(
        nullptr, static_cast<func_t>(_finalize_cb), &_keyval, nullptr);

    // only attach the attribute when the key was created
    if(_created != MPI_SUCCESS)
        return;

    PMPI_Comm_set_attr(MPI_COMM_SELF, _keyval, nullptr);
#endif
}
}  // namespace
|
||||
|
||||
void
|
||||
fork_gotcha::audit(const gotcha_data_t&, audit::incoming)
|
||||
{
|
||||
HOSTTRACE_DEBUG(
|
||||
"Warning! Calling fork() within an OpenMPI application using libfabric "
|
||||
"may result is segmentation fault\n");
|
||||
TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(get_debug(), 16);
|
||||
}
|
||||
|
||||
void
|
||||
fork_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid)
|
||||
{
|
||||
HOSTTRACE_DEBUG("%s() return PID %i\n", _data.tool_id.c_str(), (int) _pid);
|
||||
}
|
||||
|
||||
void
|
||||
mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, int*, char***)
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] %s(int*, char***)\n", __FUNCTION__, _data.tool_id.c_str());
|
||||
if(get_state() == ::State::DelayedInit)
|
||||
get_state() = ::State::PreInit;
|
||||
}
|
||||
|
||||
void
|
||||
mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, int*, char***, int, int*)
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] %s(int*, char***, int, int*)\n", __FUNCTION__,
|
||||
_data.tool_id.c_str());
|
||||
if(get_state() == ::State::DelayedInit)
|
||||
get_state() = ::State::PreInit;
|
||||
}
|
||||
|
||||
void
|
||||
mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, int _retval)
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] %s() returned %i\n", __FUNCTION__, _data.tool_id.c_str(),
|
||||
(int) _retval);
|
||||
if(_retval == tim::mpi::success_v && get_state() == ::State::PreInit)
|
||||
{
|
||||
hosttrace_mpi_set_attr();
|
||||
// hosttrace will set this environement variable to true in binary rewrite mode
|
||||
// when it detects MPI. Hides this env variable from the user to avoid this
|
||||
// being activated unwaringly during runtime instrumentation because that
|
||||
// will result in double instrumenting the MPI functions (unless the MPI functions
|
||||
// were excluded via a regex expression)
|
||||
if(tim::get_env("HOSTTRACE_USE_MPIP", false, false))
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] Activating MPI wrappers...\n", __FUNCTION__);
|
||||
comp::configure_mpip<tim::component_tuple<hosttrace_component>, hosttrace>();
|
||||
mpip_index = comp::activate_mpip<tim::component_tuple<hosttrace_component>,
|
||||
hosttrace>();
|
||||
}
|
||||
hosttrace_push_trace(_data.tool_id.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
hosttrace_component::start()
|
||||
{
|
||||
if(m_prefix)
|
||||
hosttrace_push_trace(m_prefix);
|
||||
}
|
||||
|
||||
void
|
||||
hosttrace_component::stop()
|
||||
{
|
||||
if(m_prefix)
|
||||
hosttrace_pop_trace(m_prefix);
|
||||
}
|
||||
|
||||
void
|
||||
hosttrace_component::set_prefix(const char* _prefix)
|
||||
{
|
||||
m_prefix = _prefix;
|
||||
}
|
||||
|
||||
// Returns the single, lazily constructed array of hosttrace_timemory_data
// instances; every call yields a reference to the same function-local static.
hosttrace_timemory_data::instance_array_t&
hosttrace_timemory_data::instances()
{
    static instance_array_t _instances{};
    return _instances;
}
|
||||
|
||||
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
|
||||
TIMEMORY_INITIALIZE_STORAGE(fork_gotcha, mpi_gotcha, comp::wall_clock,
|
||||
comp::user_global_bundle)
|
||||
|
||||
#if defined(CUSTOM_DATA_SOURCE)
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
#endif
|
||||
@@ -1,103 +1,6 @@
|
||||
|
||||
#include <perfetto.h>
|
||||
#include "library.hpp"
|
||||
|
||||
#if defined(NDEBUG)
|
||||
# undef NDEBUG
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "timemory/api.hpp"
|
||||
#include "timemory/backends/process.hpp"
|
||||
#include "timemory/backends/threading.hpp"
|
||||
#include "timemory/components.hpp"
|
||||
#include "timemory/config.hpp"
|
||||
#include "timemory/environment.hpp"
|
||||
#include "timemory/manager.hpp"
|
||||
#include "timemory/mpl/apply.hpp"
|
||||
#include "timemory/operations.hpp"
|
||||
#include "timemory/settings.hpp"
|
||||
#include "timemory/storage.hpp"
|
||||
#include "timemory/variadic.hpp"
|
||||
|
||||
#if !defined(JOIN)
|
||||
# define JOIN(...) tim::mpl::apply<std::string>::join(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
namespace audit = tim::audit;
|
||||
namespace comp = tim::component;
|
||||
namespace quirk = tim::quirk;
|
||||
|
||||
struct fork_gotcha : tim::component::base<fork_gotcha, void>
|
||||
{
|
||||
using gotcha_data_t = tim::component::gotcha_data;
|
||||
|
||||
TIMEMORY_DEFAULT_OBJECT(fork_gotcha)
|
||||
|
||||
void audit(const gotcha_data_t& _data, audit::incoming);
|
||||
void audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid);
|
||||
};
|
||||
|
||||
struct fork_gotcha_api : tim::concepts::api
|
||||
{};
|
||||
|
||||
using fork_gotcha_t =
|
||||
tim::component::gotcha<4, tim::component_tuple<fork_gotcha>, fork_gotcha_api>;
|
||||
using fork_bundle_t =
|
||||
tim::lightweight_tuple<comp::wall_clock, comp::peak_rss, comp::cpu_clock,
|
||||
comp::cpu_util, fork_gotcha_t>;
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
PERFETTO_DEFINE_CATEGORIES(
|
||||
perfetto::Category("hosttrace").SetDescription("Function trace"));
|
||||
|
||||
#if defined(CUSTOM_DATA_SOURCE)
|
||||
// Example perfetto data source; each lifecycle callback only logs its name.
class CustomDataSource : public perfetto::DataSource<CustomDataSource>
{
public:
    // Data sources can also have per-instance state.
    int my_custom_state = 0;

    // Hook for applying custom configuration to the data source based on the
    // TraceConfig carried by SetupArgs.
    void OnSetup(const SetupArgs&) override { PRINT_HERE("%s", "setup"); }

    // Hook that can be used to initialize the GPU driver, enable counters,
    // etc. StartArgs contains the DataSourceDescriptor, which can be extended.
    void OnStart(const StartArgs&) override { PRINT_HERE("%s", "start"); }

    // Hook for undoing any initialization done in OnStart.
    void OnStop(const StopArgs&) override { PRINT_HERE("%s", "stop"); }
};
|
||||
|
||||
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
#endif
|
||||
|
||||
extern "C" void
|
||||
hosttrace_trace_finalize();
|
||||
|
||||
namespace
|
||||
{
|
||||
bool
|
||||
get_debug()
|
||||
{
|
||||
@@ -105,30 +8,72 @@ get_debug()
|
||||
return _v;
|
||||
}
|
||||
|
||||
void
|
||||
setup_fork_gotcha()
|
||||
State&
|
||||
get_state()
|
||||
{
|
||||
CONDITIONAL_PRINT_HERE(get_debug(), "%s", "configuring gotcha wrapper around fork");
|
||||
static State _v{ State::PreInit };
|
||||
return _v;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
namespace
|
||||
{
|
||||
auto
|
||||
get_use_perfetto()
|
||||
{
|
||||
// if using timemory, default to perfetto being off
|
||||
static auto _default_v = !tim::get_env<bool>("HOSTTRACE_USE_TIMEMORY", false, false);
|
||||
// explicit env control for using perfetto
|
||||
static auto _v = tim::get_env<bool>("HOSTTRACE_USE_PERFETTO", _default_v);
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto
|
||||
get_use_timemory()
|
||||
{
|
||||
// default to opposite of whether perfetto setting
|
||||
// to use both timemory and perfetto, both HOSTTRACE_USE_TIMEMORY and
|
||||
// HOSTTRACE_USE_PERFETTO must be true
|
||||
static auto _v = tim::get_env<bool>("HOSTTRACE_USE_TIMEMORY", !get_use_perfetto());
|
||||
return _v;
|
||||
}
|
||||
|
||||
bool&
|
||||
get_use_mpi()
|
||||
{
|
||||
// this does not enable anything particularly useful when not using timemory
|
||||
static bool _v = tim::get_env("HOSTTRACE_USE_MPI", false, get_use_timemory());
|
||||
return _v;
|
||||
}
|
||||
|
||||
void
|
||||
setup_gotchas()
|
||||
{
|
||||
static bool _initialized = false;
|
||||
if(_initialized)
|
||||
return;
|
||||
_initialized = true;
|
||||
|
||||
HOSTTRACE_DEBUG(
|
||||
"[%s] Configuring gotcha wrapper around fork, MPI_Init, and MPI_Init_thread\n",
|
||||
__FUNCTION__);
|
||||
|
||||
fork_gotcha_t::get_initializer() = []() {
|
||||
TIMEMORY_C_GOTCHA(fork_gotcha_t, 0, fork);
|
||||
};
|
||||
}
|
||||
|
||||
auto&
|
||||
get_fork_gotcha()
|
||||
{
|
||||
static auto _v =
|
||||
(setup_fork_gotcha(), std::make_unique<fork_bundle_t>(
|
||||
"hosttrace", quirk::config<quirk::auto_start>{}));
|
||||
return _v;
|
||||
mpi_gotcha_t::get_initializer() = []() {
|
||||
mpi_gotcha_t::template configure<0, int, int*, char***>("MPI_Init");
|
||||
mpi_gotcha_t::template configure<1, int, int*, char***, int, int*>(
|
||||
"MPI_Init_thread");
|
||||
};
|
||||
}
|
||||
|
||||
auto
|
||||
ensure_finalization()
|
||||
{
|
||||
if(get_debug())
|
||||
fprintf(stderr, "[%s]\n", __FUNCTION__);
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
return tim::scope::destructor{ []() { hosttrace_trace_finalize(); } };
|
||||
}
|
||||
|
||||
@@ -139,40 +84,30 @@ get_trace_session()
|
||||
return _session;
|
||||
}
|
||||
|
||||
enum class State : unsigned short
|
||||
{
|
||||
PreInit = 0,
|
||||
Active,
|
||||
Finalized
|
||||
};
|
||||
|
||||
auto&
|
||||
get_state()
|
||||
{
|
||||
static State _v{ State::PreInit };
|
||||
return _v;
|
||||
}
|
||||
|
||||
auto&
|
||||
get_output_filename()
|
||||
auto
|
||||
get_perfetto_output_filename()
|
||||
{
|
||||
static auto _v = []() {
|
||||
auto _tmp = tim::get_env<std::string>(
|
||||
// default name: perfetto-trace.<pid>.proto or perfetto-trace.<rank>.proto
|
||||
auto _default_fname = tim::settings::compose_output_filename(
|
||||
JOIN('.', "perfetto-trace", (get_use_mpi()) ? "%rank%" : "%pid%"), "proto");
|
||||
// have the default display the full path to the output file
|
||||
return tim::get_env<std::string>(
|
||||
"HOSTTRACE_OUTPUT_FILE",
|
||||
JOIN('/', tim::get_env<std::string>("PWD", ".", false),
|
||||
"hosttrace.perfetto-trace-%pid%"));
|
||||
auto _replace = [&_tmp](const std::string& _key, auto _val) {
|
||||
auto _pos = _tmp.find(_key);
|
||||
if(_pos != std::string::npos)
|
||||
_tmp.replace(_pos, _key.length(), std::to_string(_val));
|
||||
};
|
||||
_replace("%pid%", tim::process::get_id());
|
||||
_replace("%rank%", tim::mpi::rank());
|
||||
// backwards compatibility
|
||||
_replace("%p", tim::process::get_id());
|
||||
return _tmp;
|
||||
JOIN('/', tim::get_env<std::string>("PWD", ".", false), _default_fname));
|
||||
}();
|
||||
return _v;
|
||||
|
||||
auto _tmp = _v;
|
||||
auto _replace = [&_tmp](const std::string& _key, auto&& _val) {
|
||||
auto _pos = _tmp.find(_key);
|
||||
if(_pos != std::string::npos)
|
||||
_tmp.replace(_pos, _key.length(), std::to_string(_val()));
|
||||
};
|
||||
_replace("%pid%", []() { return tim::process::get_id(); });
|
||||
_replace("%rank%", []() { return tim::mpi::rank(); });
|
||||
// backwards compatibility
|
||||
_replace("%p", []() { return tim::process::get_id(); });
|
||||
return _tmp;
|
||||
}
|
||||
|
||||
auto&
|
||||
@@ -195,63 +130,185 @@ is_system_backend()
|
||||
return (get_backend() != "inprocess");
|
||||
}
|
||||
|
||||
auto&
|
||||
get_timemory_data()
|
||||
{
|
||||
static thread_local auto& _v =
|
||||
hosttrace_timemory_data::instances().at(threading::get_id());
|
||||
return _v;
|
||||
}
|
||||
|
||||
// Returns the mutable pair of callbacks taking a `const char*`; both start
// out as no-ops.
auto&
get_functors()
{
    using functor_t      = std::function<void(const char*)>;
    using functor_pair_t = std::pair<functor_t, functor_t>;

    static functor_pair_t _functors{ [](const char*) {}, [](const char*) {} };
    return _functors;
}
|
||||
|
||||
bool
|
||||
hosttrace_init_perfetto()
|
||||
{
|
||||
if(get_debug())
|
||||
fprintf(stderr, "[%s]\n", __FUNCTION__);
|
||||
|
||||
if(get_state() != State::PreInit)
|
||||
return false;
|
||||
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
|
||||
// always initialize timemory because gotcha wrappers are always used
|
||||
tim::settings::flamegraph_output() = false;
|
||||
tim::settings::file_output() = false;
|
||||
tim::settings::cout_output() = false;
|
||||
tim::settings::file_output() = true;
|
||||
tim::settings::enable_signal_handler() = true;
|
||||
tim::timemory_init({ "hosttrace" });
|
||||
tim::settings::collapse_processes() = false;
|
||||
tim::settings::collapse_threads() = false;
|
||||
tim::settings::max_thread_bookmarks() = 1;
|
||||
tim::settings::global_components() = tim::get_env<std::string>(
|
||||
"HOSTTRACE_COMPONENTS", "wall_clock", get_use_timemory());
|
||||
|
||||
auto& _fork_gotcha = get_fork_gotcha();
|
||||
// enable timestamp directories when perfetto + mpi is activated
|
||||
if(get_use_perfetto() && get_use_mpi())
|
||||
tim::settings::time_output() = true;
|
||||
|
||||
auto _cmd = tim::read_command_line(tim::process::get_id());
|
||||
auto _exe = (_cmd.empty()) ? "hosttrace" : _cmd.front();
|
||||
auto _pos = _exe.find_last_of('/');
|
||||
if(_pos < _exe.length() - 1)
|
||||
_exe = _exe.substr(_pos + 1);
|
||||
|
||||
tim::timemory_init({ _exe }, "hosttrace-");
|
||||
|
||||
if(get_use_timemory())
|
||||
{
|
||||
comp::user_global_bundle::global_init();
|
||||
std::set<int> _comps{};
|
||||
// convert string into set of enumerations
|
||||
for(auto&& itr : tim::delimit(tim::settings::global_components()))
|
||||
_comps.emplace(tim::runtime::enumerate(itr));
|
||||
if(_comps.size() == 1 && _comps.find(TIMEMORY_WALL_CLOCK) != _comps.end())
|
||||
{
|
||||
// using wall_clock directly is lower overhead than using it via user_bundle
|
||||
bundle_t::get_initializer() = [](bundle_t& _bundle) {
|
||||
_bundle.initialize<comp::wall_clock>();
|
||||
};
|
||||
}
|
||||
else if(!_comps.empty())
|
||||
{
|
||||
// use user_bundle for other than wall-clock
|
||||
bundle_t::get_initializer() = [](bundle_t& _bundle) {
|
||||
_bundle.initialize<comp::user_global_bundle>();
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
tim::trait::runtime_enabled<hosttrace>::set(false);
|
||||
}
|
||||
}
|
||||
|
||||
// always activate gotcha wrappers
|
||||
auto& _fork_gotcha = get_main_bundle();
|
||||
_fork_gotcha->start();
|
||||
assert(_fork_gotcha->get<fork_gotcha_t>()->get_is_running());
|
||||
|
||||
// environment settings
|
||||
auto shmem_size_hint = tim::get_env<size_t>("HOSTTRACE_SHMEM_SIZE_HINT_KB", 40960);
|
||||
auto buffer_size = tim::get_env<size_t>("HOSTTRACE_BUFFER_SIZE_KB", 1024000);
|
||||
assert(_fork_gotcha->get<mpi_gotcha_t>()->get_is_running());
|
||||
|
||||
perfetto::TracingInitArgs args{};
|
||||
perfetto::TraceConfig cfg{};
|
||||
perfetto::protos::gen::TrackEventConfig track_event_cfg{};
|
||||
|
||||
auto *buffer_config = cfg.add_buffers();
|
||||
buffer_config->set_size_kb(buffer_size);
|
||||
buffer_config->set_fill_policy(perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD);
|
||||
// perfetto initialization
|
||||
if(get_use_perfetto())
|
||||
{
|
||||
// environment settings
|
||||
auto shmem_size_hint =
|
||||
tim::get_env<size_t>("HOSTTRACE_SHMEM_SIZE_HINT_KB", 40960);
|
||||
auto buffer_size = tim::get_env<size_t>("HOSTTRACE_BUFFER_SIZE_KB", 1024000);
|
||||
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("track_event");
|
||||
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
auto* buffer_config = cfg.add_buffers();
|
||||
buffer_config->set_size_kb(buffer_size);
|
||||
buffer_config->set_fill_policy(
|
||||
perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD);
|
||||
|
||||
args.shmem_size_hint_kb = shmem_size_hint;
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("track_event");
|
||||
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
|
||||
|
||||
if(get_backend() != "inprocess")
|
||||
args.backends |= perfetto::kSystemBackend;
|
||||
if(get_backend() != "system")
|
||||
args.backends |= perfetto::kInProcessBackend;
|
||||
args.shmem_size_hint_kb = shmem_size_hint;
|
||||
|
||||
perfetto::Tracing::Initialize(args);
|
||||
perfetto::TrackEvent::Register();
|
||||
if(get_backend() != "inprocess")
|
||||
args.backends |= perfetto::kSystemBackend;
|
||||
if(get_backend() != "system")
|
||||
args.backends |= perfetto::kInProcessBackend;
|
||||
|
||||
(void) get_output_filename();
|
||||
perfetto::Tracing::Initialize(args);
|
||||
perfetto::TrackEvent::Register();
|
||||
|
||||
tim::print_env(std::cerr,
|
||||
[](const std::string& _v) { return _v.find("HOSTTRACE_") == 0; });
|
||||
(void) get_perfetto_output_filename();
|
||||
}
|
||||
|
||||
if(!is_system_backend())
|
||||
// functors for starting and stopping timemory
|
||||
static auto _push_timemory = [](const char* name) {
|
||||
auto& _data = get_timemory_data();
|
||||
// this generates a hash for the raw string array
|
||||
auto _hash = tim::add_hash_id(tim::string_view_t{ name });
|
||||
auto* _bundle = _data.allocator.allocate(1);
|
||||
_data.bundles.emplace_back(_bundle);
|
||||
_data.allocator.construct(_bundle, _hash);
|
||||
_bundle->start();
|
||||
};
|
||||
|
||||
static auto _pop_timemory = [](const char* name) {
|
||||
auto& _data = get_timemory_data();
|
||||
if(_data.bundles.empty())
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] skipped %s :: empty bundle stack\n",
|
||||
"hosttrace_pop_trace", name);
|
||||
return;
|
||||
}
|
||||
_data.bundles.back()->stop();
|
||||
_data.allocator.destroy(_data.bundles.back());
|
||||
_data.allocator.deallocate(_data.bundles.back(), 1);
|
||||
_data.bundles.pop_back();
|
||||
};
|
||||
|
||||
if(get_use_perfetto() && get_use_timemory())
|
||||
{
|
||||
// if both are used, then use perfetto overload for calling lambda to launch
|
||||
// timemory
|
||||
get_functors().first = [](const char* name) {
|
||||
TRACE_EVENT_BEGIN("hosttrace", perfetto::StaticString(name),
|
||||
[&](perfetto::EventContext) { _push_timemory(name); });
|
||||
};
|
||||
get_functors().second = [](const char* name) {
|
||||
TRACE_EVENT_END("hosttrace",
|
||||
[&](perfetto::EventContext) { _pop_timemory(name); });
|
||||
};
|
||||
}
|
||||
else if(get_use_perfetto())
|
||||
{
|
||||
get_functors().first = [](const char* name) {
|
||||
TRACE_EVENT_BEGIN("hosttrace", perfetto::StaticString(name));
|
||||
};
|
||||
get_functors().second = [](const char*) { TRACE_EVENT_END("hosttrace"); };
|
||||
}
|
||||
else if(get_use_timemory())
|
||||
{
|
||||
get_functors().first = _push_timemory;
|
||||
get_functors().second = _pop_timemory;
|
||||
}
|
||||
|
||||
if(tim::dmp::rank() == 0)
|
||||
{
|
||||
tim::print_env(std::cerr,
|
||||
[](const std::string& _v) { return _v.find("HOSTTRACE_") == 0; });
|
||||
}
|
||||
|
||||
if(get_use_perfetto() && !is_system_backend())
|
||||
{
|
||||
#if defined(CUSTOM_DATA_SOURCE)
|
||||
// Add the following:
|
||||
perfetto::DataSourceDescriptor dsd{};
|
||||
dsd.set_name("com.example.custom_data_source");
|
||||
CustomDataSource::Register(dsd);
|
||||
ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
|
||||
ds_cfg->set_name("com.example.custom_data_source");
|
||||
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
|
||||
auto packet = ctx.NewTracePacket();
|
||||
@@ -273,87 +330,118 @@ hosttrace_init_perfetto()
|
||||
// ends the tracing session
|
||||
static auto _ensure_finalization = ensure_finalization();
|
||||
|
||||
puts("");
|
||||
if(tim::dmp::rank() == 0)
|
||||
puts("");
|
||||
return true;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
extern "C"
|
||||
{
|
||||
void hosttrace_push_trace(const char* name)
|
||||
{
|
||||
if(get_debug())
|
||||
fprintf(stderr, "[%s] %s\n", __FUNCTION__, name);
|
||||
// return if not active
|
||||
if(get_state() != State::Active && !hosttrace_init_perfetto())
|
||||
if(get_state() == State::Finalized)
|
||||
return;
|
||||
// TRACE_EVENT_BEGIN(
|
||||
// "hosttrace", perfetto::StaticString(name),
|
||||
// [&](perfetto::EventContext ctx) { PRINT_HERE("executing %s", name); });
|
||||
TRACE_EVENT_BEGIN("hosttrace", perfetto::StaticString(name));
|
||||
|
||||
if(get_state() != State::Active && !hosttrace_init_perfetto())
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] %s :: not active and perfetto not initialized\n",
|
||||
__FUNCTION__, name);
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] %s\n", __FUNCTION__, name);
|
||||
}
|
||||
|
||||
get_functors().first(name);
|
||||
}
|
||||
|
||||
void hosttrace_pop_trace(const char* name)
|
||||
{
|
||||
if(get_debug())
|
||||
fprintf(stderr, "[%s] %s\n", __FUNCTION__, name);
|
||||
// return if not active
|
||||
if(get_state() != State::Active)
|
||||
return;
|
||||
// TRACE_EVENT_END("hosttrace",
|
||||
// [&](perfetto::EventContext ctx) { PRINT_HERE("executing %s", name); });
|
||||
TRACE_EVENT_END("hosttrace");
|
||||
if(get_state() == State::Active)
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] %s\n", __FUNCTION__, name);
|
||||
get_functors().second(name);
|
||||
}
|
||||
else
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] %s :: not active\n", __FUNCTION__, name);
|
||||
}
|
||||
}
|
||||
|
||||
void hosttrace_trace_init(const char*, bool, const char*)
|
||||
{
|
||||
if(get_debug())
|
||||
fprintf(stderr, "[%s]\n", __FUNCTION__);
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
hosttrace_init_perfetto();
|
||||
}
|
||||
|
||||
void hosttrace_trace_finalize(void)
|
||||
{
|
||||
if(get_debug())
|
||||
fprintf(stderr, "[%s]\n", __FUNCTION__);
|
||||
// return if not active
|
||||
if(get_state() != State::Active)
|
||||
return;
|
||||
|
||||
puts("");
|
||||
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
|
||||
|
||||
if(tim::dmp::rank() == 0)
|
||||
puts("");
|
||||
|
||||
get_state() = State::Finalized;
|
||||
|
||||
if(get_fork_gotcha())
|
||||
if(get_main_bundle())
|
||||
{
|
||||
get_fork_gotcha()->stop();
|
||||
std::cout << *get_fork_gotcha() << std::endl;
|
||||
get_fork_gotcha().reset();
|
||||
get_main_bundle()->stop();
|
||||
int64_t _id = (get_use_mpi()) ? tim::dmp::rank() : tim::process::get_id();
|
||||
std::stringstream _ss{};
|
||||
_ss << "[" << __FUNCTION__ << "][" << _id << "] " << *get_main_bundle()
|
||||
<< "\n";
|
||||
std::cout << _ss.str();
|
||||
get_main_bundle().reset();
|
||||
}
|
||||
|
||||
if(!is_system_backend())
|
||||
// ensure that all the MT instances are flushed
|
||||
for(auto& itr : hosttrace_timemory_data::instances())
|
||||
{
|
||||
while(!itr.bundles.empty())
|
||||
{
|
||||
itr.bundles.back()->stop();
|
||||
itr.bundles.back()->pop();
|
||||
itr.allocator.destroy(itr.bundles.back());
|
||||
itr.allocator.deallocate(itr.bundles.back(), 1);
|
||||
itr.bundles.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
if(get_use_perfetto() && !is_system_backend())
|
||||
{
|
||||
// Make sure the last event is closed for this example.
|
||||
perfetto::TrackEvent::Flush();
|
||||
|
||||
auto& tracing_session = get_trace_session();
|
||||
tracing_session->StopBlocking();
|
||||
|
||||
std::vector<char> trace_data{ tracing_session->ReadTraceBlocking() };
|
||||
|
||||
if(trace_data.empty())
|
||||
{
|
||||
fprintf(stderr,
|
||||
"[%s]> trace data is empty. File '%s' will not be written...\n",
|
||||
__FUNCTION__, get_output_filename().c_str());
|
||||
__FUNCTION__, get_perfetto_output_filename().c_str());
|
||||
return;
|
||||
}
|
||||
// Write the trace into a file.
|
||||
fprintf(stderr, "[%s]> Outputting '%s'. Trace data: %lu bytes...\n",
|
||||
__FUNCTION__, get_output_filename().c_str(),
|
||||
__FUNCTION__, get_perfetto_output_filename().c_str(),
|
||||
(unsigned long) trace_data.size());
|
||||
std::ofstream output{};
|
||||
output.open(get_output_filename(), std::ios::out | std::ios::binary);
|
||||
output.open(get_perfetto_output_filename(), std::ios::out | std::ios::binary);
|
||||
if(!output)
|
||||
fprintf(stderr, "[%s]> Error opening '%s'...\n", __FUNCTION__,
|
||||
get_output_filename().c_str());
|
||||
get_perfetto_output_filename().c_str());
|
||||
else
|
||||
output.write(&trace_data[0], trace_data.size());
|
||||
output.close();
|
||||
@@ -364,26 +452,33 @@ extern "C"
|
||||
|
||||
void hosttrace_trace_set_env(const char* env_name, const char* env_val)
|
||||
{
|
||||
if(get_debug())
|
||||
fprintf(stderr, "[%s] Setting env: %s=%s\n", __FUNCTION__, env_name, env_val);
|
||||
HOSTTRACE_DEBUG("[%s] Setting env: %s=%s\n", __FUNCTION__, env_name, env_val);
|
||||
|
||||
tim::set_env(env_name, env_val, 0);
|
||||
}
|
||||
|
||||
void hosttrace_trace_set_mpi(bool use, bool attached)
|
||||
{
|
||||
HOSTTRACE_DEBUG("[%s] use: %s, attached: %s\n", __FUNCTION__, (use) ? "y" : "n",
|
||||
(attached) ? "y" : "n");
|
||||
if(use && !attached)
|
||||
{
|
||||
auto& _fork_gotcha = get_main_bundle();
|
||||
_fork_gotcha->start();
|
||||
tim::set_env("HOSTTRACE_USE_MPI", "ON", 1);
|
||||
get_use_mpi() = true;
|
||||
get_state() = State::DelayedInit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fork_gotcha::audit(const gotcha_data_t& _data, audit::incoming)
|
||||
std::unique_ptr<hosttrace_bundle_t>&
|
||||
get_main_bundle()
|
||||
{
|
||||
PRINT_HERE("%s",
|
||||
"Warning! Calling fork() within an OpenMPI application using libfabric "
|
||||
"may result is segmentation fault");
|
||||
TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(get_debug(), 16);
|
||||
}
|
||||
|
||||
void
|
||||
fork_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid)
|
||||
{
|
||||
PRINT_HERE("%s() return PID %i", _data.tool_id.c_str(), (int) _pid);
|
||||
static auto _v =
|
||||
(setup_gotchas(), std::make_unique<hosttrace_bundle_t>(
|
||||
"hosttrace", quirk::config<quirk::auto_start>{}));
|
||||
return _v;
|
||||
}
|
||||
|
||||
namespace
|
||||
@@ -393,10 +488,3 @@ namespace
|
||||
// but static variable in hosttrace_init_perfetto is more likely
|
||||
auto _ensure_finalization = ensure_finalization();
|
||||
} // namespace
|
||||
|
||||
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
|
||||
TIMEMORY_INITIALIZE_STORAGE(fork_gotcha)
|
||||
|
||||
#if defined(CUSTOM_DATA_SOURCE)
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
|
||||
#endif
|
||||
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador