2
0

Improved analysis of functions to instrument + MPI support + timemory support (#2)

* various tweaks
* build updates + cleanup + overlap guard + min addr range
* Library source reorg + miscellaneous tweaks
* Removed unnecessary fwd decls
* Print address range in --print-X pair mode

- hosttrace modifications
  - disable instrumenting functions with overlapping sections or multiple entry points by default (control via --allow-overlapping option)
  - disable instrumenting functions whose address range < 512 bytes unless a loop is present by default (control via --min-address-range option)
  - disable instrumenting functions w/ loops whose address range < 64 bytes by default (control via --min-address-range-loop option)
- Support for wrapping MPI function calls even in binary rewrite mode
  - e.g. use gotcha to wrap MPI functions with hosttrace_push_trace and hosttrace_pop_trace
- New timemory only mode --> HOSTTRACE_USE_TIMEMORY=ON
- New timemory + perfetto mode --> HOSTTRACE_USE_PERFETTO=ON + HOSTTRACE_USE_TIMEMORY=ON
- Full support for all timemory components
- parallel-overhead example for measuring the overhead in a MT-parallelized application with very small instrumentation functions
- improvements to output directories for hosttrace exe
- improvements to output directories for hosttrace library
- new hosttrace options
  - --print-instrumented <type> prints out the instrumented entities and exits
  - --print-available <type> prints out the available instrumentation entities and exits
  - --print-overlapping <type> prints out the overlapping entities and exits
  - NOTE: <type> above refers to the information printed out, e.g. module name vs. function name vs. module and function name, etc.

[ROCm/rocprofiler-systems commit: 1f15b3070f]
Este cometimento está contido em:
Jonathan R. Madsen
2021-09-02 11:38:39 -05:00
cometido por GitHub
ascendente 1ff2dfed88
cometimento 6825578603
15 ficheiros modificados com 1202 adições e 565 eliminações
+2
Ver ficheiro
@@ -30,4 +30,6 @@
*.exe
*.out
*.app
/build*
/.vscode
+15 -3
Ver ficheiro
@@ -29,6 +29,7 @@ include(BuildSettings) # compiler flags
set(CMAKE_CXX_STANDARD 17 CACHE STRING "CXX language standard")
add_option(CMAKE_CXX_STANDARD_REQUIRED "Require C++ language standard" ON)
add_option(CMAKE_CXX_EXTENSIONS "Compiler specific language extensions" OFF)
add_option(CMAKE_INSTALL_RPATH_USE_LINK_PATH "Enable rpath to linked libraries" ON)
add_option(HOSTTRACE_USE_CLANG_TIDY "Enable clang-tidy" OFF)
include(Packages) # finds third-party libraries
@@ -45,6 +46,8 @@ option(HOSTTRACE_CUSTOM_DATA_SOURCE "Enable custom data source" OFF)
add_library(hosttrace-library SHARED
${CMAKE_CURRENT_LIST_DIR}/src/library.cpp
${CMAKE_CURRENT_LIST_DIR}/src/libmisc.cpp
${CMAKE_CURRENT_LIST_DIR}/include/library.hpp
${perfetto_DIR}/sdk/perfetto.cc)
target_include_directories(hosttrace-library PRIVATE
@@ -55,15 +58,23 @@ target_include_directories(hosttrace-library SYSTEM PRIVATE
target_compile_definitions(hosttrace-library PRIVATE
$<IF:$<BOOL:${HOSTTRACE_CUSTOM_DATA_SOURCE}>,CUSTOM_DATA_SOURCE,>)
target_link_libraries(hosttrace-library PRIVATE
hosttrace::hosttrace-threading
$<BUILD_INTERFACE:timemory::timemory-headers>
$<BUILD_INTERFACE:timemory::timemory-gotcha>
$<BUILD_INTERFACE:timemory::timemory-cxx-shared>
$<BUILD_INTERFACE:timemory::timemory-threading>
$<BUILD_INTERFACE:timemory::timemory-compile-options>
$<IF:$<BOOL:${hosttrace_USE_SANITIZER}>,hosttrace::hosttrace-sanitizer,>)
if(DYNINST_API_RT)
get_filename_component(DYNINST_API_RT_DIR "${DYNINST_API_RT}" DIRECTORY)
endif()
set_target_properties(hosttrace-library PROPERTIES
OUTPUT_NAME hosttrace)
OUTPUT_NAME hosttrace
INSTALL_RPATH "\$ORIGIN:${DYNINST_API_RT_DIR}:${CMAKE_INSTALL_RPATH}")
install(
TARGETS hosttrace-library
@@ -87,7 +98,8 @@ target_include_directories(hosttrace-exe PRIVATE
target_link_libraries(hosttrace-exe PRIVATE
$<BUILD_INTERFACE:timemory::timemory-headers>
hosttrace::hosttrace-dyninst
hosttrace::hosttrace-compile-options)
hosttrace::hosttrace-compile-options
$<IF:$<BOOL:${hosttrace_USE_SANITIZER}>,hosttrace::hosttrace-sanitizer,>)
set_target_properties(hosttrace-exe PROPERTIES
OUTPUT_NAME hosttrace
+17
Ver ficheiro
@@ -204,6 +204,8 @@ set(TIMEMORY_USE_GOTCHA ON CACHE BOOL "Enable GOTCHA support in tim
set(TIMEMORY_USE_PERFETTO OFF CACHE BOOL "Disable perfetto support in timemory")
# timemory feature build settings
set(TIMEMORY_BUILD_GOTCHA ON CACHE BOOL "Enable building GOTCHA library from submodule")
# timemory build settings
set(TIMEMORY_TLS_MODEL "global-dynamic" CACHE STRING "Thread-local static model" FORCE)
checkout_git_submodule(
RELATIVE_PATH external/timemory
@@ -211,4 +213,19 @@ checkout_git_submodule(
REPO_URL https://github.com/NERSC/timemory.git
REPO_BRANCH develop)
hosttrace_save_variables(BUILD_CONFIG
BUILD_SHARED_LIBS
BUILD_STATIC_LIBS
CMAKE_POSITION_INDEPENDENT_CODE)
# build timemory with position-independent code; NOTE: the settings below enable shared
# libs and disable static libs (the earlier wording here said "PIC static libs") -- TODO confirm intent
set(BUILD_SHARED_LIBS ON)
set(BUILD_STATIC_LIBS OFF)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(external/timemory)
hosttrace_restore_variables(BUILD_CONFIG
BUILD_SHARED_LIBS
BUILD_STATIC_LIBS
CMAKE_POSITION_INDEPENDENT_CODE)
+1
Ver ficheiro
@@ -4,3 +4,4 @@ project(hosttrace-dyninst-examples
LANGUAGES CXX)
add_subdirectory(transpose)
add_subdirectory(parallel-overhead)
@@ -0,0 +1,5 @@
set(CMAKE_BUILD_TYPE "Release")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
add_executable(parallel-overhead parallel-overhead.cpp)
target_link_libraries(parallel-overhead Threads::Threads)
@@ -0,0 +1,52 @@
#include <atomic>
#include <cstdio>
#include <cstdlib>
#include <thread>
#include <vector>
std::atomic<long> total{ 0 };
long
fib(long n) __attribute__((noinline));
void
run(size_t nitr, long) __attribute__((noinline));
// Naive doubly-recursive Fibonacci: returns n itself for n < 2, otherwise
// fib(n - 1) + fib(n - 2). The recursion is intentional -- it generates a very large
// number of calls to a very small function, which is the workload this example needs.
long
fib(long n)
{
    if(n < 2)
        return n;
    return fib(n - 1) + fib(n - 2);
}
// Per-thread workload: evaluates fib(n) `nitr` times, sums the results in a
// thread-local accumulator, and publishes the sum to the global atomic `total` with a
// single atomic addition at the end.
void
run(size_t nitr, long n)
{
    long   partial_sum = 0;
    size_t remaining   = nitr;
    while(remaining > 0)
    {
        partial_sum += fib(n);
        --remaining;
    }
    total.fetch_add(partial_sum);
}
// Entry point of the parallel-overhead example.
//
// Usage: parallel-overhead [nfib] [nthread] [nitr]
//   nfib    - argument passed to fib()                  (default: 10)
//   nthread - number of worker threads to launch        (default: 16)
//   nitr    - number of fib() evaluations per thread    (default: 50000)
//
// Every thread executes run(nitr, nfib), which adds its partial sum to the global
// `total`. The accumulated value is printed after all threads have been joined.
int
main(int argc, char** argv)
{
    size_t nthread = 16;
    size_t nitr    = 50000;
    long   nfib    = 10;

    // atol() yields a signed long: assigning a negative result directly to a size_t
    // wraps around to an enormous count, so negative input keeps the default instead
    auto _parse_count = [](const char* _arg, size_t _default) {
        const long _val = atol(_arg);
        return (_val < 0) ? _default : static_cast<size_t>(_val);
    };

    if(argc > 1)
        nfib = atol(argv[1]);
    if(argc > 2)
        nthread = _parse_count(argv[2], nthread);
    if(argc > 3)
        nitr = _parse_count(argv[3], nitr);

    std::vector<std::thread> threads{};
    for(size_t i = 0; i < nthread; ++i)
        threads.emplace_back(&run, nitr, nfib);

    for(auto& itr : threads)
        itr.join();

    // %zu is the portable conversion for size_t (%lu is only correct on platforms
    // where size_t happens to be unsigned long)
    printf("fibonacci(%li) x %zu = %li\n", nfib, nthread, total.load());

    return 0;
}
+16 -4
Ver ficheiro
@@ -45,21 +45,33 @@ if(TARGET MPI::MPI_C)
get_target_property(INCLUDE_DIRS MPI::MPI_C INTERFACE_INCLUDE_DIRECTORIES)
foreach(_IDIR ${INCLUDE_DIRS})
set(transpose_CXX_FLAGS "${transpose_CXX_FLAGS} -I${_IDIR}")
endforeach()
if(MPI_C_LINK_FLAGS)
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} ${MPI_C_LINK_FLAGS}")
endif()
set(_LINK_LIBS "")
foreach(_LIB ${MPI_C_LIB_NAMES})
string(APPEND _LINK_LIBS "-l${_LIB} ")
endforeach()
foreach(_IDIR ${INCLUDE_DIRS} ${MPI_mpich_LIBRARY} ${MPI_mpi_LIBRARY} ${MPI_LIBRARY_DIRS})
get_filename_component(_LIBDIR "${_IDIR}" DIRECTORY)
if(EXISTS "${_IDIR}/libmpi${CMAKE_SHARED_LIBRARY_SUFFIX}")
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} -L${_IDIR} ${_LINK_LIBS}")
endif()
if(EXISTS "${_LIBDIR}/libmpi${CMAKE_SHARED_LIBRARY_SUFFIX}")
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} -L${_LIBDIR} ${_LINK_LIBS}")
endif()
foreach(_LDIR lib lib64)
set(_LIBDIR_SAVE "${_LIBDIR}")
if(NOT EXISTS "${_LIBDIR}/${_LDIR}")
get_filename_component(_LIBDIR "${_LIBDIR}" DIRECTORY)
endif()
if(EXISTS "${_LIBDIR}/${_LDIR}")
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} -L${_LIBDIR}/${_LDIR} -lmpi")
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} -L${_LIBDIR}/${_LDIR} ${_LINK_LIBS}")
endif()
set(_LIBDIR "${_LIBDIR_SAVE}")
endforeach()
endforeach()
if(MPI_C_LINK_FLAGS)
set(transpose_LINK_FLAGS "${transpose_LINK_FLAGS} ${MPI_C_LINK_FLAGS}")
endif()
endif()
# remove generator expressions
+5 -1
Ver ficheiro
@@ -155,11 +155,15 @@ run(int argc, char** argv)
int
main(int argc, char** argv)
{
int rank = 0;
#if defined(USE_MPI)
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif
run(argc, argv);
if(rank == 0)
run(argc, argv);
#if defined(USE_MPI)
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
#endif
return 0;
Submódulo projects/rocprofiler-systems/external/timemory modificado: 7542f48e65...aa4a0ed7b2
+78 -49
Ver ficheiro
@@ -140,24 +140,25 @@ static int verbose_level = tim::get_env<int>("TIMEMORY_RUN_VERBOSE", 0);
// string settings
//
static string_t main_fname = "main";
static string_t argv0 = "";
static string_t cmdv0 = "";
static string_t argv0 = {};
static string_t cmdv0 = {};
static string_t default_components = "wall_clock";
static string_t prefer_library = "";
static string_t prefer_library = {};
//
// global variables
//
static patch_pointer_t bpatch;
static call_expr_t* initialize_expr = nullptr;
static call_expr_t* terminate_expr = nullptr;
static snippet_vec_t init_names;
static snippet_vec_t fini_names;
static fmodset_t available_module_functions;
static fmodset_t instrumented_module_functions;
static regexvec_t func_include;
static regexvec_t func_exclude;
static regexvec_t file_include;
static regexvec_t file_exclude;
static patch_pointer_t bpatch = {};
static call_expr_t* initialize_expr = nullptr;
static call_expr_t* terminate_expr = nullptr;
static snippet_vec_t init_names = {};
static snippet_vec_t fini_names = {};
static fmodset_t available_module_functions = {};
static fmodset_t instrumented_module_functions = {};
static fmodset_t overlapping_module_functions = {};
static regexvec_t func_include = {};
static regexvec_t func_exclude = {};
static regexvec_t file_include = {};
static regexvec_t file_exclude = {};
static auto regex_opts = std::regex_constants::egrep | std::regex_constants::optimize;
//
//======================================================================================//
@@ -219,17 +220,6 @@ error_func_real(error_level_t level, int num, const char* const* params);
void
error_func_fake(error_level_t level, int num, const char* const* params);
bool
find_func_or_calls(std::vector<const char*> names, bpvector_t<point_t*>& points,
image_t* appImage, procedure_loc_t loc = BPatch_locEntry);
bool
find_func_or_calls(const char* name, bpvector_t<point_t*>& points, image_t* image,
procedure_loc_t loc = BPatch_locEntry);
bool
load_dependent_libraries(address_space_t* bedit, char* bindings);
bool
c_stdlib_module_constraint(const string_t& file);
@@ -283,10 +273,10 @@ struct function_signature
location_t m_row = { 0, 0 };
location_t m_col = { 0, 0 };
string_t m_return = "void";
string_t m_name = "";
string_t m_name = {};
string_t m_params = "()";
string_t m_file = "";
mutable string_t m_signature = "";
string_t m_file = {};
mutable string_t m_signature = {};
TIMEMORY_DEFAULT_OBJECT(function_signature)
@@ -360,7 +350,10 @@ struct function_signature
//
struct module_function
{
using width_t = std::array<size_t, 3>;
using width_t = std::array<size_t, 3>;
using address_t = Dyninst::Address;
static constexpr size_t absolute_max_width = 80;
static auto& get_width()
{
@@ -399,6 +392,13 @@ struct module_function
module = modname;
function = fname;
signature = get_func_file_line_info(mod, proc);
assert(proc->isInstrumentable() == true);
std::pair<address_t, address_t> _range{};
if(proc->getAddressRange(_range.first, _range.second))
address_range = _range.second - _range.first;
auto _instructions = proc->findPoint(BPatch_locInstruction);
if(_instructions)
instr_count = _instructions->size();
}
friend bool operator<(const module_function& lhs, const module_function& rhs)
@@ -410,56 +410,85 @@ struct module_function
: (lhs.module < rhs.module);
}
static void write_header(std::ostream& os)
{
auto w0 = std::min<size_t>(get_width()[0], absolute_max_width);
auto w1 = std::min<size_t>(get_width()[1], absolute_max_width);
auto w2 = std::min<size_t>(get_width()[2], absolute_max_width);
std::stringstream ss;
ss << std::setw(14) << "AddressRange"
<< " " << std::setw(14) << "InstrCount"
<< " " << std::setw(w0 + 8) << std::left << "Module"
<< " " << std::setw(w1 + 8) << std::left << "Function"
<< " " << std::setw(w2 + 8) << std::left << "FunctionSignature"
<< "\n";
os << ss.str();
}
friend std::ostream& operator<<(std::ostream& os, const module_function& rhs)
{
std::stringstream ss;
static size_t absolute_max = 80;
auto w0 = std::min<size_t>(get_width()[0], absolute_max);
auto w1 = std::min<size_t>(get_width()[1], absolute_max);
auto w2 = std::min<size_t>(get_width()[2], absolute_max);
auto w0 = std::min<size_t>(get_width()[0], absolute_max_width);
auto w1 = std::min<size_t>(get_width()[1], absolute_max_width);
auto w2 = std::min<size_t>(get_width()[2], absolute_max_width);
auto _get_str = [](const std::string& _inc) {
if(_inc.length() > absolute_max)
return _inc.substr(0, absolute_max - 3) + "...";
if(_inc.length() > absolute_max_width)
return _inc.substr(0, absolute_max_width - 3) + "...";
return _inc;
};
ss << std::setw(w0 + 8) << std::left << _get_str(rhs.module) << " "
// clang-format off
ss << std::setw(14) << rhs.address_range << " "
<< std::setw(14) << rhs.instr_count << " "
<< std::setw(w0 + 8) << std::left << _get_str(rhs.module) << " "
<< std::setw(w1 + 8) << std::left << _get_str(rhs.function) << " "
<< std::setw(w2 + 8) << std::left << _get_str(rhs.signature.get());
// clang-format on
os << ss.str();
return os;
}
string_t module = "";
string_t function = "";
size_t address_range = 0;
size_t instr_count = 0;
string_t module = {};
string_t function = {};
function_signature signature;
};
//
//======================================================================================//
//
static inline void
dump_info(const string_t& _oname, const fmodset_t& _data, int level)
dump_info(std::ostream& _os, const fmodset_t& _data)
{
if(!debug_print && verbose_level < level)
return;
module_function::reset_width();
for(const auto& itr : _data)
module_function::update_width(itr);
module_function::write_header(_os);
for(const auto& itr : _data)
_os << itr << '\n';
module_function::reset_width();
}
//
static inline void
dump_info(const string_t& _oname, const fmodset_t& _data, int _level)
{
if(!debug_print && verbose_level < _level)
return;
std::ofstream ofs(_oname);
if(ofs)
{
verbprintf(level, "Dumping '%s'... ", _oname.c_str());
for(const auto& itr : _data)
ofs << itr << '\n';
verbprintf(level, "Done\n");
verbprintf(_level, "Dumping '%s'... ", _oname.c_str());
dump_info(ofs, _data);
verbprintf(_level, "Done\n");
}
ofs.close();
module_function::reset_width();
}
//
//======================================================================================//
@@ -554,7 +583,7 @@ private:
//
static inline address_space_t*
hosttrace_get_address_space(patch_pointer_t _bpatch, int _cmdc, char** _cmdv,
bool _rewrite, int _pid = -1, string_t _name = "")
bool _rewrite, int _pid = -1, string_t _name = {})
{
address_space_t* mutatee = nullptr;
+209
Ver ficheiro
@@ -0,0 +1,209 @@
#pragma once
#if !defined(TIMEMORY_USE_PERFETTO)
# include <perfetto.h>
# define PERFETTO_CATEGORIES \
perfetto::Category("hosttrace").SetDescription("Function trace")
#else
# define PERFETTO_CATEGORIES \
perfetto::Category("hosttrace").SetDescription("Function trace"), \
perfetto::Category("timemory") \
.SetDescription("Events from the timemory API")
# define TIMEMORY_PERFETTO_CATEGORIES PERFETTO_CATEGORIES
#endif
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <memory>
#include <mutex>
#include <string>
#include <sys/types.h>
#include <thread>
#include <unistd.h>
#include <utility>
#include <vector>
#include "timemory/api.hpp"
#include "timemory/backends/mpi.hpp"
#include "timemory/backends/process.hpp"
#include "timemory/backends/threading.hpp"
#include "timemory/components.hpp"
#include "timemory/components/gotcha/mpip.hpp"
#include "timemory/config.hpp"
#include "timemory/environment.hpp"
#include "timemory/manager.hpp"
#include "timemory/mpl/apply.hpp"
#include "timemory/operations.hpp"
#include "timemory/runtime.hpp"
#include "timemory/settings.hpp"
#include "timemory/storage.hpp"
#include "timemory/variadic.hpp"
// Forward declarations of the C-linkage hosttrace API.
// Every entry point is annotated with TIMEMORY_VISIBILITY("default").
extern "C"
{
// push/pop pair taking the name of the traced region
void hosttrace_push_trace(const char* name) TIMEMORY_VISIBILITY("default");
void hosttrace_pop_trace(const char* name) TIMEMORY_VISIBILITY("default");
// NOTE(review): the three parameters are unnamed here -- see the definition for their
// meaning
void hosttrace_trace_init(const char*, bool, const char*)
TIMEMORY_VISIBILITY("default");
void hosttrace_trace_finalize(void) TIMEMORY_VISIBILITY("default");
// takes an environment variable name and value
void hosttrace_trace_set_env(const char* env_name, const char* env_val)
TIMEMORY_VISIBILITY("default");
void hosttrace_trace_set_mpi(bool use, bool attached) TIMEMORY_VISIBILITY("default");
}
//--------------------------------------------------------------------------------------//
// same sort of functionality as python's " ".join([...])
#if !defined(JOIN)
# define JOIN(...) tim::mpl::apply<std::string>::join(__VA_ARGS__)
#endif
// Prints a printf-style message to stderr when get_debug() returns true.
//
// The body is wrapped in do { ... } while(0) so that the macro expands to exactly one
// statement: a bare `if(...) { ... }` expansion followed by the caller's `;` cannot be
// used as the body of an unbraced if/else (the trailing `else` either fails to compile
// or binds to the macro's hidden `if`). Invoke as `HOSTTRACE_DEBUG(...);`.
#define HOSTTRACE_DEBUG(...)                                                             \
    do                                                                                   \
    {                                                                                    \
        if(get_debug())                                                                  \
        {                                                                                \
            fprintf(stderr, __VA_ARGS__);                                                \
        }                                                                                \
    } while(0)
//--------------------------------------------------------------------------------------//
namespace audit = tim::audit;
namespace comp = tim::component;
namespace quirk = tim::quirk;
namespace threading = tim::threading;
// Component used to wrap fork(). It derives from comp::base<fork_gotcha, void> and
// only declares audit callbacks; their definitions are not in this header.
struct fork_gotcha : comp::base<fork_gotcha, void>
{
using gotcha_data_t = comp::gotcha_data;
TIMEMORY_DEFAULT_OBJECT(fork_gotcha)
// called right before fork
void audit(const gotcha_data_t& _data, audit::incoming);
// called right after fork; _pid is the value returned by fork
void audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid);
};
// Component used to wrap MPI_Init and MPI_Init_thread. It derives from
// comp::base<mpi_gotcha, void> and only declares audit callbacks; their definitions are
// not in this header.
struct mpi_gotcha : comp::base<mpi_gotcha, void>
{
using gotcha_data_t = comp::gotcha_data;
TIMEMORY_DEFAULT_OBJECT(mpi_gotcha)
// called right before MPI_Init with that function's arguments
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***);
// called right before MPI_Init_thread with that function's arguments
void audit(const gotcha_data_t& _data, audit::incoming, int*, char***, int, int*);
// called right after MPI_Init and MPI_Init_thread with the return value
void audit(const gotcha_data_t& _data, audit::outgoing, int _retval);
};
// timemory API tag type: an empty struct deriving from tim::concepts::api, used as a
// template argument to timemory types in this header
struct hosttrace : tim::concepts::api
{};
// timemory component which calls hosttrace functions
// (used in gotcha wrappers)
struct hosttrace_component : tim::component::base<hosttrace_component, void>
{
// NOTE(review): definitions are not in this header; presumably start()/stop() forward
// to the hosttrace push/pop API using m_prefix -- confirm against the implementation
void start();
void stop();
void set_prefix(const char*);
private:
// raw, non-owning pointer; nullptr until a prefix has been set
const char* m_prefix = nullptr;
};
using fork_gotcha_t = comp::gotcha<4, tim::component_tuple<fork_gotcha>, hosttrace>;
using mpi_gotcha_t = comp::gotcha<4, tim::component_tuple<mpi_gotcha>, hosttrace>;
using hosttrace_bundle_t =
tim::lightweight_tuple<comp::wall_clock, comp::peak_rss, comp::cpu_clock,
comp::cpu_util, comp::user_global_bundle, fork_gotcha_t,
mpi_gotcha_t>;
using bundle_t =
tim::component_bundle<hosttrace, comp::wall_clock*, comp::user_global_bundle*>;
using bundle_allocator_t = tim::data::ring_buffer_allocator<bundle_t>;
//--------------------------------------------------------------------------------------//
#if !defined(TIMEMORY_USE_PERFETTO)
PERFETTO_DEFINE_CATEGORIES(PERFETTO_CATEGORIES);
#endif
#if defined(CUSTOM_DATA_SOURCE)
// Custom perfetto data source (perfetto::DataSource<CustomDataSource>).
// Each lifecycle callback below currently only invokes PRINT_HERE with the name of the
// stage.
class CustomDataSource : public perfetto::DataSource<CustomDataSource>
{
public:
void OnSetup(const SetupArgs&) override
{
// Use this callback to apply any custom configuration to your data source
// based on the TraceConfig in SetupArgs.
PRINT_HERE("%s", "setup");
}
void OnStart(const StartArgs&) override
{
// This notification can be used to initialize the GPU driver, enable
// counters, etc. StartArgs will contain the DataSourceDescriptor,
// which can be extended.
PRINT_HERE("%s", "start");
}
void OnStop(const StopArgs&) override
{
// Undo any initialization done in OnStart.
PRINT_HERE("%s", "stop");
}
// Data sources can also have per-instance state.
int my_custom_state = 0;
};
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
#endif
//--------------------------------------------------------------------------------------//
// used for specifying the state of hosttrace
// (underlying type is unsigned short; enumerators take consecutive values from 0)
// NOTE(review): the names suggest a lifecycle DelayedInit -> PreInit -> Active ->
// Finalized -- confirm against the code that updates get_state()
enum class State : unsigned short
{
DelayedInit = 0,
PreInit,
Active,
Finalized
};
bool
get_debug();
State&
get_state();
std::unique_ptr<hosttrace_bundle_t>&
get_main_bundle();
//--------------------------------------------------------------------------------------//
// There are currently some strange things that happen with vector<bundle_t>, so this
// struct uses vector<bundle_t*> together with timemory's ring_buffer_allocator to
// create contiguous, memory-page aligned instances of the bundle.
struct hosttrace_timemory_data
{
// number of elements in the fixed-size array type returned by instances()
static constexpr size_t max_supported_threads = 1024;
using instance_array_t = std::array<hosttrace_timemory_data, max_supported_threads>;
// allocator for bundle_t instances (see the note above)
bundle_allocator_t allocator{};
// pointers to bundle_t instances
// NOTE(review): presumably these are obtained from `allocator` -- confirm
std::vector<bundle_t*> bundles{};
// accessor for the array of instances
// NOTE(review): presumably one slot per thread, given max_supported_threads -- confirm
static instance_array_t& instances();
};
//--------------------------------------------------------------------------------------//
+5 -76
Ver ficheiro
@@ -323,77 +323,6 @@ error_func_fake(error_level_t level, int num, const char* const* params)
// It does nothing.
}
//======================================================================================//
//
bool
find_func_or_calls(std::vector<const char*> names, bpvector_t<point_t*>& points,
image_t* app_image, procedure_loc_t loc)
{
using function_t = procedure_t;
using function_vec_t = bpvector_t<function_t*>;
using point_vec_t = bpvector_t<point_t*>;
function_t* func = nullptr;
for(auto nitr = names.begin(); nitr != names.end(); ++nitr)
{
function_t* f = find_function(app_image, *nitr);
if(f && f->getModule()->isSharedLib())
{
func = f;
break;
}
}
if(func)
{
point_vec_t* fpoints = func->findPoint(loc);
if(fpoints && fpoints->size())
{
for(auto pitr = fpoints->begin(); pitr != fpoints->end(); ++pitr)
points.push_back(*pitr);
return true;
}
}
// Moderately expensive loop here. Perhaps we should make a name->point map first
// and just do lookups through that.
function_vec_t* all_funcs = app_image->getProcedures();
auto initial_points_size = points.size();
for(auto nitr = names.begin(); nitr != names.end(); ++nitr)
{
for(auto fitr = all_funcs->begin(); fitr != all_funcs->end(); ++fitr)
{
function_t* f = *fitr;
if(f->getModule()->isSharedLib())
continue;
point_vec_t* fpoints = f->findPoint(BPatch_locSubroutine);
if(!fpoints || fpoints->empty())
continue;
for(auto pitr = fpoints->begin(); pitr != fpoints->end(); pitr++)
{
std::string callee = (*pitr)->getCalledFunctionName();
if(callee == std::string(*nitr))
points.push_back(*pitr);
}
}
if(points.size() != initial_points_size)
return true;
}
return false;
}
//======================================================================================//
//
bool
find_func_or_calls(const char* name, bpvector_t<point_t*>& points, image_t* image,
procedure_loc_t loc)
{
std::vector<const char*> v;
v.push_back(name);
return find_func_or_calls(v, points, image, loc);
}
//======================================================================================//
//
bool
@@ -516,11 +445,11 @@ c_stdlib_function_constraint(const std::string& _func)
"compat|vfork_|elision_init|cr_|cri_|aio_|mq_|sem_init|waitpid$|sigcancel_"
"handler|sighandler_setxid|start_thread$|clock$|semctl$|shm_open$|shm_unlink$|"
"printf|dprintf|walker$|clear_once_control$|libcr_|sem_wait$|sem_trywait$|vfork|"
"pause$|wait$|msgrcv$|sigwait$|sigsuspend$|recvmsg$|sendmsg$|ftrylockfile$|"
"funlockfile$|tee$|setbuf$|setbuffer$|enlarge_userbuf$|convert_and_print$|"
"feraise|lio_|atomic_|err$|errx$|print_errno_message$|error_tail$|clntunix_|"
"sem_destroy|setxid_mark_thread|feupdate|send$|connect$|longjmp|pwrite|accept$|"
"stpncpy$|writeunix$|xflowf$|mbrlen$)",
"pause$|wait$|waitid$|msgrcv$|sigwait$|sigsuspend$|recvmsg$|sendmsg$|"
"ftrylockfile$|funlockfile$|tee$|setbuf$|setbuffer$|enlarge_userbuf$|convert_and_"
"print$|feraise|lio_|atomic_|err$|errx$|print_errno_message$|error_tail$|"
"clntunix_|sem_destroy|setxid_mark_thread|feupdate|send$|connect$|longjmp|pwrite|"
"accept$|stpncpy$|writeunix$|xflowf$|mbrlen$)",
regex_opts);
return std::regex_search(_func, _pattern);
+363 -210
Ver ficheiro
@@ -28,25 +28,32 @@
#include <sys/stat.h>
#include <sys/types.h>
static bool is_driver = false;
static size_t batch_size = 50;
static strset_t extra_libs = {};
static std::vector<std::pair<uint64_t, string_t>> hash_ids;
static std::map<string_t, bool> use_stubs;
static std::map<string_t, procedure_t*> beg_stubs;
static std::map<string_t, procedure_t*> end_stubs;
static strvec_t init_stub_names;
static strvec_t fini_stub_names;
static strset_t used_stub_names;
static std::vector<call_expr_pointer_t> env_variables;
static std::map<string_t, call_expr_pointer_t> beg_expr;
static std::map<string_t, call_expr_pointer_t> end_expr;
static const auto npos_v = string_t::npos;
static string_t instr_mode = "trace";
static string_t instr_push_func = "hosttrace_push_trace";
static string_t instr_pop_func = "hosttrace_pop_trace";
static string_t instr_push_hash = "hosttrace_push_trace_hash";
static string_t instr_pop_hash = "hosttrace_pop_trace_hash";
static bool is_driver = false;
static bool allow_overlapping = false;
static size_t batch_size = 50;
static strset_t extra_libs = {};
static size_t min_address_range = (1 << 9); // 512
static size_t min_loop_address_range = (1 << 6); // 64
static std::vector<std::pair<uint64_t, string_t>> hash_ids = {};
static std::map<string_t, bool> use_stubs = {};
static std::map<string_t, procedure_t*> beg_stubs = {};
static std::map<string_t, procedure_t*> end_stubs = {};
static strvec_t init_stub_names = {};
static strvec_t fini_stub_names = {};
static strset_t used_stub_names = {};
static std::vector<call_expr_pointer_t> env_variables = {};
static std::map<string_t, call_expr_pointer_t> beg_expr = {};
static std::map<string_t, call_expr_pointer_t> end_expr = {};
static const auto npos_v = string_t::npos;
static string_t instr_mode = "trace";
static string_t instr_push_func = "hosttrace_push_trace";
static string_t instr_pop_func = "hosttrace_pop_trace";
static string_t instr_push_hash = "hosttrace_push_trace_hash";
static string_t instr_pop_hash = "hosttrace_pop_trace_hash";
static string_t print_instrumented = {};
static string_t print_available = {};
static string_t print_overlapping = {};
static std::string modfunc_dump_dir = "hosttrace-module-functions";
std::string
get_absolute_exe_filepath(std::string exe_name);
@@ -228,7 +235,8 @@ main(int argc, char** argv)
.count(1);
parser.add_argument()
.names({ "-d", "--default-components" })
.description("Default components to instrument");
.description("Default components to instrument (only useful when timemory is "
"enabled in hosttrace library)");
parser.add_argument()
.names({ "-M", "--mode" })
.description("Instrumentation mode. 'trace' mode is immutable, 'region' mode is "
@@ -237,8 +245,9 @@ main(int argc, char** argv)
.count(1);
parser.add_argument()
.names({ "--env" })
.description(
"Environment variables to add to the runtime in form VARIABLE=VALUE");
.description("Environment variables to add to the runtime in form "
"VARIABLE=VALUE. E.g. use '--env HOSTTRACE_USE_TIMEMORY=ON' to "
"default to using timemory instead of perfetto");
parser.add_argument()
.names({ "--prefer" })
.description("Prefer this library types when available")
@@ -250,12 +259,12 @@ main(int argc, char** argv)
parser
.add_argument({ "--mpi" },
"Enable MPI support (requires hosttrace built w/ MPI and GOTCHA "
"support)")
"support). NOTE: this will automatically be activated if "
"MPI_Init/MPI_Init_thread and MPI_Finalize are found in the symbol "
"table of target")
.count(0);
parser.add_argument({ "--label" }, "Labeling info for functions")
.choices({ "file", "line", "return", "args" });
parser.add_argument({ "--mpip" }, "Enable MPI profiling via GOTCHA").count(0);
parser.add_argument({ "--ompt" }, "Enable OpenMP profiling via OMPT").count(0);
parser.add_argument({ "--load" },
"Supplemental instrumentation library names w/o extension (e.g. "
"'libinstr' for 'libinstr.so' or 'libinstr.a')");
@@ -271,7 +280,75 @@ main(int argc, char** argv)
"Dyninst supports batch insertion of multiple points. If one large batch "
"insertion fails, this value will be used to create smaller batches")
.count(1)
.dtype("size_t")
.action([](parser_t& p) { batch_size = p.get<size_t>("batch-size"); });
parser
.add_argument({ "-r", "--min-address-range" },
"If the address range of a function is less than this value, "
"exclude it from instrumentation")
.count(1)
.dtype("size_t")
.set_default(min_address_range)
.action(
[](parser_t& p) { min_address_range = p.get<size_t>("min-address-range"); });
parser
.add_argument({ "--min-address-range-loop" },
"If the address range of a function containing a loop is less than "
"this value, "
"exclude it from instrumentation")
.count(1)
.dtype("size_t")
.set_default(min_loop_address_range)
.action([](parser_t& p) {
min_loop_address_range = p.get<size_t>("min-address-range-loop");
});
parser.add_argument()
.names({ "--allow-overlapping" })
.description(
"Allow dyninst to instrument either multiple functions which overlap (share "
"part of same function body) or single functions with multiple entry points. "
"For more info, see Section 2 of the DyninstAPI documentation.")
.count(0)
.action([](parser_t&) { allow_overlapping = true; });
parser
.add_argument(
{ "--print-dir" },
"Output directory for diagnostic available/instrumented/overlapping module "
"function lists, e.g. {print-dir}/available.txt")
.count(1)
.dtype("string")
.set_default(modfunc_dump_dir)
.action([](parser_t& p) { modfunc_dump_dir = p.get<std::string>("print-dir"); });
parser
.add_argument(
{ "--print-instrumented" },
"Print the instrumented entities (functions, modules, or module-function "
"pair) to stdout after applying regular expressions and exit")
.count(1)
.choices({ "functions", "modules", "functions+", "pair", "pair+" })
.action([](parser_t& p) {
print_instrumented = p.get<std::string>("print-instrumented");
});
parser
.add_argument(
{ "--print-available" },
"Print the available entities for instrumentation (functions, modules, or "
"module-function pair) to stdout applying regular expressions and exit")
.count(1)
.choices({ "functions", "modules", "functions+", "pair", "pair+" })
.action(
[](parser_t& p) { print_available = p.get<std::string>("print-available"); });
parser
.add_argument(
{ "--print-overlapping" },
"Print the entities for instrumentation (functions, modules, or "
"module-function pair) which overlap other function calls or have multiple "
"entry points to stdout applying regular expressions and exit")
.count(1)
.choices({ "functions", "modules", "functions+", "pair", "pair+" })
.action([](parser_t& p) {
print_overlapping = p.get<std::string>("print-overlapping");
});
if(_cmdc == 0)
{
@@ -380,23 +457,13 @@ main(int argc, char** argv)
if(parser.exists("mpi"))
use_mpi = true;
if(parser.exists("mpip"))
use_stubs["mpip"] = true;
else
use_stubs["mpip"] = false;
if(parser.exists("ompt"))
use_stubs["ompt"] = true;
else
use_stubs["ompt"] = false;
if(parser.exists("p"))
_pid = parser.get<int>("p");
if(parser.exists("d"))
{
auto _components = parser.get<strvec_t>("default-components");
default_components = "";
default_components = {};
for(size_t i = 0; i < _components.size(); ++i)
{
if(_components.at(i) == "none")
@@ -409,7 +476,7 @@ main(int argc, char** argv)
default_components += ",";
}
if(default_components == "none")
default_components = "";
default_components = {};
else
{
auto _strcomp = parser.get<std::string>("d");
@@ -460,6 +527,9 @@ main(int argc, char** argv)
fini_stub_names = parser.get<strvec_t>("fini-functions");
auto env_vars = parser.get<strvec_t>("env");
if(verbose_level >= 0)
tim::makedir(modfunc_dump_dir);
//----------------------------------------------------------------------------------//
//
// REGEX OPTIONS
@@ -603,6 +673,19 @@ main(int argc, char** argv)
//----------------------------------------------------------------------------------//
std::set<std::string> module_names;
// When findOverlapping() returns true for `pitr`, registers `pitr` (paired with
// `mitr`) and every procedure placed in the output vector (paired with its own
// module) in overlapping_module_functions.
auto _add_overlapping = [](module_t* mitr, procedure_t* pitr) {
    std::vector<procedure_t*> _others{};
    if(!pitr->findOverlapping(_others))
        return;

    overlapping_module_functions.insert(module_function{ mitr, pitr });
    for(auto* _other : _others)
        overlapping_module_functions.insert(
            module_function{ _other->getModule(), _other });
};
if(app_modules && !app_modules->empty())
{
modules = *app_modules;
@@ -616,6 +699,7 @@ main(int argc, char** argv)
auto _modfn = module_function(itr, pitr);
module_names.insert(_modfn.module);
available_module_functions.insert(std::move(_modfn));
_add_overlapping(itr, pitr);
}
}
}
@@ -636,6 +720,7 @@ main(int argc, char** argv)
auto _modfn = module_function(mod, itr);
module_names.insert(_modfn.module);
available_module_functions.insert(std::move(_modfn));
_add_overlapping(mod, itr);
}
}
}
@@ -667,7 +752,10 @@ main(int argc, char** argv)
std::cout << '\n' << std::endl;
}
dump_info("available_module_functions.txt", available_module_functions, 1);
dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, "available.txt"),
available_module_functions, 1);
dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, "overlapping.txt"),
overlapping_module_functions, 1);
//----------------------------------------------------------------------------------//
//
@@ -756,12 +844,6 @@ main(int argc, char** argv)
load_library(get_library_ext(libname));
if(use_stubs["mpip"] && !is_static_exe)
load_library({ "libhosttrace-mpip.so" });
if(use_stubs["ompt"])
load_library(get_library_ext({ "libhosttrace-ompt" }));
for(const auto& itr : extra_libs)
load_library(get_library_ext({ itr }));
@@ -795,6 +877,10 @@ main(int argc, char** argv)
if(mpi_init_func && mpi_fini_func)
use_mpi = true;
bool use_mpip = false;
if(use_mpi && binary_rewrite)
use_mpip = true;
//----------------------------------------------------------------------------------//
//
// Handle supplemental instrumentation library functions
@@ -824,11 +910,6 @@ main(int argc, char** argv)
return false;
};
if(use_stubs["mpip"])
add_instr_library("mpip", "hosttrace_register_mpip", "hosttrace_deregister_mpip");
if(use_stubs["ompt"])
add_instr_library("ompt", "hosttrace_register_ompt", "hosttrace_deregister_ompt");
if(!extra_libs.empty())
{
verbprintf(2, "Adding extra libraries...\n");
@@ -963,20 +1044,6 @@ main(int argc, char** argv)
"with MPI and GOTCHA support");
}
if(use_stubs["mpip"] &&
!(beg_stubs["mpip"] != nullptr || end_stubs["mpip"] != nullptr))
{
throw std::runtime_error("MPIP support was requested but could not find "
"hosttrace_{register,deregister}_mpip functions");
}
if(use_stubs["ompt"] &&
!(beg_stubs["ompt"] != nullptr || end_stubs["ompt"] != nullptr))
{
throw std::runtime_error("OMPT support was requested but could not find "
"hosttrace_{register,deregister}_ompt functions");
}
auto check_for_debug_info = [](bool& _has_debug_info, auto* _func) {
// This heuristic guesses that debugging info is available if function
// is not defined in the DEFAULT_MODULE
@@ -1063,12 +1130,10 @@ main(int argc, char** argv)
auto mpie_init_args = hosttrace_call_expr("HOSTTRACE_MPI_INIT", "OFF");
auto mpie_fini_args = hosttrace_call_expr("HOSTTRACE_MPI_FINALIZE", "OFF");
auto trace_call_args =
hosttrace_call_expr("HOSTTRACE_TRACE_COMPONENTS", default_components);
auto mpip_call_args =
hosttrace_call_expr("HOSTTRACE_MPIP_COMPONENTS", default_components);
auto ompt_call_args =
hosttrace_call_expr("HOSTTRACE_OMPT_COMPONENTS", default_components);
auto none_call_args = hosttrace_call_expr();
hosttrace_call_expr("HOSTTRACE_COMPONENTS", default_components);
auto use_mpi_call_args = hosttrace_call_expr("HOSTTRACE_USE_MPI", "ON");
auto use_mpip_call_args = hosttrace_call_expr("HOSTTRACE_USE_MPIP", "ON");
auto none_call_args = hosttrace_call_expr();
verbprintf(2, "Done\n");
verbprintf(2, "Getting call snippets... ");
@@ -1080,12 +1145,12 @@ main(int argc, char** argv)
auto main_beg_call = main_call_args.get(entr_trace);
auto main_end_call = main_call_args.get(exit_trace);
auto trace_env_call = trace_call_args.get(env_func);
auto mode_env_call = mode_call_args.get(env_func);
auto mpip_env_call = mpip_call_args.get(env_func);
auto ompt_env_call = ompt_call_args.get(env_func);
auto mpii_env_call = mpie_init_args.get(env_func);
auto mpif_env_call = mpie_fini_args.get(env_func);
auto trace_env_call = trace_call_args.get(env_func);
auto mode_env_call = mode_call_args.get(env_func);
auto mpii_env_call = mpie_init_args.get(env_func);
auto mpif_env_call = mpie_fini_args.get(env_func);
auto use_mpi_env_call = use_mpi_call_args.get(env_func);
auto use_mpip_env_call = use_mpip_call_args.get(env_func);
verbprintf(2, "Done\n");
@@ -1125,10 +1190,10 @@ main(int argc, char** argv)
init_names.push_back(mpii_env_call.get());
if(mpif_env_call)
init_names.push_back(mpif_env_call.get());
if(use_stubs["mpip"] && mpip_env_call)
init_names.push_back(mpip_env_call.get());
if(use_stubs["ompt"] && ompt_env_call)
init_names.push_back(ompt_env_call.get());
if(use_mpi && use_mpi_env_call)
init_names.push_back(use_mpi_env_call.get());
if(use_mpip && use_mpip_env_call)
init_names.push_back(use_mpip_env_call.get());
for(const auto& itr : env_variables)
{
@@ -1229,7 +1294,13 @@ main(int argc, char** argv)
else
itr->getModuleName(modname, MUTNAMELEN);
if(strstr(modname, "libdyninst") != nullptr)
if(!itr->isInstrumentable())
{
verbprintf(2, "Skipping uninstrumentable function: %s\n", fname);
continue;
}
if(std::string{ modname }.find("libdyninst") != std::string::npos)
continue;
if(module_constraint(modname) || !process_file_for_instrumentation(modname))
@@ -1240,12 +1311,6 @@ main(int argc, char** argv)
itr->getName(fname, FUNCNAMELEN);
if(!itr->isInstrumentable())
{
verbprintf(1, "Skipping uninstrumentable function: %s\n", fname);
continue;
}
auto name = get_func_file_line_info(mod, itr);
if(name.get().empty())
@@ -1268,13 +1333,93 @@ main(int argc, char** argv)
continue;
}
if(is_static_exe && has_debug_info && strcmp(fname, "_fini") != 0 &&
strcmp(modname, "DEFAULT_MODULE") == 0)
if(is_static_exe && has_debug_info && string_t{ fname } == "_fini" &&
string_t{ modname } == "DEFAULT_MODULE")
{
verbprintf(1, "Skipping function [DEFAULT_MODULE]: %s\n", fname);
continue;
}
_add_overlapping(mod, itr);
if(!allow_overlapping &&
overlapping_module_functions.find(module_function{ mod, itr }) !=
overlapping_module_functions.end())
{
verbprintf(1, "Skipping function [overlapping]: %s / %s\n",
name.m_name.c_str(), name.get().c_str());
continue;
}
// directly try to get loop entry points
const std::vector<point_t*>* _loop_entries =
itr->findPoint(BPatch_locLoopEntry);
// try to get loops via the control flow graph
flow_graph_t* cfg = itr->getCFG();
basic_loop_vec_t basic_loop{};
if(cfg)
cfg->getOuterLoops(basic_loop);
// if the function has dynamic callsites and we are in binary rewrite mode,
// force the instrumentation
bool _force_instr = false;
if(cfg && binary_rewrite)
_force_instr = cfg->containsDynamicCallsites();
auto _address_range = module_function{ mod, itr }.address_range;
auto _num_loop_entries =
(_loop_entries)
? std::max<size_t>(_loop_entries->size(), basic_loop.size())
: basic_loop.size();
auto _has_loop_entries = (_num_loop_entries > 0);
if(_address_range < min_address_range && !_has_loop_entries && !_force_instr)
{
verbprintf(1,
"Skipping function [min-address-range]: %s / %s (address "
"range = %lu, minimum = %lu)\n",
name.m_name.c_str(), name.get().c_str(),
(unsigned long) _address_range,
(unsigned long) min_address_range);
continue;
}
else if(_address_range < min_loop_address_range && _has_loop_entries &&
!_force_instr)
{
verbprintf(1,
"Skipping function [min-loop-address-range]: %s / %s (address "
"range = %lu, minimum = %lu)\n",
name.m_name.c_str(), name.get().c_str(),
(unsigned long) _address_range,
(unsigned long) min_loop_address_range);
continue;
}
else if(_address_range >= min_loop_address_range &&
_address_range < min_address_range && _has_loop_entries)
{
verbprintf(
1,
"Enabling function [min-loop-address-range]: %s / %s despite not "
"satisfy minimum loop address range (address range = %lu, minimum "
"= %lu) because it has at least one loop (found: %lu)\n",
name.m_name.c_str(), name.get().c_str(),
(unsigned long) _address_range,
(unsigned long) min_loop_address_range,
(unsigned long) _num_loop_entries);
}
else if(_address_range < min_address_range && _force_instr)
{
verbprintf(1,
"Enabling function [min-address-range]: %s / %s despite not "
"satisfy minimum address range (address range = %lu, minimum "
"= %lu) because contains dynamic callsites which may not be "
"instrumented in binary rewrite mode\n",
name.m_name.c_str(), name.get().c_str(),
(unsigned long) _address_range,
(unsigned long) min_address_range);
}
hash_ids.emplace_back(std::hash<string_t>()(name.get()), name.get());
available_module_functions.insert(module_function(mod, itr));
instrumented_module_functions.insert(module_function(mod, itr));
@@ -1302,13 +1447,9 @@ main(int argc, char** argv)
verbprintf(1, "Instrumenting at the loop level: %s\n",
name.m_name.c_str());
flow_graph_t* flow = itr->getCFG();
basic_loop_vec_t basic_loop;
if(flow)
flow->getOuterLoops(basic_loop);
for(auto* litr : basic_loop)
{
auto lname = get_loop_file_line_info(mod, itr, flow, litr);
auto lname = get_loop_file_line_info(mod, itr, cfg, litr);
auto _lname = lname.get();
auto _lhash = std::hash<string_t>()(_lname);
hash_ids.emplace_back(_lhash, _lname);
@@ -1324,8 +1465,8 @@ main(int argc, char** argv)
auto _lexit =
_ltrace_exit.get((exit_hash) ? exit_hash : exit_trace);
insert_instr(addr_space, itr, _lentr, BPatch_entry, flow, litr);
insert_instr(addr_space, itr, _lexit, BPatch_exit, flow, litr);
insert_instr(addr_space, itr, _lentr, BPatch_entry, cfg, litr);
insert_instr(addr_space, itr, _lexit, BPatch_exit, cfg, litr);
};
instr_procedure_functions.emplace_back(_lf);
}
@@ -1333,24 +1474,6 @@ main(int argc, char** argv)
}
};
//----------------------------------------------------------------------------------//
//
// Load the dependent libraries (currently unused)
//
//----------------------------------------------------------------------------------//
if(is_static_exe && false)
{
char* bindings = new char[MUTNAMELEN];
bool loadResult = load_dependent_libraries(addr_space, bindings);
delete[] bindings;
if(!loadResult)
{
fprintf(stderr, "Failed to load dependent libraries\n");
throw std::runtime_error("Failed to load dependent libraries");
}
}
//----------------------------------------------------------------------------------//
//
// Do a first pass through all procedures to generate the hash ids
@@ -1489,8 +1612,87 @@ main(int argc, char** argv)
//
//----------------------------------------------------------------------------------//
dump_info("available_module_functions.txt", available_module_functions, 0);
dump_info("instrumented_module_functions.txt", instrumented_module_functions, 0);
bool _dump_and_exit = ((print_available.length() + print_instrumented.length() +
print_overlapping.length()) > 0);
dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, "available.txt"),
available_module_functions, 0);
dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, "instrumented.txt"),
instrumented_module_functions, 0);
dump_info(TIMEMORY_JOIN('/', modfunc_dump_dir, "overlapping.txt"),
overlapping_module_functions, 0);
// Prints the entries of `_modset` to stdout, grouped by module.
//
// _mode selects what is printed for each entry:
//   "modules"    : the module name only (no per-module header is printed)
//   "functions"  : the function name
//   "functions+" : the function signature
//   "pair"       : "[module] --> [ address-range ][function]"
//   "pair+"      : "[module] --> [ address-range ][signature]"
// Any other mode throws std::runtime_error before anything is printed.
// Duplicate entries within a module are printed once.
auto _dump_info = [](string_t _mode, const fmodset_t& _modset) {
    // module -> unique entries in first-seen order (std::map keeps the modules
    // sorted by name)
    std::map<std::string, std::vector<std::string>>                  _entries{};
    std::unordered_map<std::string, std::unordered_set<std::string>> _seen{};

    // store `_entry` under `_module` unless that module already holds it
    auto _record = [&](const std::string& _module, const std::string& _entry) {
        if(_seen[_module].emplace(_entry).second)
            _entries[_module].emplace_back(_entry);
    };

    // shared formatter for the two "pair" modes
    auto _pair_entry = [](const auto& _mf, const auto& _label) {
        std::stringstream _ss{};
        _ss << std::boolalpha;
        _ss << "[" << _mf.module << "] --> [ " << _mf.address_range << " ][" << _label
            << "]";
        return _ss.str();
    };

    const bool _modules_only = (_mode == "modules");
    const bool _functions    = (_mode == "functions");
    const bool _signatures   = (_mode == "functions+");
    const bool _pairs        = (_mode == "pair");
    const bool _pairs_sig    = (_mode == "pair+");

    if(!(_modules_only || _functions || _signatures || _pairs || _pairs_sig))
        throw std::runtime_error("Unknown mode " + _mode);

    for(const auto& _mf : _modset)
    {
        if(_modules_only)
            _record(_mf.module, _mf.module);
        else if(_functions)
            _record(_mf.module, _mf.function);
        else if(_signatures)
            _record(_mf.module, _mf.signature.get());
        else if(_pairs)
            _record(_mf.module, _pair_entry(_mf, _mf.function));
        else
            _record(_mf.module, _pair_entry(_mf, _mf.signature.get()));
    }

    for(const auto& _group : _entries)
    {
        if(!_modules_only)
            std::cout << "\n" << _group.first << ":\n";
        for(const auto& _entry : _group.second)
            std::cout << " " << _entry << "\n";
    }
};
if(!print_available.empty())
_dump_info(print_available, available_module_functions);
if(!print_instrumented.empty())
_dump_info(print_instrumented, instrumented_module_functions);
if(!print_overlapping.empty())
_dump_info(print_overlapping, overlapping_module_functions);
if(_dump_and_exit)
exit(EXIT_SUCCESS);
//----------------------------------------------------------------------------------//
//
@@ -1650,7 +1852,7 @@ process_file_for_instrumentation(const string_t& file_name)
return true;
}
string_t ext_str = "\\.S$";
string_t ext_str = "\\.(s|S)$";
static std::regex ext_regex(ext_str, regex_opts);
static std::regex sys_regex("^(s|k|e|w)_[A-Za-z_0-9\\-]+\\.(c|C)$", regex_opts);
static std::regex userlib_regex(
@@ -1660,14 +1862,11 @@ process_file_for_instrumentation(const string_t& file_name)
static std::regex corelib_regex("^lib(rt-|dl-|util-|python)", regex_opts);
// these are all due to TAU
static std::regex prefix_regex(
"^(RT|Tau|Profiler|Rts|Papi|Py|Comp_xl\\.cpp|Comp_gnu\\.cpp|"
"^(_|\\.|RT|Tau|Profiler|Rts|Papi|Py|Comp_xl\\.cpp|Comp_gnu\\.cpp|"
"UserEvent\\.cpp|FunctionInfo\\.cpp|PthreadLayer\\.cpp|"
"Comp_intel[0-9]\\.cpp|Tracer\\.cpp|cxx11|locale|pmap_|rpc_|elf_|elf32_|elf64_|"
"gelf_|reg-[a-z]+\\.c|sched_|io[a-z_]+\\.c|arg[zp]-|thrd_[a-z]+\\.c|pthread_|sem_"
"|mtx_[a-z]+\\.c|cnd_[a-z]+\\.c|tss_[a-z]+\\.c|pt-[a-z]+\\.c|set[a-z]*gid\\.c|"
"streams-[a-z]+\\.c|stat[a-z_]+\\.c|fstat[a-z_]+\\.c|epoll_[a-z_]+\\.c|ppoll|"
"time[a-z_]+\\.c)",
"Comp_intel[0-9]\\.cpp|Tracer\\.cpp)",
regex_opts);
/*
static std::regex suffix_regex(
"(printf|gettext|^sig[a-z]+|^exit|^setenv|on_exit|quick_exit|_crypt|^str[a-z_]+|"
"mmap[0-9]+|^err|getu[a-z]+|^call_once|^sendto|^timer_[a-z]+|^read|^close|^recv|^"
@@ -1677,14 +1876,14 @@ process_file_for_instrumentation(const string_t& file_name)
"vscanf|memmove|uid|tsz|gid|cvt|cvt_r|^error|_r|[a-z]64|^f[a-z]+|^makecontext|^"
"basename|^wcp[a-z]+|[a-z]+dir|^mb[a-z]+|^dir[a-z]+|euid[a-z]+|^c[36][24][a-z]+|^"
"set[a-z_]+|^get[a-z_]+|^shm[a-z]+|^wc[a-z_]+|brk|^write[a-z]+)\\.c$",
regex_opts);
regex_opts);*/
if(!cstd_func_instr && c_stdlib_module_constraint(file_name))
/*if(!cstd_func_instr && c_stdlib_module_constraint(file_name))
{
verbprintf(3, "Excluding instrumentation [c std library] : '%s'...\n",
file_name.c_str());
return false;
}
}*/
if(std::regex_search(file_name, ext_regex))
{
@@ -1721,12 +1920,12 @@ process_file_for_instrumentation(const string_t& file_name)
return false;
}
if(std::regex_search(file_name, suffix_regex))
/*if(std::regex_search(file_name, suffix_regex))
{
verbprintf(3, "Excluding instrumentation [suffix match] : '%s'...\n",
file_name.c_str());
return false;
}
}*/
bool use = is_include(true) && !is_exclude();
if(use)
@@ -1787,9 +1986,14 @@ instrument_entity(const string_t& function_name)
regex_opts);
static std::regex exclude_cxx("(std::_Sp_counted_base|std::use_facet)", regex_opts);
static std::regex leading(
"^(_|frame_dummy|\\(|targ|new|delete|operator new|operator delete|std::allocat|"
"nvtx|gcov|main\\.cold\\.|TAU|tau|Tau|dyn|RT|dl|sys|pthread|posix|clone|thunk)",
"^(_|\\.|frame_dummy|\\(|targ|new|delete|operator new|operator "
"delete|std::allocat|"
"nvtx|gcov|main\\.cold|TAU|tau|Tau|dyn|RT|dl|sys|pthread|posix|clone|virtual "
"thunk|non-virtual thunk|transaction "
"clone|RtsLayer|DYNINST|PthreadLayer|threaded_func|targ8)",
regex_opts);
static std::regex trailing("(\\.part\\.[0-9]+|\\.constprop\\.[0-9]+|\\.|\\.[0-9]+)$",
regex_opts);
static std::regex stlfunc("^std::", regex_opts);
strset_t whole = { "init", "fini", "_init", "_fini", "atexit" };
@@ -1799,11 +2003,11 @@ instrument_entity(const string_t& function_name)
return false;
}
if(!cstd_func_instr && c_stdlib_function_constraint(function_name))
/*if(!cstd_func_instr && c_stdlib_function_constraint(function_name))
{
verbprintf(3, "Excluding function [libc] : '%s'...\n", function_name.c_str());
return false;
}
}*/
// don't instrument the functions when key is found anywhere in function name
if(std::regex_search(function_name, exclude))
@@ -1829,6 +2033,14 @@ instrument_entity(const string_t& function_name)
return false;
}
// don't instrument the functions when key is found at the end of the function name
if(std::regex_search(function_name, trailing))
{
verbprintf(3, "Excluding function [critical, trailing match] : '%s'...\n",
function_name.c_str());
return false;
}
if(whole.count(function_name) > 0)
{
verbprintf(3, "Excluding function [critical, whole match] : '%s'...\n",
@@ -1918,31 +2130,28 @@ bool
module_constraint(char* fname)
{
// fname is the name of module/file
int len = strlen(fname);
string_t _fname = fname;
if(_fname.find("hosttrace") != string_t::npos ||
_fname.find("tim::") != string_t::npos)
// never instrument any module matching hosttrace
if(_fname.find("hosttrace") != string_t::npos)
return true;
if((strcmp(fname, "DEFAULT_MODULE") == 0) || (strcmp(fname, "LIBRARY_MODULE") == 0) ||
((fname[len - 2] == '.') && (fname[len - 1] == 'c')) ||
((fname[len - 2] == '.') && (fname[len - 1] == 'C')) ||
((fname[len - 3] == '.') && (fname[len - 2] == 'c') && (fname[len - 1] == 'c')) ||
((fname[len - 4] == '.') && (fname[len - 3] == 'c') && (fname[len - 2] == 'p') &&
(fname[len - 1] == 'p')) ||
((fname[len - 4] == '.') && (fname[len - 3] == 'f') && (fname[len - 2] == '9') &&
(fname[len - 1] == '0')) ||
((fname[len - 4] == '.') && (fname[len - 3] == 'F') && (fname[len - 2] == '9') &&
(fname[len - 1] == '0')) ||
((fname[len - 2] == '.') && (fname[len - 1] == 'F')) ||
((fname[len - 2] == '.') && (fname[len - 1] == 'f')))
{
//((fname[len-3] == '.') && (fname[len-2] == 's') && (fname[len-1] == 'o'))||
// always instrument these modules
if(_fname == "DEFAULT_MODULE" || _fname == "LIBRARY_MODULE")
return false;
}
if(process_file_for_instrumentation(string_t(fname)))
// auto _valid_file_extension = std::regex_search(
// _fname, std::regex{ "\\.(a|c|f|o|cc|so|cxx|cpp|C|F|CC|f90|F90|so\\.[0-9\\.]+)$",
// regex_opts });
auto _valid_file_regex = process_file_for_instrumentation(_fname);
// if module compiled from C, C++, or Fortran or a library
// if(_valid_file_extension && _valid_file_regex)
// return false;
// apply regex expressions
if(_valid_file_regex)
return false;
// do not instrument
@@ -1959,19 +2168,10 @@ routine_constraint(const char* fname)
if(_fname.find("hosttrace") != string_t::npos)
return true;
if((strstr(fname, "FunctionInfo") != nullptr) ||
(strncmp(fname, "RtsLayer", 8) == 0) || (strncmp(fname, "DYNINST", 7) == 0) ||
(strncmp(fname, "PthreadLayer", 12) == 0) ||
(strncmp(fname, "threaded_func", 13) == 0) || (strncmp(fname, "targ8", 5) == 0) ||
(strncmp(fname, "__intel_", 8) == 0) || (strncmp(fname, "_intel_", 7) == 0) ||
(strncmp(fname, "The", 3) == 0) ||
// The following functions show up in static executables
(strncmp(fname, "__mmap", 6) == 0) || (strncmp(fname, "_IO_printf", 10) == 0) ||
(strncmp(fname, "__write", 7) == 0) || (strncmp(fname, "__munmap", 8) == 0) ||
(strstr(fname, "_L_lock") != nullptr) || (strstr(fname, "_L_unlock") != nullptr))
{
auto npos = std::string::npos;
if(_fname.find("FunctionInfo") != npos || _fname.find("_L_lock") != npos ||
_fname.find("_L_unlock") != npos)
return true; // Don't instrument
}
else
{
// Should the routine fname be instrumented?
@@ -1988,53 +2188,6 @@ routine_constraint(const char* fname)
}
}
//======================================================================================//
//
// Runs `hosttrace_show_libs` and loads every library it lists into `bedit`.
//
// bedit    : address space that receives the libraries via loadLibrary()
// bindings : suffix appended to the Makefile.hosttrace path handed to the helper
//
// Returns false when the command does not fit into the command buffer, the
// helper cannot be launched, or a listed library fails to load; true otherwise.
// Lines that do not have the expected `"<library>",` shape only produce a
// warning.
bool
load_dependent_libraries(address_space_t* bedit, char* bindings)
{
    // Order of load matters, just like command line arguments to a standalone linker
    char deplibs[1024];
    char bindir[] = TIMEMORY_BIN_DIR;
    char cmd[1024];

    verbprintf(0, "Inside load_dependent_libraries: bindings=%s\n", bindings);

    // bounded formatting: an unbounded sprintf writes past `cmd` for long paths
    const int ncmd =
        snprintf(cmd, sizeof(cmd), "%s/hosttrace_show_libs %s/../lib/Makefile.hosttrace%s",
                 bindir, bindir, bindings);
    if(ncmd < 0 || static_cast<size_t>(ncmd) >= sizeof(cmd))
    {
        fprintf(stderr,
                "hosttrace: command line for hosttrace_show_libs is too long\n");
        return false;
    }
    verbprintf(0, "cmd = %s\n", cmd);

    FILE* fp = popen(cmd, "r");
    if(fp == nullptr)
    {
        perror("hosttrace: Error launching hosttrace_show_libs to get list of "
               "dependent static libraries for static binary");
        return false;
    }

    bool success = true;
    while(fgets(deplibs, static_cast<int>(sizeof(deplibs)), fp) != nullptr)
    {
        const size_t len = strlen(deplibs);
        // expected line: "<library>",<newline>
        // len >= 4 keeps len - 3 a valid index that is distinct from the opening
        // quote at index 0
        if(len >= 4 && deplibs[len - 2] == ',' && deplibs[len - 3] == '"' &&
           deplibs[0] == '"')
        {
            // cut at the closing quote; the name starts after the opening quote
            deplibs[len - 3] = '\0';
            verbprintf(0, "LOADING %s\n", &deplibs[1]);
            if(!bedit->loadLibrary(&deplibs[1]))
            {
                fprintf(stderr, "Failed to load dependent library: %s\n", &deplibs[1]);
                success = false;
                break;
            }
        }
        else
        {
            printf("WARNING: hosttrace_show_libs in hosttrace: Comma not found! "
                   "deplibs = %s\n",
                   deplibs);
        }
    }

    // release the stream created by popen on every exit path
    pclose(fp);
    return success;
}
//======================================================================================//
//
std::string
+124
Ver ficheiro
@@ -0,0 +1,124 @@
#include "library.hpp"
//
// This file contains miscellaneous function definitions related to timemory
// placed in separate file so that, during development, the long compile-times
// arising from compiling timemory's gotcha wrappers are reduced
//
namespace
{
// value returned by comp::activate_mpip; the maximum uint64_t value is the
// sentinel for "MPI wrappers were never activated"
uint64_t mpip_index = std::numeric_limits<uint64_t>::max();

// this ensures hosttrace_trace_finalize is called before MPI_Finalize:
// a keyval with a delete callback is created and attached to MPI_COMM_SELF.
// The callback deactivates the MPI wrappers (if they were activated), pops the
// "MPI_Finalize()" trace entry and finalizes hosttrace.
// Does nothing unless TIMEMORY_USE_MPI is defined.
void
hosttrace_mpi_set_attr()
{
#if defined(TIMEMORY_USE_MPI)
    using func_t = int (*)(MPI_Comm, int, void*, void*);

    static auto _on_delete = [](MPI_Comm, int, void*, void*) {
        const bool _mpip_active = (mpip_index != std::numeric_limits<uint64_t>::max());
        if(_mpip_active)
            comp::deactivate_mpip<tim::component_tuple<hosttrace_component>, hosttrace>(
                mpip_index);
        hosttrace_pop_trace("MPI_Finalize()");
        hosttrace_trace_finalize();
        return MPI_SUCCESS;
    };

    int        _keyval = -1;
    const auto _status = PMPI_Comm_create_keyval(
        nullptr, static_cast<func_t>(_on_delete), &_keyval, nullptr);
    // only attach the attribute when the keyval was created
    if(_status != MPI_SUCCESS)
        return;
    PMPI_Comm_set_attr(MPI_COMM_SELF, _keyval, nullptr);
#endif
}
}  // namespace
// Audit hook for the call into the wrapped fork(): emits a debug warning about
// fork() in OpenMPI + libfabric applications and invokes the conditional
// backtrace macro with get_debug() and 16 (presumably the condition and the
// backtrace depth -- confirm against the macro definition).
void
fork_gotcha::audit(const gotcha_data_t&, audit::incoming)
{
    HOSTTRACE_DEBUG(
        "Warning! Calling fork() within an OpenMPI application using libfabric "
        "may result in a segmentation fault\n");
    TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(get_debug(), 16);
}
// Audit hook for the return from the wrapped fork(): logs the wrapped function's
// tool id together with the PID it returned.
void
fork_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid)
{
    const auto _returned_pid = static_cast<int>(_pid);
    HOSTTRACE_DEBUG("%s() return PID %i\n", _data.tool_id.c_str(), _returned_pid);
}
void
mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, int*, char***)
{
HOSTTRACE_DEBUG("[%s] %s(int*, char***)\n", __FUNCTION__, _data.tool_id.c_str());
if(get_state() == ::State::DelayedInit)
get_state() = ::State::PreInit;
}
void
mpi_gotcha::audit(const gotcha_data_t& _data, audit::incoming, int*, char***, int, int*)
{
HOSTTRACE_DEBUG("[%s] %s(int*, char***, int, int*)\n", __FUNCTION__,
_data.tool_id.c_str());
if(get_state() == ::State::DelayedInit)
get_state() = ::State::PreInit;
}
// Audit hook for the return from a wrapped MPI initialization function.
// When the call succeeded and the state is PreInit, this registers the
// finalize callback, optionally activates the MPI wrappers and pushes a trace
// entry named after the wrapped function.
void
mpi_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, int _retval)
{
    HOSTTRACE_DEBUG("[%s] %s() returned %i\n", __FUNCTION__, _data.tool_id.c_str(),
                    static_cast<int>(_retval));

    // nothing to do unless MPI initialized successfully while in the PreInit state
    if(_retval != tim::mpi::success_v || get_state() != ::State::PreInit)
        return;

    hosttrace_mpi_set_attr();

    // hosttrace will set this environment variable to true in binary rewrite mode
    // when it detects MPI. This env variable is hidden from the user to avoid it
    // being activated unwittingly during runtime instrumentation because that
    // would result in double instrumenting the MPI functions (unless the MPI
    // functions were excluded via a regular expression)
    const auto _use_mpip = tim::get_env("HOSTTRACE_USE_MPIP", false, false);
    if(_use_mpip)
    {
        HOSTTRACE_DEBUG("[%s] Activating MPI wrappers...\n", __FUNCTION__);
        comp::configure_mpip<tim::component_tuple<hosttrace_component>, hosttrace>();
        mpip_index =
            comp::activate_mpip<tim::component_tuple<hosttrace_component>, hosttrace>();
    }

    hosttrace_push_trace(_data.tool_id.c_str());
}
// Pushes a trace entry named after the stored prefix; does nothing when no
// prefix has been set.
void
hosttrace_component::start()
{
    if(!m_prefix)
        return;
    hosttrace_push_trace(m_prefix);
}
// Pops the trace entry named after the stored prefix; does nothing when no
// prefix has been set.
void
hosttrace_component::stop()
{
    if(!m_prefix)
        return;
    hosttrace_pop_trace(m_prefix);
}
void
hosttrace_component::set_prefix(const char* _prefix)
{
m_prefix = _prefix;
}
// Returns the single, function-local static array of hosttrace_timemory_data
// instances (value-initialized on first use).
hosttrace_timemory_data::instance_array_t&
hosttrace_timemory_data::instances()
{
    static instance_array_t _instances{};
    return _instances;
}
// Static-storage definitions for the tracing backends.
// NOTE(review): the perfetto macros are presumably required in exactly one
// translation unit -- confirm against the perfetto SDK documentation.
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
// timemory storage for the gotcha components and the timing components
// referenced by this library
TIMEMORY_INITIALIZE_STORAGE(fork_gotcha, mpi_gotcha, comp::wall_clock,
comp::user_global_bundle)
// only emitted when the optional custom perfetto data source is compiled in
#if defined(CUSTOM_DATA_SOURCE)
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
#endif
+309 -221
Ver ficheiro
@@ -1,103 +1,6 @@
#include <perfetto.h>
#include "library.hpp"
#if defined(NDEBUG)
# undef NDEBUG
#endif
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <memory>
#include <string>
#include <sys/types.h>
#include <unistd.h>
#include <utility>
#include <vector>
#include "timemory/api.hpp"
#include "timemory/backends/process.hpp"
#include "timemory/backends/threading.hpp"
#include "timemory/components.hpp"
#include "timemory/config.hpp"
#include "timemory/environment.hpp"
#include "timemory/manager.hpp"
#include "timemory/mpl/apply.hpp"
#include "timemory/operations.hpp"
#include "timemory/settings.hpp"
#include "timemory/storage.hpp"
#include "timemory/variadic.hpp"
#if !defined(JOIN)
# define JOIN(...) tim::mpl::apply<std::string>::join(__VA_ARGS__)
#endif
namespace audit = tim::audit;
namespace comp = tim::component;
namespace quirk = tim::quirk;
struct fork_gotcha : tim::component::base<fork_gotcha, void>
{
using gotcha_data_t = tim::component::gotcha_data;
TIMEMORY_DEFAULT_OBJECT(fork_gotcha)
void audit(const gotcha_data_t& _data, audit::incoming);
void audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid);
};
struct fork_gotcha_api : tim::concepts::api
{};
using fork_gotcha_t =
tim::component::gotcha<4, tim::component_tuple<fork_gotcha>, fork_gotcha_api>;
using fork_bundle_t =
tim::lightweight_tuple<comp::wall_clock, comp::peak_rss, comp::cpu_clock,
comp::cpu_util, fork_gotcha_t>;
//--------------------------------------------------------------------------------------//
PERFETTO_DEFINE_CATEGORIES(
perfetto::Category("hosttrace").SetDescription("Function trace"));
#if defined(CUSTOM_DATA_SOURCE)
class CustomDataSource : public perfetto::DataSource<CustomDataSource>
{
public:
void OnSetup(const SetupArgs&) override
{
// Use this callback to apply any custom configuration to your data source
// based on the TraceConfig in SetupArgs.
PRINT_HERE("%s", "setup");
}
void OnStart(const StartArgs&) override
{
// This notification can be used to initialize the GPU driver, enable
// counters, etc. StartArgs will contains the DataSourceDescriptor,
// which can be extended.
PRINT_HERE("%s", "start");
}
void OnStop(const StopArgs&) override
{
// Undo any initialization done in OnStart.
PRINT_HERE("%s", "stop");
}
// Data sources can also have per-instance state.
int my_custom_state = 0;
};
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
#endif
extern "C" void
hosttrace_trace_finalize();
namespace
{
bool
get_debug()
{
@@ -105,30 +8,72 @@ get_debug()
return _v;
}
void
setup_fork_gotcha()
State&
get_state()
{
CONDITIONAL_PRINT_HERE(get_debug(), "%s", "configuring gotcha wrapper around fork");
static State _v{ State::PreInit };
return _v;
}
//--------------------------------------------------------------------------------------//
namespace
{
auto
get_use_perfetto()
{
// if using timemory, default to perfetto being off
static auto _default_v = !tim::get_env<bool>("HOSTTRACE_USE_TIMEMORY", false, false);
// explicit env control for using perfetto
static auto _v = tim::get_env<bool>("HOSTTRACE_USE_PERFETTO", _default_v);
return _v;
}
auto
get_use_timemory()
{
// default to opposite of whether perfetto setting
// to use both timemory and perfetto, both HOSTTRACE_USE_TIMEMORY and
// HOSTTRACE_USE_PERFETTO must be true
static auto _v = tim::get_env<bool>("HOSTTRACE_USE_TIMEMORY", !get_use_perfetto());
return _v;
}
// Returns a mutable reference to the cached HOSTTRACE_USE_MPI setting
// (default: false). The value is read from the environment on the first call.
bool&
get_use_mpi()
{
    // this does not enable anything particularly useful when not using timemory
    static bool _use_mpi = []() {
        return tim::get_env("HOSTTRACE_USE_MPI", false, get_use_timemory());
    }();
    return _use_mpi;
}
void
setup_gotchas()
{
static bool _initialized = false;
if(_initialized)
return;
_initialized = true;
HOSTTRACE_DEBUG(
"[%s] Configuring gotcha wrapper around fork, MPI_Init, and MPI_Init_thread\n",
__FUNCTION__);
fork_gotcha_t::get_initializer() = []() {
TIMEMORY_C_GOTCHA(fork_gotcha_t, 0, fork);
};
}
auto&
get_fork_gotcha()
{
static auto _v =
(setup_fork_gotcha(), std::make_unique<fork_bundle_t>(
"hosttrace", quirk::config<quirk::auto_start>{}));
return _v;
mpi_gotcha_t::get_initializer() = []() {
mpi_gotcha_t::template configure<0, int, int*, char***>("MPI_Init");
mpi_gotcha_t::template configure<1, int, int*, char***, int, int*>(
"MPI_Init_thread");
};
}
auto
ensure_finalization()
{
if(get_debug())
fprintf(stderr, "[%s]\n", __FUNCTION__);
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
return tim::scope::destructor{ []() { hosttrace_trace_finalize(); } };
}
@@ -139,40 +84,30 @@ get_trace_session()
return _session;
}
enum class State : unsigned short
{
PreInit = 0,
Active,
Finalized
};
auto&
get_state()
{
static State _v{ State::PreInit };
return _v;
}
auto&
get_output_filename()
auto
get_perfetto_output_filename()
{
static auto _v = []() {
auto _tmp = tim::get_env<std::string>(
// default name: perfetto-trace.<pid>.proto or perfetto-trace.<rank>.proto
auto _default_fname = tim::settings::compose_output_filename(
JOIN('.', "perfetto-trace", (get_use_mpi()) ? "%rank%" : "%pid%"), "proto");
// have the default display the full path to the output file
return tim::get_env<std::string>(
"HOSTTRACE_OUTPUT_FILE",
JOIN('/', tim::get_env<std::string>("PWD", ".", false),
"hosttrace.perfetto-trace-%pid%"));
auto _replace = [&_tmp](const std::string& _key, auto _val) {
auto _pos = _tmp.find(_key);
if(_pos != std::string::npos)
_tmp.replace(_pos, _key.length(), std::to_string(_val));
};
_replace("%pid%", tim::process::get_id());
_replace("%rank%", tim::mpi::rank());
// backwards compatibility
_replace("%p", tim::process::get_id());
return _tmp;
JOIN('/', tim::get_env<std::string>("PWD", ".", false), _default_fname));
}();
return _v;
auto _tmp = _v;
auto _replace = [&_tmp](const std::string& _key, auto&& _val) {
auto _pos = _tmp.find(_key);
if(_pos != std::string::npos)
_tmp.replace(_pos, _key.length(), std::to_string(_val()));
};
_replace("%pid%", []() { return tim::process::get_id(); });
_replace("%rank%", []() { return tim::mpi::rank(); });
// backwards compatibility
_replace("%p", []() { return tim::process::get_id(); });
return _tmp;
}
auto&
@@ -195,63 +130,185 @@ is_system_backend()
return (get_backend() != "inprocess");
}
// Returns the hosttrace_timemory_data instance for the calling thread. The
// lookup by threading::get_id() happens once per thread; the resulting
// reference is cached in a thread_local.
auto&
get_timemory_data()
{
    static thread_local auto& _thread_data =
        hosttrace_timemory_data::instances().at(threading::get_id());
    return _thread_data;
}
// Returns the process-wide pair of callbacks taking a `const char*`.
// Both callbacks start out as no-ops; callers can replace them through the
// returned reference.
auto&
get_functors()
{
    using functor_t      = std::function<void(const char*)>;
    using functor_pair_t = std::pair<functor_t, functor_t>;

    static functor_pair_t _functors{ functor_t{ [](const char*) {} },
                                     functor_t{ [](const char*) {} } };
    return _functors;
}
// One-time setup routine. Despite the name, the visible body configures
// timemory settings, starts the gotcha bundle, optionally initializes
// perfetto, and installs the push/pop functors returned by get_functors().
// Returns false immediately unless get_state() is State::PreInit; the last
// visible statement returns true.
// NOTE(review): this span reads like a diff rendered without +/- markers.
// Several statements appear twice in slightly different forms (e.g. the
// file_output() assignments, two timemory_init calls, two `_fork_gotcha` and
// `ds_cfg` declarations) and an `@@` hunk header sits inside the body, so
// some lines are elided. Confirm which lines belong to the current revision
// before changing any logic here.
bool
hosttrace_init_perfetto()
{
if(get_debug())
fprintf(stderr, "[%s]\n", __FUNCTION__);
if(get_state() != State::PreInit)
return false;
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
// always initialize timemory because gotcha wrappers are always used
tim::settings::flamegraph_output() = false;
tim::settings::file_output() = false;
tim::settings::cout_output() = false;
tim::settings::file_output() = true;
tim::settings::enable_signal_handler() = true;
tim::timemory_init({ "hosttrace" });
tim::settings::collapse_processes() = false;
tim::settings::collapse_threads() = false;
tim::settings::max_thread_bookmarks() = 1;
// component list comes from HOSTTRACE_COMPONENTS, falling back to "wall_clock"
tim::settings::global_components() = tim::get_env<std::string>(
"HOSTTRACE_COMPONENTS", "wall_clock", get_use_timemory());
auto& _fork_gotcha = get_fork_gotcha();
// enable timestamp directories when perfetto + mpi is activated
if(get_use_perfetto() && get_use_mpi())
tim::settings::time_output() = true;
// derive the executable's basename from the command line of this process;
// "hosttrace" is used when the command line is empty
auto _cmd = tim::read_command_line(tim::process::get_id());
auto _exe = (_cmd.empty()) ? "hosttrace" : _cmd.front();
auto _pos = _exe.find_last_of('/');
if(_pos < _exe.length() - 1)
_exe = _exe.substr(_pos + 1);
tim::timemory_init({ _exe }, "hosttrace-");
if(get_use_timemory())
{
comp::user_global_bundle::global_init();
std::set<int> _comps{};
// convert string into set of enumerations
for(auto&& itr : tim::delimit(tim::settings::global_components()))
_comps.emplace(tim::runtime::enumerate(itr));
if(_comps.size() == 1 && _comps.find(TIMEMORY_WALL_CLOCK) != _comps.end())
{
// using wall_clock directly is lower overhead than using it via user_bundle
bundle_t::get_initializer() = [](bundle_t& _bundle) {
_bundle.initialize<comp::wall_clock>();
};
}
else if(!_comps.empty())
{
// use user_bundle for other than wall-clock
bundle_t::get_initializer() = [](bundle_t& _bundle) {
_bundle.initialize<comp::user_global_bundle>();
};
}
else
{
// empty component set: switch the runtime_enabled trait for hosttrace off
tim::trait::runtime_enabled<hosttrace>::set(false);
}
}
// always activate gotcha wrappers
auto& _fork_gotcha = get_main_bundle();
_fork_gotcha->start();
assert(_fork_gotcha->get<fork_gotcha_t>()->get_is_running());
// environment settings
auto shmem_size_hint = tim::get_env<size_t>("HOSTTRACE_SHMEM_SIZE_HINT_KB", 40960);
auto buffer_size = tim::get_env<size_t>("HOSTTRACE_BUFFER_SIZE_KB", 1024000);
assert(_fork_gotcha->get<mpi_gotcha_t>()->get_is_running());
perfetto::TracingInitArgs args{};
perfetto::TraceConfig cfg{};
perfetto::protos::gen::TrackEventConfig track_event_cfg{};
auto *buffer_config = cfg.add_buffers();
buffer_config->set_size_kb(buffer_size);
buffer_config->set_fill_policy(perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD);
// perfetto initialization
if(get_use_perfetto())
{
// environment settings
auto shmem_size_hint =
tim::get_env<size_t>("HOSTTRACE_SHMEM_SIZE_HINT_KB", 40960);
auto buffer_size = tim::get_env<size_t>("HOSTTRACE_BUFFER_SIZE_KB", 1024000);
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
ds_cfg->set_name("track_event");
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
auto* buffer_config = cfg.add_buffers();
buffer_config->set_size_kb(buffer_size);
buffer_config->set_fill_policy(
perfetto::protos::gen::TraceConfig_BufferConfig_FillPolicy_DISCARD);
args.shmem_size_hint_kb = shmem_size_hint;
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
ds_cfg->set_name("track_event");
ds_cfg->set_track_event_config_raw(track_event_cfg.SerializeAsString());
// any backend value other than "inprocess" turns on the system backend and
// any value other than "system" turns on the in-process backend, so a value
// matching neither string enables both
if(get_backend() != "inprocess")
args.backends |= perfetto::kSystemBackend;
if(get_backend() != "system")
args.backends |= perfetto::kInProcessBackend;
args.shmem_size_hint_kb = shmem_size_hint;
perfetto::Tracing::Initialize(args);
perfetto::TrackEvent::Register();
if(get_backend() != "inprocess")
args.backends |= perfetto::kSystemBackend;
if(get_backend() != "system")
args.backends |= perfetto::kInProcessBackend;
(void) get_output_filename();
perfetto::Tracing::Initialize(args);
perfetto::TrackEvent::Register();
tim::print_env(std::cerr,
[](const std::string& _v) { return _v.find("HOSTTRACE_") == 0; });
(void) get_perfetto_output_filename();
}
if(!is_system_backend())
// functors for starting and stopping timemory
static auto _push_timemory = [](const char* name) {
auto& _data = get_timemory_data();
// this generates a hash for the raw string array
auto _hash = tim::add_hash_id(tim::string_view_t{ name });
// allocate raw storage, record the pointer on this thread's bundle stack,
// then construct the bundle in place from the hash and start it
auto* _bundle = _data.allocator.allocate(1);
_data.bundles.emplace_back(_bundle);
_data.allocator.construct(_bundle, _hash);
_bundle->start();
};
static auto _pop_timemory = [](const char* name) {
auto& _data = get_timemory_data();
if(_data.bundles.empty())
{
HOSTTRACE_DEBUG("[%s] skipped %s :: empty bundle stack\n",
"hosttrace_pop_trace", name);
return;
}
// stop, destroy and release the most recently pushed bundle (LIFO order)
_data.bundles.back()->stop();
_data.allocator.destroy(_data.bundles.back());
_data.allocator.deallocate(_data.bundles.back(), 1);
_data.bundles.pop_back();
};
// pick the push/pop implementation from the enabled backends
if(get_use_perfetto() && get_use_timemory())
{
// if both are used, then use perfetto overload for calling lambda to launch
// timemory
get_functors().first = [](const char* name) {
TRACE_EVENT_BEGIN("hosttrace", perfetto::StaticString(name),
[&](perfetto::EventContext) { _push_timemory(name); });
};
get_functors().second = [](const char* name) {
TRACE_EVENT_END("hosttrace",
[&](perfetto::EventContext) { _pop_timemory(name); });
};
}
else if(get_use_perfetto())
{
get_functors().first = [](const char* name) {
TRACE_EVENT_BEGIN("hosttrace", perfetto::StaticString(name));
};
get_functors().second = [](const char*) { TRACE_EVENT_END("hosttrace"); };
}
else if(get_use_timemory())
{
get_functors().first = _push_timemory;
get_functors().second = _pop_timemory;
}
// only rank 0 calls print_env; the predicate selects names that begin with
// "HOSTTRACE_"
if(tim::dmp::rank() == 0)
{
tim::print_env(std::cerr,
[](const std::string& _v) { return _v.find("HOSTTRACE_") == 0; });
}
if(get_use_perfetto() && !is_system_backend())
{
#if defined(CUSTOM_DATA_SOURCE)
// Add the following:
perfetto::DataSourceDescriptor dsd{};
dsd.set_name("com.example.custom_data_source");
CustomDataSource::Register(dsd);
ds_cfg = cfg.add_data_sources()->mutable_config();
auto* ds_cfg = cfg.add_data_sources()->mutable_config();
ds_cfg->set_name("com.example.custom_data_source");
CustomDataSource::Trace([](CustomDataSource::TraceContext ctx) {
auto packet = ctx.NewTracePacket();
@@ -273,87 +330,118 @@ hosttrace_init_perfetto()
// ends the tracing session
static auto _ensure_finalization = ensure_finalization();
puts("");
if(tim::dmp::rank() == 0)
puts("");
return true;
}
} // namespace
//--------------------------------------------------------------------------------------//
extern "C"
{
// C-linkage hook marking the start of the region labelled `name`.
// The visible code bails out when the state is Finalized, triggers
// hosttrace_init_perfetto() when the state is not yet Active, and finally
// forwards `name` to get_functors().first.
// NOTE(review): two revisions appear interleaved here (a get_debug()/fprintf
// print next to HOSTTRACE_DEBUG, two "not active" checks, a direct
// TRACE_EVENT_BEGIN next to the functor call). Confirm which statements are
// live before modifying.
void hosttrace_push_trace(const char* name)
{
if(get_debug())
fprintf(stderr, "[%s] %s\n", __FUNCTION__, name);
// return if not active
if(get_state() != State::Active && !hosttrace_init_perfetto())
if(get_state() == State::Finalized)
return;
// TRACE_EVENT_BEGIN(
// "hosttrace", perfetto::StaticString(name),
// [&](perfetto::EventContext ctx) { PRINT_HERE("executing %s", name); });
TRACE_EVENT_BEGIN("hosttrace", perfetto::StaticString(name));
// hosttrace_init_perfetto() is only evaluated when the state is not Active
// (short-circuit &&); a false result means the event is dropped
if(get_state() != State::Active && !hosttrace_init_perfetto())
{
HOSTTRACE_DEBUG("[%s] %s :: not active and perfetto not initialized\n",
__FUNCTION__, name);
return;
}
else
{
HOSTTRACE_DEBUG("[%s] %s\n", __FUNCTION__, name);
}
// dispatch to whichever push implementation is currently installed
get_functors().first(name);
}
// C-linkage hook marking the end of the region labelled `name`.
// Unlike hosttrace_push_trace, the visible code never attempts
// initialization: when the state is not Active the call only logs.
// Otherwise `name` is forwarded to get_functors().second.
// NOTE(review): two revisions appear interleaved here (an early
// `return` plus direct TRACE_EVENT_END next to the newer if/else that calls
// the functor). Confirm which statements are live before modifying.
void hosttrace_pop_trace(const char* name)
{
if(get_debug())
fprintf(stderr, "[%s] %s\n", __FUNCTION__, name);
// return if not active
if(get_state() != State::Active)
return;
// TRACE_EVENT_END("hosttrace",
// [&](perfetto::EventContext ctx) { PRINT_HERE("executing %s", name); });
TRACE_EVENT_END("hosttrace");
if(get_state() == State::Active)
{
HOSTTRACE_DEBUG("[%s] %s\n", __FUNCTION__, name);
// dispatch to whichever pop implementation is currently installed
get_functors().second(name);
}
else
{
HOSTTRACE_DEBUG("[%s] %s :: not active\n", __FUNCTION__, name);
}
}
// C-linkage initialization hook. All three parameters are unnamed and
// unused; the function logs its own name and calls hosttrace_init_perfetto(),
// discarding the bool it returns.
// NOTE(review): both the get_debug()/fprintf print and HOSTTRACE_DEBUG are
// present, which looks like old and new diff lines side by side — confirm.
void hosttrace_trace_init(const char*, bool, const char*)
{
if(get_debug())
fprintf(stderr, "[%s]\n", __FUNCTION__);
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
hosttrace_init_perfetto();
}
void hosttrace_trace_finalize(void)
{
if(get_debug())
fprintf(stderr, "[%s]\n", __FUNCTION__);
// return if not active
if(get_state() != State::Active)
return;
puts("");
HOSTTRACE_DEBUG("[%s]\n", __FUNCTION__);
if(tim::dmp::rank() == 0)
puts("");
get_state() = State::Finalized;
if(get_fork_gotcha())
if(get_main_bundle())
{
get_fork_gotcha()->stop();
std::cout << *get_fork_gotcha() << std::endl;
get_fork_gotcha().reset();
get_main_bundle()->stop();
int64_t _id = (get_use_mpi()) ? tim::dmp::rank() : tim::process::get_id();
std::stringstream _ss{};
_ss << "[" << __FUNCTION__ << "][" << _id << "] " << *get_main_bundle()
<< "\n";
std::cout << _ss.str();
get_main_bundle().reset();
}
if(!is_system_backend())
// ensure that all the MT instances are flushed
for(auto& itr : hosttrace_timemory_data::instances())
{
while(!itr.bundles.empty())
{
itr.bundles.back()->stop();
itr.bundles.back()->pop();
itr.allocator.destroy(itr.bundles.back());
itr.allocator.deallocate(itr.bundles.back(), 1);
itr.bundles.pop_back();
}
}
if(get_use_perfetto() && !is_system_backend())
{
// Make sure the last event is closed for this example.
perfetto::TrackEvent::Flush();
auto& tracing_session = get_trace_session();
tracing_session->StopBlocking();
std::vector<char> trace_data{ tracing_session->ReadTraceBlocking() };
if(trace_data.empty())
{
fprintf(stderr,
"[%s]> trace data is empty. File '%s' will not be written...\n",
__FUNCTION__, get_output_filename().c_str());
__FUNCTION__, get_perfetto_output_filename().c_str());
return;
}
// Write the trace into a file.
fprintf(stderr, "[%s]> Outputting '%s'. Trace data: %lu bytes...\n",
__FUNCTION__, get_output_filename().c_str(),
__FUNCTION__, get_perfetto_output_filename().c_str(),
(unsigned long) trace_data.size());
std::ofstream output{};
output.open(get_output_filename(), std::ios::out | std::ios::binary);
output.open(get_perfetto_output_filename(), std::ios::out | std::ios::binary);
if(!output)
fprintf(stderr, "[%s]> Error opening '%s'...\n", __FUNCTION__,
get_output_filename().c_str());
get_perfetto_output_filename().c_str());
else
output.write(&trace_data[0], trace_data.size());
output.close();
@@ -364,26 +452,33 @@ extern "C"
// C-linkage hook that logs and then forwards an environment variable
// name/value pair to tim::set_env.
// NOTE(review): both the get_debug()/fprintf print and HOSTTRACE_DEBUG are
// present, which looks like old and new diff lines side by side — confirm.
void hosttrace_trace_set_env(const char* env_name, const char* env_val)
{
if(get_debug())
fprintf(stderr, "[%s] Setting env: %s=%s\n", __FUNCTION__, env_name, env_val);
HOSTTRACE_DEBUG("[%s] Setting env: %s=%s\n", __FUNCTION__, env_name, env_val);
// NOTE(review): the trailing 0 is presumably a setenv-style "do not
// overwrite" flag — confirm against tim::set_env
tim::set_env(env_name, env_val, 0);
}
// C-linkage hook recording whether MPI is in use.
//
// Only the combination use == true and attached == false has any effect:
// the main bundle is started, HOSTTRACE_USE_MPI is set to "ON" in the
// environment, the use-MPI flag is raised, and the state moves to
// State::DelayedInit. Every other combination just logs the arguments.
void hosttrace_trace_set_mpi(bool use, bool attached)
{
    HOSTTRACE_DEBUG("[%s] use: %s, attached: %s\n", __FUNCTION__, (use) ? "y" : "n",
                    (attached) ? "y" : "n");

    // nothing to configure unless MPI is requested and we are not attached
    if(!use || attached)
        return;

    get_main_bundle()->start();
    tim::set_env("HOSTTRACE_USE_MPI", "ON", 1);
    get_use_mpi() = true;
    get_state()   = State::DelayedInit;
}
}
// NOTE(review): this span interleaves two things — the signatures and bodies
// of the two fork_gotcha::audit overloads (incoming / outgoing) and the
// definition of get_main_bundle(). It looks like a diff rendered without +/-
// markers; confirm which lines belong to the current revision before editing.
void
fork_gotcha::audit(const gotcha_data_t& _data, audit::incoming)
// get_main_bundle(): returns a reference to a function-local static
// unique_ptr, so callers can start/stop the bundle or reset() the pointer.
std::unique_ptr<hosttrace_bundle_t>&
get_main_bundle()
{
PRINT_HERE("%s",
"Warning! Calling fork() within an OpenMPI application using libfabric "
"may result is segmentation fault");
TIMEMORY_CONDITIONAL_DEMANGLED_BACKTRACE(get_debug(), 16);
}
void
fork_gotcha::audit(const gotcha_data_t& _data, audit::outgoing, pid_t _pid)
{
PRINT_HERE("%s() return PID %i", _data.tool_id.c_str(), (int) _pid);
// the comma expression makes setup_gotchas() run before the bundle is
// constructed, as part of the one-time initialization of the static
static auto _v =
(setup_gotchas(), std::make_unique<hosttrace_bundle_t>(
"hosttrace", quirk::config<quirk::auto_start>{}));
return _v;
}
namespace
@@ -393,10 +488,3 @@ namespace
// but static variable in hosttrace_init_perfetto is more likely
auto _ensure_finalization = ensure_finalization();
} // namespace
// File-scope macro invocations.
// NOTE(review): presumably these emit the static storage definitions that
// perfetto track events and the timemory fork_gotcha component need —
// confirm against the respective library documentation.
PERFETTO_TRACK_EVENT_STATIC_STORAGE();
TIMEMORY_INITIALIZE_STORAGE(fork_gotcha)
// the custom data source's static members are only emitted when
// CUSTOM_DATA_SOURCE is defined
#if defined(CUSTOM_DATA_SOURCE)
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(CustomDataSource);
#endif