// MIT License // // Copyright (c) 2020, The Regents of the University of California, // through Lawrence Berkeley National Laboratory (subject to receipt of any // required approvals from the U.S. Dept. of Energy). All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in all // copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. // #include "hosttrace.hpp" #include #include static bool is_driver = false; static bool allow_overlapping = false; static bool instr_dynamic_callsites = false; static size_t batch_size = 50; static strset_t extra_libs = {}; static size_t min_address_range = (1 << 9); // 512 static size_t min_loop_address_range = (1 << 9); // 512 static std::vector> hash_ids = {}; static std::map use_stubs = {}; static std::map beg_stubs = {}; static std::map end_stubs = {}; static strvec_t init_stub_names = {}; static strvec_t fini_stub_names = {}; static strset_t used_stub_names = {}; static std::vector env_variables = {}; static std::map beg_expr = {}; static std::map end_expr = {}; static const auto npos_v = string_t::npos; static string_t instr_mode = "trace"; static string_t instr_push_func = "hosttrace_push_trace"; static string_t instr_pop_func = "hosttrace_pop_trace"; static string_t instr_push_hash = "hosttrace_push_trace_hash"; static string_t instr_pop_hash = "hosttrace_pop_trace_hash"; static string_t print_instrumented = {}; static string_t print_available = {}; static string_t print_overlapping = {}; static std::string modfunc_dump_dir = "hosttrace-module-functions"; std::string get_absolute_exe_filepath(std::string exe_name); std::string get_absolute_lib_filepath(std::string lib_name); //======================================================================================// // // entry point // //======================================================================================// // int main(int argc, char** argv) { #if defined(DYNINST_API_RT) auto _dyn_api_rt_paths = tim::delimit(DYNINST_API_RT, ":"); #else auto _dyn_api_rt_paths = std::vector{}; #endif auto _dyn_api_rt_abs = get_absolute_lib_filepath("libdyninstAPI_RT.so"); _dyn_api_rt_paths.insert(_dyn_api_rt_paths.begin(), _dyn_api_rt_abs); for(auto&& itr : _dyn_api_rt_paths) { auto file_exists = [](const std::string& _fname) { struct stat _buffer; if(stat(_fname.c_str(), &_buffer) == 0) return (S_ISREG(_buffer.st_mode) != 0 || S_ISLNK(_buffer.st_mode) != 0); return false; }; if(file_exists(itr)) tim::set_env("DYNINSTAPI_RT_LIB", itr, 0); else if(file_exists(TIMEMORY_JOIN('/', itr, "libdyninstAPI_RT.so"))) tim::set_env("DYNINSTAPI_RT_LIB", TIMEMORY_JOIN('/', itr, "libdyninstAPI_RT.so"), 0); else if(file_exists(TIMEMORY_JOIN('/', itr, "libdyninstAPI_RT.a"))) tim::set_env("DYNINSTAPI_RT_LIB", TIMEMORY_JOIN('/', itr, "libdyninstAPI_RT.a"), 0); } verbprintf(0, "[hosttrace] DYNINST_API_RT: %s\n", tim::get_env("DYNINSTAPI_RT_LIB", "").c_str()); argv0 = argv[0]; bpatch = std::make_shared(); bool is_attached = false; address_space_t* addr_space = nullptr; string_t mutname = {}; string_t outfile = {}; std::vector inputlib = { "" }; std::vector libname = {}; std::vector sharedlibname = {}; std::vector staticlibname = {}; tim::process::id_t _pid = -1; bpatch->setTypeChecking(true); bpatch->setSaveFPR(true); bpatch->setDelayedParsing(true); bpatch->setInstrStackFrames(true); bpatch->setLivenessAnalysis(false); bpatch->setBaseTrampDeletion(false); bpatch->setTrampRecursive(false); bpatch->setMergeTramp(false); std::set dyninst_defs = { "TypeChecking", "SaveFPR", "DelayedParsing", "InstrStackFrames" }; int _argc = argc; int _cmdc = 0; char** _argv = new char*[_argc]; char** _cmdv = nullptr; for(int i = 0; i < argc; ++i) _argv[i] = nullptr; auto copy_str = [](char*& _dst, const char* _src) { _dst = strdup(_src); }; copy_str(_argv[0], argv[0]); for(int i = 1; i < argc; ++i) { string_t _arg = argv[i]; if(_arg.length() == 2 && _arg == "--") { _argc = i; _cmdc = argc - i - 1; _cmdv = new char*[_cmdc + 1]; _cmdv[_cmdc] = nullptr; int k = 0; for(int j = i + 1; j < argc; ++j, ++k) { copy_str(_cmdv[k], argv[j]); } mutname = _cmdv[0]; break; } else { copy_str(_argv[i], argv[i]); } } auto cmd_string = [](int _ac, char** _av) { stringstream_t ss; for(int i = 0; i < _ac; ++i) ss << _av[i] << " "; return ss.str(); }; if(_cmdc > 0 && !mutname.empty()) { auto resolved_mutname = get_absolute_exe_filepath(mutname); if(resolved_mutname != mutname) { mutname = resolved_mutname; delete _cmdv[0]; copy_str(_cmdv[0], resolved_mutname.c_str()); } } if(verbose_level > 1) { std::cout << "[hosttrace][original]: " << cmd_string(argc, argv) << std::endl; std::cout << "[hosttrace][cfg-args]: " << cmd_string(_argc, _argv) << std::endl; } if(_cmdc > 0) std::cout << "\n[hosttrace][command]: " << cmd_string(_cmdc, _cmdv) << "\n\n"; if(_cmdc > 0) cmdv0 = _cmdv[0]; std::stringstream jump_description; jump_description << "Instrument with function pointers in HOSTTRACE_JUMP_LIBRARY (default: " << tim::get_env("HOSTTRACE_JUMP_LIBRARY", "jump/libhosttrace.so") << ")"; // now can loop through the options. If the first character is '-', then we know // we have an option. Check to see if it is one of our options and process it. If // it is unrecognized, then set the errflag to report an error. When we come to a // non '-' charcter, then we must be at the application name. using parser_t = tim::argparse::argument_parser; parser_t parser("hosttrace"); parser.enable_help(); parser.add_argument() .names({ "-v", "--verbose" }) .description("Verbose output") .max_count(1); parser.add_argument().names({ "--debug" }).description("Debug output").count(0); parser.add_argument() .names({ "-e", "--error" }) .description("All warnings produce runtime errors") .count(0); parser.add_argument() .names({ "-o", "--output" }) .description("Enable binary-rewrite to new executable") .count(1); parser.add_argument() .names({ "-I", "-R", "--function-include" }) .description("Regex for selecting functions"); parser.add_argument() .names({ "-E", "--function-exclude" }) .description("Regex for excluding functions"); parser.add_argument() .names({ "-MI", "-MR", "--module-include" }) .description("Regex for selecting modules/files/libraries"); parser.add_argument() .names({ "-ME", "--module-exclude" }) .description("Regex for excluding modules/files/libraries"); parser.add_argument() .names({ "-m", "--main-function" }) .description("The primary function to instrument around, e.g. 'main'") .count(1); parser.add_argument() .names({ "-l", "--instrument-loops" }) .description("Instrument at the loop level") .count(0); parser.add_argument() .names({ "-j", "--jump" }) .description(jump_description.str()) .count(0); parser.add_argument() .names({ "-s", "--stubs" }) .description("Instrument with library stubs for LD_PRELOAD") .count(0); parser.add_argument() .names({ "-L", "--library" }) .description( "Libraries with instrumentation routines (default: \"libhosttrace\")"); parser.add_argument() .names({ "-S", "--stdlib" }) .description("Enable instrumentation of C++ standard library functions.") .count(0); parser.add_argument() .names({ "--cstdlib" }) .description("Enable instrumentation of C standard library functions.") .count(0); parser.add_argument() .names({ "-p", "--pid" }) .description("Connect to running process") .count(1); parser.add_argument() .names({ "-d", "--default-components" }) .description("Default components to instrument (only useful when timemory is " "enabled in hosttrace library)"); parser.add_argument() .names({ "-M", "--mode" }) .description("Instrumentation mode. 'trace' mode is immutable, 'region' mode is " "mutable by hosttrace library interface") .choices({ "trace", "region" }) .count(1); parser.add_argument() .names({ "--env" }) .description("Environment variables to add to the runtime in form " "VARIABLE=VALUE. E.g. use '--env HOSTTRACE_USE_TIMEMORY=ON' to " "default to using timemory instead of perfetto"); parser.add_argument() .names({ "--prefer" }) .description("Prefer this library types when available") .choices({ "shared", "static" }) .count(1); parser.add_argument({ "--driver" }, "Force main or _init/_fini instrumentation") .count(0) .action([](auto&) { is_driver = true; }); parser .add_argument({ "--mpi" }, "Enable MPI support (requires hosttrace built w/ MPI and GOTCHA " "support). NOTE: this will automatically be activated if " "MPI_Init/MPI_Init_thread and MPI_Finalize are found in the symbol " "table of target") .count(0); parser.add_argument({ "--label" }, "Labeling info for functions") .choices({ "file", "line", "return", "args" }); parser.add_argument({ "--load" }, "Supplemental instrumentation library names w/o extension (e.g. " "'libinstr' for 'libinstr.so' or 'libinstr.a')"); parser.add_argument( { "--init-functions" }, "Initialization function(s) for supplemental instrumentation libraries"); parser.add_argument( { "--fini-functions" }, "Finalization function(s) for supplemental instrumentation libraries"); parser .add_argument( { "-b", "--batch-size" }, "Dyninst supports batch insertion of multiple points. If one large batch " "insertion fails, this value will be used to create smaller batches") .count(1) .dtype("size_t") .action([](parser_t& p) { batch_size = p.get("batch-size"); }); parser .add_argument({ "--dynamic-callsites" }, "Force instrumentation if a function has dynamic callsites (e.g. " "function pointers)") .max_count(1) .dtype("boolean") .action([](parser_t& p) { instr_dynamic_callsites = p.get("dynamic-callsites"); }); parser .add_argument({ "-r", "--min-address-range" }, "If the address range of a function is less than this value, " "exclude it from instrumentation") .count(1) .dtype("size_t") .set_default(min_address_range) .action( [](parser_t& p) { min_address_range = p.get("min-address-range"); }); parser .add_argument({ "--min-address-range-loop" }, "If the address range of a function containing a loop is less than " "this value, " "exclude it from instrumentation") .count(1) .dtype("size_t") .set_default(min_loop_address_range) .action([](parser_t& p) { min_loop_address_range = p.get("min-address-range-loop"); }); parser.add_argument() .names({ "--allow-overlapping" }) .description( "Allow dyninst to instrument either multiple functions which overlap (share " "part of same function body) or single functions with multiple entry points. " "For more info, see Section 2 of the DyninstAPI documentation.") .count(0) .action([](parser_t&) { allow_overlapping = true; }); parser .add_argument( { "--print-dir" }, "Output directory for diagnostic available/instrumented/overlapping module " "function lists, e.g. {print-dir}/available.txt") .count(1) .dtype("string") .set_default(modfunc_dump_dir) .action([](parser_t& p) { modfunc_dump_dir = p.get("print-dir"); }); parser .add_argument( { "--print-instrumented" }, "Print the instrumented entities (functions, modules, or module-function " "pair) to stdout after applying regular expressions and exit") .count(1) .choices({ "functions", "modules", "functions+", "pair", "pair+" }) .action([](parser_t& p) { print_instrumented = p.get("print-instrumented"); }); parser .add_argument( { "--print-available" }, "Print the available entities for instrumentation (functions, modules, or " "module-function pair) to stdout applying regular expressions and exit") .count(1) .choices({ "functions", "modules", "functions+", "pair", "pair+" }) .action( [](parser_t& p) { print_available = p.get("print-available"); }); parser .add_argument( { "--print-overlapping" }, "Print the entities for instrumentation (functions, modules, or " "module-function pair) which overlap other function calls or have multiple " "entry points to stdout applying regular expressions and exit") .count(1) .choices({ "functions", "modules", "functions+", "pair", "pair+" }) .action([](parser_t& p) { print_overlapping = p.get("print-overlapping"); }); if(_cmdc == 0) { parser.add_argument() .names({ "-c", "--command" }) .description("Input executable and arguments (if '-- ' not provided)") .count(1); } parser .add_argument({ "--dyninst-options" }, "Advanced dyninst options: BPatch::set