From 90ff7188f81be23fc243bb79e902315d95622a9a Mon Sep 17 00:00:00 2001 From: "Jonathan R. Madsen" Date: Wed, 21 Sep 2022 13:58:14 -0500 Subject: [PATCH] Crusher hackathon updates (#164) - improved error handling in dyninst - improved error handling in omnitrace exe - new logging facility for omnitrace exe - improved backtraces - disable concurrent kernels in rocprofiler - updates `setup-env.sh` and modulefile - set `omnitrace_ROOT` - set `HSA_TOOLS_LIB` if roctracer or rocprofiler enabled - set `ROCP_TOOL_LIB` if rocprofiler enabled - closes #163 - No longer make setting `HSA_ENABLE_INTERRUPT=0` the default - this has performance implications - this was set to workaround a bug in ROCR which caused an ioctl call in ROCm to hang when interrupted. But it was only interrupted when realtime sampling was enabled since the CPU-clock doesn't increment when waiting - This bug should be fixed in ROCm 5.3 - omnitrace no longer activates a realtime sampler by default when sampling, thus this bug is no longer encountered unless the user explicitly triggers realtime sampling --- CMakeLists.txt | 8 + cmake/Templates/modulefile.in | 6 + cmake/Templates/setup-env.sh.in | 13 ++ external/dyninst | 2 +- external/timemory | 2 +- source/bin/omnitrace/CMakeLists.txt | 4 +- source/bin/omnitrace/details.cpp | 129 ++++++++++---- source/bin/omnitrace/fwd.hpp | 110 +++++++----- source/bin/omnitrace/log.cpp | 123 +++++++++++++ source/bin/omnitrace/log.hpp | 91 ++++++++++ source/bin/omnitrace/module_function.cpp | 44 ++++- source/bin/omnitrace/module_function.hpp | 58 +++--- source/bin/omnitrace/omnitrace.cpp | 167 ++++++++++++++---- source/bin/omnitrace/omnitrace.hpp | 101 +++++++---- source/bin/tests/CMakeLists.txt | 21 ++- source/lib/common/setup.hpp | 8 - source/lib/omnitrace-user/CMakeLists.txt | 7 +- source/lib/omnitrace-user/omnitrace/types.h | 88 +++++++++ source/lib/omnitrace-user/omnitrace/user.h | 67 +------ source/lib/omnitrace-user/user.cpp | 2 + source/lib/omnitrace/library.cpp | 6 +- .../library/components/backtrace.cpp | 2 +- .../library/components/backtrace_metrics.cpp | 2 +- .../components/backtrace_timestamp.cpp | 2 +- .../components/pthread_create_gotcha.cpp | 4 +- .../components/pthread_mutex_gotcha.cpp | 2 +- source/lib/omnitrace/library/cpu_freq.cpp | 4 +- source/lib/omnitrace/library/debug.hpp | 33 ++-- source/lib/omnitrace/library/rocm.cpp | 2 +- source/lib/omnitrace/library/rocm_smi.cpp | 2 +- source/lib/omnitrace/library/sampling.cpp | 14 +- source/lib/omnitrace/library/thread_data.hpp | 108 ++++++----- source/lib/omnitrace/library/thread_info.cpp | 16 +- source/lib/omnitrace/library/thread_info.hpp | 25 ++- 34 files changed, 932 insertions(+), 341 deletions(-) create mode 100644 source/bin/omnitrace/log.cpp create mode 100644 source/bin/omnitrace/log.hpp create mode 100644 source/lib/omnitrace-user/omnitrace/types.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f061bf20e..3bc4dd2e31 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -270,6 +270,14 @@ add_subdirectory(source) # # ------------------------------------------------------------------------------# +if(NOT OMNITRACE_USE_ROCTRACER AND NOT OMNITRACE_USE_ROCPROFILER) + set(OMNITRACE_HSA_ENV "# ") +endif() + +if(NOT OMNITRACE_USE_ROCPROFILER) + set(OMNITRACE_ROCP_ENV "# ") +endif() + configure_file( ${PROJECT_SOURCE_DIR}/omnitrace.cfg ${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/omnitrace.cfg diff --git a/cmake/Templates/modulefile.in b/cmake/Templates/modulefile.in index 3aa91e94ed..8bed058ec1 100644 --- a/cmake/Templates/modulefile.in +++ b/cmake/Templates/modulefile.in @@ -8,8 +8,14 @@ puts stderr "Loads @PROJECT_NAME@ v@PROJECT_VERSION@" set ROOT [file normalize [file dirname [file normalize ${ModulesCurrentModulefile}]]/../../..] +setenv @PROJECT_NAME@_ROOT "${ROOT}" prepend-path CMAKE_PREFIX_PATH "${ROOT}" prepend-path PATH "${ROOT}/bin" prepend-path LD_LIBRARY_PATH "${ROOT}/@CMAKE_INSTALL_LIBDIR@" prepend-path PYTHONPATH "${ROOT}/@CMAKE_INSTALL_PYTHONDIR@" setenv @PROJECT_NAME@_DIR "${ROOT}/@CMAKE_INSTALL_DATAROOTDIR@/cmake/@PROJECT_NAME@" + +@OMNITRACE_HSA_ENV@setenv HSA_TOOLS_LIB "${ROOT}/@CMAKE_INSTALL_LIBDIR@/@CMAKE_SHARED_LIBRARY_PREFIX@omnitrace@CMAKE_SHARED_LIBRARY_SUFFIX@" +@OMNITRACE_HSA_ENV@setenv HSA_TOOLS_REPORT_LOAD_FAILURE 1 +@OMNITRACE_ROCP_ENV@setenv ROCP_TOOL_LIB "${ROOT}/@CMAKE_INSTALL_LIBDIR@/@CMAKE_SHARED_LIBRARY_PREFIX@omnitrace@CMAKE_SHARED_LIBRARY_SUFFIX@" +@OMNITRACE_ROCP_ENV@setenv ROCP_HSA_INTERCEPT 1 diff --git a/cmake/Templates/setup-env.sh.in b/cmake/Templates/setup-env.sh.in index 6389e4f2fe..6c7b96fca7 100644 --- a/cmake/Templates/setup-env.sh.in +++ b/cmake/Templates/setup-env.sh.in @@ -8,14 +8,27 @@ if [ ! -d "${BASEDIR}" ]; then return 1 fi +@PROJECT_NAME@_ROOT=${BASEDIR} PATH=${BASEDIR}/bin:${PATH} LD_LIBRARY_PATH=${BASEDIR}/@CMAKE_INSTALL_LIBDIR@:${LD_LIBRARY_PATH} PYTHONPATH=${BASEDIR}/@CMAKE_INSTALL_PYTHONDIR@:${PYTHONPATH} CMAKE_PREFIX_PATH=${BASEDIR}:${CMAKE_PREFIX_PATH} @PROJECT_NAME@_DIR=${BASEDIR}/@CMAKE_INSTALL_DATAROOTDIR@/cmake/@PROJECT_NAME@ +export @PROJECT_NAME@_ROOT export PATH export LD_LIBRARY_PATH export PYTHONPATH export CMAKE_PREFIX_PATH export @PROJECT_NAME@_DIR + +# ROCm environment variables +@OMNITRACE_HSA_ENV@HSA_TOOLS_LIB="${BASEDIR}/@CMAKE_INSTALL_LIBDIR@/@CMAKE_SHARED_LIBRARY_PREFIX@omnitrace@CMAKE_SHARED_LIBRARY_SUFFIX@" +@OMNITRACE_HSA_ENV@HSA_TOOLS_REPORT_LOAD_FAILURE=1 +@OMNITRACE_ROCP_ENV@ROCP_TOOL_LIB="${BASEDIR}/@CMAKE_INSTALL_LIBDIR@/@CMAKE_SHARED_LIBRARY_PREFIX@omnitrace@CMAKE_SHARED_LIBRARY_SUFFIX@" +@OMNITRACE_ROCP_ENV@ROCP_HSA_INTERCEPT=1 + +@OMNITRACE_HSA_ENV@export HSA_TOOLS_LIB +@OMNITRACE_HSA_ENV@export HSA_TOOLS_REPORT_LOAD_FAILURE +@OMNITRACE_ROCP_ENV@export ROCP_TOOL_LIB +@OMNITRACE_ROCP_ENV@export ROCP_HSA_INTERCEPT diff --git a/external/dyninst b/external/dyninst index d380d57aec..280f39610e 160000 --- a/external/dyninst +++ b/external/dyninst @@ -1 +1 @@ -Subproject commit d380d57aec27a6b5e44e0301e6f46f6996160d1b +Subproject commit 280f39610e25dfcf364091571b292fcb8fed8be2 diff --git a/external/timemory b/external/timemory index cb8fc16dd6..45fdc98ed9 160000 --- a/external/timemory +++ b/external/timemory @@ -1 +1 @@ -Subproject commit cb8fc16dd68a649ac0b2855399165d17b90d6e10 +Subproject commit 45fdc98ed93d6d699ba246cb39ff27c392aa8e00 diff --git a/source/bin/omnitrace/CMakeLists.txt b/source/bin/omnitrace/CMakeLists.txt index 049557271f..1f4460022b 100644 --- a/source/bin/omnitrace/CMakeLists.txt +++ b/source/bin/omnitrace/CMakeLists.txt @@ -13,11 +13,13 @@ target_sources( PRIVATE ${CMAKE_CURRENT_LIST_DIR}/omnitrace.cpp ${CMAKE_CURRENT_LIST_DIR}/details.cpp ${CMAKE_CURRENT_LIST_DIR}/function_signature.cpp + ${CMAKE_CURRENT_LIST_DIR}/log.cpp ${CMAKE_CURRENT_LIST_DIR}/module_function.cpp ${CMAKE_CURRENT_LIST_DIR}/omnitrace.hpp - ${CMAKE_CURRENT_LIST_DIR}/info.hpp ${CMAKE_CURRENT_LIST_DIR}/fwd.hpp ${CMAKE_CURRENT_LIST_DIR}/function_signature.hpp + ${CMAKE_CURRENT_LIST_DIR}/info.hpp + ${CMAKE_CURRENT_LIST_DIR}/log.hpp ${CMAKE_CURRENT_LIST_DIR}/module_function.hpp) target_link_libraries( diff --git a/source/bin/omnitrace/details.cpp b/source/bin/omnitrace/details.cpp index 9596bd0c65..0c4bd0c0f8 100644 --- a/source/bin/omnitrace/details.cpp +++ b/source/bin/omnitrace/details.cpp @@ -22,6 +22,7 @@ #include "function_signature.hpp" #include "fwd.hpp" +#include "log.hpp" #include "omnitrace.hpp" #include @@ -148,6 +149,8 @@ get_func_file_line_info(module_t* module, procedure_t* func) { using address_t = Dyninst::Address; + OMNITRACE_ADD_LOG_ENTRY("Getting function line info for", get_name(func)); + auto _file_name = get_name(module); auto _func_name = get_name(func); auto _return_type = get_return_type(func); @@ -179,6 +182,8 @@ function_signature get_loop_file_line_info(module_t* module, procedure_t* func, flow_graph_t*, basic_loop_t* loopToInstrument) { + OMNITRACE_ADD_LOG_ENTRY("Getting loop line info for", get_name(func)); + auto basic_blocks = std::vector{}; loopToInstrument->getLoopBasicBlocksExclusive(basic_blocks); @@ -271,6 +276,8 @@ get_basic_block_file_line_info(module_t* module, procedure_t* func) std::map _data{}; if(!func) return _data; + OMNITRACE_ADD_LOG_ENTRY("Getting basic block line info for", get_name(func)); + auto* _cfg = func->getCFG(); auto _basic_blocks = std::set{}; _cfg->getAllBasicBlocks(_basic_blocks); @@ -347,6 +354,8 @@ get_basic_block_file_line_info(module_t* module, procedure_t* func) std::vector get_source_code(module_t* module, procedure_t* func) { + OMNITRACE_ADD_LOG_ENTRY("Getting source code for", get_name(func)); + std::vector _lines{}; if(!module || !func) return _lines; auto* _cfg = func->getCFG(); @@ -370,26 +379,6 @@ get_source_code(module_t* module, procedure_t* func) return _lines; } -//======================================================================================// -// -// Error callback routine. -// -void -errorFunc(error_level_t level, int num, const char** params) -{ - char line[256]; - - const char* msg = bpatch->getEnglishErrorString(num); - bpatch->formatErrorString(line, sizeof(line), msg, params); - - if(num != expect_error) - { - printf("Error #%d (level %d): %s\n", num, level, line); - // We consider some errors fatal. - if(num == 101) exit(-1); - } -} - //======================================================================================// // // For compatibility purposes @@ -427,11 +416,30 @@ find_function(image_t* app_image, const std::string& _name, const strset_t& _ext return _func; } +//======================================================================================// +// +// Error callback routine. +// +void +errorFunc(error_level_t level, int num, const char** params) +{ + error_func_real(level, num, params); +} + //======================================================================================// // void error_func_real(error_level_t level, int num, const char* const* params) { + char line[4096]; + + const char* msg = bpatch->getEnglishErrorString(num); + bpatch->formatErrorString(line, sizeof(line), msg, params); + + OMNITRACE_ADD_LOG_ENTRY("Dyninst error function called with level", level, + ":: ID# =", num, "::", line) + .force(level < BPatchInfo); + if(num == 0) { // conditional reporting of warnings and informational messages @@ -439,34 +447,95 @@ error_func_real(error_level_t level, int num, const char* const* params) { if(level == BPatchInfo) { - if(error_print > 1) printf("%s\n", params[0]); + errprintf(2, "%s :: %i :: %s\n%s", std::to_string(level).c_str(), num, + line, tim::log::color::end()); } else - printf("%s", params[0]); + { + verbprintf(0, "%s :: %i :: %s\n%s", std::to_string(level).c_str(), num, + line, tim::log::color::end()); + } } } else { // reporting of actual errors - char line[256]; - const char* msg = bpatch->getEnglishErrorString(num); - bpatch->formatErrorString(line, sizeof(line), msg, params); if(num != expect_error) { - printf("Error #%d (level %d): %s\n", num, level, line); + verbprintf(-1, "%s :: %i :: %s\n%s", std::to_string(level).c_str(), num, line, + tim::log::color::end()); // We consider some errors fatal. - if(num == 101) exit(-1); + if(num == 101) throw std::runtime_error(msg); } } } //======================================================================================// // -// We've a null error function when we don't want to display an error +// Just log it // void error_func_fake(error_level_t level, int num, const char* const* params) { - consume_parameters(level, num, params); - // It does nothing. + char line[4096]; + + const char* msg = bpatch->getEnglishErrorString(num); + bpatch->formatErrorString(line, sizeof(line), msg, params); + + // just log it + OMNITRACE_ADD_LOG_ENTRY("Dyninst error function called with level", level, + ":: ID# =", num, "::", line) + .force(level < BPatchInfo); } + +namespace std +{ +std::string +to_string(instruction_category_t _category) +{ + using namespace Dyninst::InstructionAPI; + switch(_category) + { + case c_CallInsn: return "function_call"; + case c_ReturnInsn: return "return"; + case c_BranchInsn: return "branch"; + case c_CompareInsn: return "compare"; + case c_PrefetchInsn: return "prefetch"; + case c_SysEnterInsn: return "sys_enter"; + case c_SyscallInsn: return "sys_call"; + case c_VectorInsn: return "vector"; + case c_GPUKernelExitInsn: return "gpu_kernel_exit"; + case c_NoCategory: return "no_category"; + } + return std::string{ "unknown_category_id_" } + + std::to_string(static_cast(_category)); +} + +std::string +to_string(error_level_t _level) +{ + switch(_level) + { + case BPatchFatal: + { + return JOIN("", tim::log::color::fatal(), "FatalError"); + } + case BPatchSerious: + { + return JOIN("", tim::log::color::fatal(), "SeriousError"); + } + case BPatchWarning: + { + return JOIN("", tim::log::color::warning(), "Warning"); + } + case BPatchInfo: + { + return JOIN("", tim::log::color::info(), "Info"); + } + default: break; + } + + return JOIN("", tim::log::color::warning(), "UnknownErrorLevel", + static_cast(_level)); +} +} // namespace std diff --git a/source/bin/omnitrace/fwd.hpp b/source/bin/omnitrace/fwd.hpp index 43b7c9e5db..c65a39f3b3 100644 --- a/source/bin/omnitrace/fwd.hpp +++ b/source/bin/omnitrace/fwd.hpp @@ -22,6 +22,8 @@ #pragma once +#include "log.hpp" + #include #include #include @@ -48,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -88,48 +91,49 @@ struct module_function; template using bpvector_t = BPatch_Vector; -using string_t = std::string; -using string_view_t = std::string_view; -using stringstream_t = std::stringstream; -using strvec_t = std::vector; -using strset_t = std::set; -using regexvec_t = std::vector; -using fmodset_t = std::set; -using fixed_modset_t = std::map; -using exec_callback_t = BPatchExecCallback; -using exit_callback_t = BPatchExitCallback; -using fork_callback_t = BPatchForkCallback; -using patch_t = BPatch; -using process_t = BPatch_process; -using thread_t = BPatch_thread; -using binary_edit_t = BPatch_binaryEdit; -using image_t = BPatch_image; -using module_t = BPatch_module; -using procedure_t = BPatch_function; -using snippet_t = BPatch_snippet; -using call_expr_t = BPatch_funcCallExpr; -using address_space_t = BPatch_addressSpace; -using flow_graph_t = BPatch_flowGraph; -using statement_t = BPatch_statement; -using basic_block_t = BPatch_basicBlock; -using basic_loop_t = BPatch_basicBlockLoop; -using procedure_loc_t = BPatch_procedureLocation; -using point_t = BPatch_point; -using object_t = BPatch_object; -using local_var_t = BPatch_localVar; -using sequence_t = BPatch_sequence; -using const_expr_t = BPatch_constExpr; -using error_level_t = BPatchErrorLevel; -using snippet_handle_t = BPatchSnippetHandle; -using patch_pointer_t = std::shared_ptr; -using snippet_pointer_t = std::shared_ptr; -using call_expr_pointer_t = std::shared_ptr; -using snippet_vec_t = bpvector_t; -using procedure_vec_t = bpvector_t; -using basic_block_set_t = std::set; -using basic_loop_vec_t = bpvector_t; -using snippet_pointer_vec_t = std::vector; -using instruction_t = Dyninst::InstructionAPI::Instruction; +using string_t = std::string; +using string_view_t = std::string_view; +using stringstream_t = std::stringstream; +using strvec_t = std::vector; +using strset_t = std::set; +using regexvec_t = std::vector; +using fmodset_t = std::set; +using fixed_modset_t = std::map; +using exec_callback_t = BPatchExecCallback; +using exit_callback_t = BPatchExitCallback; +using fork_callback_t = BPatchForkCallback; +using patch_t = BPatch; +using process_t = BPatch_process; +using thread_t = BPatch_thread; +using binary_edit_t = BPatch_binaryEdit; +using image_t = BPatch_image; +using module_t = BPatch_module; +using procedure_t = BPatch_function; +using snippet_t = BPatch_snippet; +using call_expr_t = BPatch_funcCallExpr; +using address_space_t = BPatch_addressSpace; +using flow_graph_t = BPatch_flowGraph; +using statement_t = BPatch_statement; +using basic_block_t = BPatch_basicBlock; +using basic_loop_t = BPatch_basicBlockLoop; +using procedure_loc_t = BPatch_procedureLocation; +using point_t = BPatch_point; +using object_t = BPatch_object; +using local_var_t = BPatch_localVar; +using sequence_t = BPatch_sequence; +using const_expr_t = BPatch_constExpr; +using error_level_t = BPatchErrorLevel; +using snippet_handle_t = BPatchSnippetHandle; +using patch_pointer_t = std::shared_ptr; +using snippet_pointer_t = std::shared_ptr; +using call_expr_pointer_t = std::shared_ptr; +using snippet_vec_t = bpvector_t; +using procedure_vec_t = bpvector_t; +using basic_block_set_t = std::set; +using basic_loop_vec_t = bpvector_t; +using snippet_pointer_vec_t = std::vector; +using instruction_t = Dyninst::InstructionAPI::Instruction; +using instruction_category_t = Dyninst::InstructionAPI::InsnCategory; void omnitrace_prefork_callback(thread_t* parent, thread_t* child); @@ -172,6 +176,7 @@ extern bool werror; extern bool debug_print; extern bool instr_print; extern int verbose_level; +extern int num_log_entries; // // instrumentation settings // @@ -204,12 +209,18 @@ extern regexvec_t file_exclude; extern regexvec_t file_restrict; extern regexvec_t func_restrict; extern CodeCoverageMode coverage_mode; + +// logging +extern std::unique_ptr log_ofs; // //======================================================================================// // control debug printf statements #define errprintf(LEVEL, ...) \ { \ + char _logmsgbuff[FUNCNAMELEN]; \ + snprintf(_logmsgbuff, FUNCNAMELEN, __VA_ARGS__); \ + OMNITRACE_ADD_LOG_ENTRY(_logmsgbuff); \ if(werror || LEVEL < 0) \ { \ if(debug_print || verbose_level >= LEVEL) \ @@ -229,6 +240,9 @@ extern CodeCoverageMode coverage_mode; // control verbose printf statements #define verbprintf(LEVEL, ...) \ { \ + char _logmsgbuff[FUNCNAMELEN]; \ + snprintf(_logmsgbuff, FUNCNAMELEN, __VA_ARGS__); \ + OMNITRACE_ADD_LOG_ENTRY(_logmsgbuff); \ if(debug_print || verbose_level >= LEVEL) \ fprintf(stdout, "[omnitrace][exe] " __VA_ARGS__); \ fflush(stdout); \ @@ -236,6 +250,9 @@ extern CodeCoverageMode coverage_mode; #define verbprintf_bare(LEVEL, ...) \ { \ + char _logmsgbuff[FUNCNAMELEN]; \ + snprintf(_logmsgbuff, FUNCNAMELEN, __VA_ARGS__); \ + OMNITRACE_ADD_LOG_ENTRY(_logmsgbuff); \ if(debug_print || verbose_level >= LEVEL) fprintf(stdout, __VA_ARGS__); \ fflush(stdout); \ } @@ -289,9 +306,6 @@ bool insert_instr(address_space_t* mutatee, Tp traceFunc, procedure_loc_t traceLoc, basic_block_t* basicBlock, bool allow_traps = instr_traps); -void -errorFunc(error_level_t level, int num, const char** params); - procedure_t* find_function(image_t* appImage, const string_t& functionName, const strset_t& = {}); @@ -306,3 +320,9 @@ get_name(procedure_t* _module); std::string_view get_name(module_t* _module); + +namespace std +{ +std::string to_string(instruction_category_t); +std::string to_string(error_level_t); +} // namespace std diff --git a/source/bin/omnitrace/log.cpp b/source/bin/omnitrace/log.cpp new file mode 100644 index 0000000000..2444d46a2b --- /dev/null +++ b/source/bin/omnitrace/log.cpp @@ -0,0 +1,123 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#include "log.hpp" +#include "fwd.hpp" + +#include +#include +#include +#include + +namespace color = tim::log::color; + +namespace +{ +std::vector log_entries = {}; + +auto +get_color_regex(std::string _v) +{ + auto _p = _v.find("["); + if(_p != std::string::npos) _v.insert(_p, "\\"); + return JOIN("", "\\", _v); +} + +auto _color_regex = std::regex{ JOIN("", "(", get_color_regex(tim::log::color::info()), + "|", get_color_regex(tim::log::color::source()), "|", + get_color_regex(tim::log::color::warning()), "|", + get_color_regex(tim::log::color::fatal()), "|", + get_color_regex(tim::log::color::end()), ")"), + std::regex_constants::optimize }; +} // namespace + +log_entry::log_entry(std::string _msg) +: m_message{ std::move(_msg) } +, m_backtrace{ tim::get_unw_stack<4, 1>() } +{ + if(log_ofs) *log_ofs << as_string("", "", "") << "\n"; +} + +log_entry::log_entry(source_location _loc, std::string _msg) +: m_location{ _loc } +, m_message{ std::move(_msg) } +, m_backtrace{ tim::get_unw_stack<4, 1>() } +{ + if(log_ofs) *log_ofs << as_string("", "", "") << "\n"; +} + +std::string +log_entry::as_string(const char* _color, const char* _src, const char* _end) const +{ + std::stringstream _ss; + if(m_location.function && m_location.file) + { + _ss << "[" << _src << m_location.file << ":" << m_location.line << _end << "][" + << _src << m_location.function << _end << "]"; + } + + bool _remove_color = (strlen(_color) + strlen(_src) + strlen(_end) == 0); + + _ss << " " << _color << std::regex_replace(m_message, std::regex{ "\n" }, " ... ") + << _end; + + return (_remove_color) ? std::regex_replace(_ss.str(), _color_regex, "") : _ss.str(); +} + +log_entry& +log_entry::add_log_entry(log_entry&& _v) +{ + return log_entries.emplace_back(std::move(_v)); +} + +void +print_log_entries(std::ostream& _os, int64_t _count, + std::function _condition, const char* _color, + bool _color_entries) +{ + size_t i0 = (_count < 0) ? 0 : std::max(log_entries.size() - _count, 0); + size_t _w = std::log10(log_entries.size()) + 1; + + if(dynamic_cast(&_os) || + (&_os != &std::cout && &_os != &std::cerr && &_os != &std::clog)) + { + _color = ""; + _color_entries = false; + } + + const char* _end = + (strlen(_color) > 0 || _color_entries) ? tim::log::color::end() : ""; + + // the requested number of log entries + for(size_t i = i0; i < log_entries.size(); ++i) + { + auto& itr = log_entries.at(i); + + if(!_condition || _condition(itr)) + { + _os << "[" << _color << std::setw(_w) << i << "/" << log_entries.size() + << _end << "]" + << ((_color_entries) ? itr.as_string() : itr.as_string("", "", "")) + << "\n"; + } + } +} diff --git a/source/bin/omnitrace/log.hpp b/source/bin/omnitrace/log.hpp new file mode 100644 index 0000000000..e3a36ad391 --- /dev/null +++ b/source/bin/omnitrace/log.hpp @@ -0,0 +1,91 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +#if !defined(JOIN) +# define JOIN(...) ::timemory::join::join(__VA_ARGS__) +#endif + +struct log_entry; + +void +print_log_entries(std::ostream& = std::cerr, int64_t _count = 10, + std::function _cond = {}, + const char* _color = tim::log::color::warning(), + bool _color_entries = true); + +struct log_entry +{ + struct source_location + { + const char* function = nullptr; + const char* file = nullptr; + int line = 0; + }; + + log_entry(std::string _msg); + log_entry(source_location _loc, std::string _msg); + + std::string as_string(const char* _color = tim::log::color::info(), + const char* _src = tim::log::color::source(), + const char* _end = tim::log::color::end()) const; + + static log_entry& add_log_entry(log_entry&&); + + log_entry& force(bool _v = true) + { + m_forced = _v; + return *this; + } + + bool forced() const { return m_forced; } + +private: + bool m_forced = false; // if should always be displayed + source_location m_location = {}; + std::string m_message = {}; + tim::unwind::stack<4> m_backtrace = {}; + + friend void print_log_entries(std::ostream&, int64_t, + std::function, const char*, + bool); +}; + +#define OMNITRACE_ADD_LOG_ENTRY(...) \ + log_entry::add_log_entry( \ + { log_entry::source_location{ __FUNCTION__, __FILE__, __LINE__ }, \ + timemory::join::join(' ', __VA_ARGS__) }) + +#define OMNITRACE_ADD_DETAILED_LOG_ENTRY(DELIM, ...) \ + log_entry::add_log_entry( \ + { log_entry::source_location{ __FUNCTION__, __FILE__, __LINE__ }, \ + timemory::join::join(__VA_ARGS__) }) diff --git a/source/bin/omnitrace/module_function.cpp b/source/bin/omnitrace/module_function.cpp index b0f13d02f4..e456e9cefa 100644 --- a/source/bin/omnitrace/module_function.cpp +++ b/source/bin/omnitrace/module_function.cpp @@ -21,7 +21,9 @@ // SOFTWARE. #include "module_function.hpp" +#include "InstructionCategories.h" #include "fwd.hpp" +#include "log.hpp" #include "omnitrace.hpp" #include @@ -58,6 +60,8 @@ module_function::module_function(module_t* mod, procedure_t* proc) , module_name{ get_name(module) } , function_name{ get_name(function) } { + OMNITRACE_ADD_LOG_ENTRY("Adding function", function_name, "from module", module_name); + if(!function->isInstrumentable()) { verbprintf(1, @@ -75,6 +79,10 @@ module_function::module_function(module_t* mod, procedure_t* proc) signature = get_func_file_line_info(module, function); + // make sure all exist + for(int i = 0; i <= instruction_category_t::c_NoCategory; ++i) + instruction_types[static_cast(i)] = 0; + if(function->isInstrumentable()) { // this information is potentially not available and @@ -88,13 +96,39 @@ module_function::module_function(module_t* mod, procedure_t* proc) } instructions.reserve(basic_blocks.size()); + size_t _n = 0; for(const auto& itr : basic_blocks) { - std::vector _instructions{}; - itr->getInstructions(_instructions); - num_instructions += _instructions.size(); - if(debug_print || verbose_level > 3 || instr_print) - instructions.emplace_back(std::move(_instructions)); + std::vector> _instructions{}; + try + { + if(itr->getInstructions(_instructions)) + { + auto _num_instr = _instructions.size(); + num_instructions += _num_instr; + for(auto&& iitr : _instructions) + { + instruction_types[iitr.first.getCategory()] += 1; + } + // num_instructions += _instructions.size(); + if(debug_print || verbose_level > 3 || instr_print) + instructions.emplace_back(std::move(_instructions)); + } + else + { + // on average, the number of instructions is address range / 4 + auto _num_instr = (itr->getEndAddress() - itr->getStartAddress()) / 4; + verbprintf(2, + "No instructions found for basic block %zu in %s. " + "Approximating with %lu...\n", + _n, function_name.c_str(), _num_instr); + num_instructions += _num_instr; + } + } catch(std::runtime_error& _e) + { + errprintf(1, "Dyninst error: %s\n", _e.what()); + } + ++_n; } } } diff --git a/source/bin/omnitrace/module_function.hpp b/source/bin/omnitrace/module_function.hpp index 4ced5b7684..7b307fa0cd 100644 --- a/source/bin/omnitrace/module_function.hpp +++ b/source/bin/omnitrace/module_function.hpp @@ -34,8 +34,11 @@ struct module_function { - using width_t = std::array; - using address_t = Dyninst::Address; + using width_t = std::array; + using address_t = Dyninst::Address; + using instr_addr_pair_t = std::pair; + using str_msg_t = std::tuple; + using str_msg_vec_t = std::vector; static constexpr size_t absolute_max_width = 80; static width_t& get_width(); @@ -86,21 +89,19 @@ struct module_function bool is_address_range_constrained() const; // checks address range constraint bool is_num_instructions_constrained() const; // check # instructions constraint - size_t start_address = 0; - uint64_t address_range = 0; - uint64_t num_instructions = 0; - module_t* module = nullptr; - procedure_t* function = nullptr; - flow_graph_t* flow_graph = nullptr; - string_t module_name = {}; - string_t function_name = {}; - function_signature signature = {}; - basic_block_set_t basic_blocks = {}; - basic_loop_vec_t loop_blocks = {}; - std::vector> instructions = {}; - - using str_msg_t = std::tuple; - using str_msg_vec_t = std::vector; + size_t start_address = 0; + uint64_t address_range = 0; + uint64_t num_instructions = 0; + module_t* module = nullptr; + procedure_t* function = nullptr; + flow_graph_t* flow_graph = nullptr; + string_t module_name = {}; + string_t function_name = {}; + function_signature signature = {}; + basic_block_set_t basic_blocks = {}; + basic_loop_vec_t loop_blocks = {}; + std::map instruction_types = {}; + std::vector> instructions = {}; mutable str_msg_vec_t messages = {}; @@ -211,21 +212,30 @@ module_function::serialize(ArchiveT& ar, const unsigned) cereal::make_nvp("is_loop_num_instructions_constrained", is_loop_num_instructions_constrained())); ar.finishNode(); + ar.setNextName("instruction_breakdown"); + ar.startNode(); + for(auto itr : instruction_types) + ar(cereal::make_nvp(std::to_string(itr.first).c_str(), itr.second)); + ar.finishNode(); // instructions can inflate JSON size so only output when verbosity is increased // above default if(debug_print || verbose_level > 3 || instr_print) { - std::vector> _instructions{}; - _instructions.reserve(instructions.size()); + ar.setNextName("instructions"); + ar.startNode(); + ar.makeArray(); for(auto&& itr : instructions) { - std::vector _subinstr{}; - _subinstr.reserve(itr.size()); + ar.startNode(); for(auto&& iitr : itr) - _subinstr.emplace_back(iitr.format()); - _instructions.emplace_back(std::move(_subinstr)); + { + std::stringstream _addr{}; + _addr << "0x" << std::hex << iitr.second; + ar(cereal::make_nvp(_addr.str().c_str(), iitr.first.format())); + } + ar.finishNode(); } - ar(cereal::make_nvp("instructions", _instructions)); + ar.finishNode(); } } } diff --git a/source/bin/omnitrace/omnitrace.cpp b/source/bin/omnitrace/omnitrace.cpp index f14ec3ef47..fedc4f6880 100644 --- a/source/bin/omnitrace/omnitrace.cpp +++ b/source/bin/omnitrace/omnitrace.cpp @@ -22,13 +22,18 @@ #include "omnitrace.hpp" #include "fwd.hpp" +#include "log.hpp" +#include #include #include +#include #include +#include #include #include #include +#include #include #include @@ -78,6 +83,7 @@ bool instr_print = false; bool simulate = false; bool include_uninstr = false; int verbose_level = tim::get_env("OMNITRACE_VERBOSE_INSTRUMENT", 0); +int num_log_entries = tim::get_env("OMNITRACE_LOG_COUNT", 20); string_t main_fname = "main"; string_t argv0 = {}; string_t cmdv0 = {}; @@ -104,6 +110,8 @@ regexvec_t file_restrict = {}; regexvec_t func_restrict = {}; CodeCoverageMode coverage_mode = CODECOV_NONE; +std::unique_ptr log_ofs = {}; + namespace { bool binary_rewrite = false; @@ -166,6 +174,8 @@ void find_dyn_api_rt(); } // namespace +namespace process = tim::process; + //======================================================================================// // // entry point @@ -175,11 +185,54 @@ find_dyn_api_rt(); int main(int argc, char** argv) { + { + using signal_settings = tim::signal_settings; + using sys_signal = tim::sys_signal; + + // default signals to catch + for(const auto& itr : + { sys_signal::Interrupt, sys_signal::FPE, sys_signal::Stop, sys_signal::Quit, + sys_signal::Illegal, sys_signal::Abort, sys_signal::Bus, + sys_signal::SegFault, sys_signal::FileSize, sys_signal::CPUtime }) + signal_settings::enable(itr); + + auto _exit_action = [](int nsig) { + TIMEMORY_PRINTF_FATAL( + stderr, "omnitrace exited with signal %i :: %s\n", nsig, + signal_settings::str(static_cast(nsig)).c_str()); + + print_log_entries(std::cerr, num_log_entries); + + std::cerr << "\n[omnitrace][exe] Potentially important log entries\n\n"; + + print_log_entries(std::cerr, -1, [](const auto& _v) { return _v.forced(); }); + + TIMEMORY_PRINTF_FATAL(stderr, "\n"); + TIMEMORY_PRINTF_FATAL( + stderr, + "These were the last %i log entries from omnitrace. You can control the " + "number of log entries via the '--log ' option or OMNITRACE_LOG_COUNT " + "env variable.\n"); + + if(log_ofs) log_ofs->close(); + log_ofs.reset(); + + kill(process::get_id(), nsig); + }; + + signal_settings::set_exit_action(_exit_action); + signal_settings::check_environment(); + tim::enable_signal_detection(signal_settings::get_enabled()); + } + argv0 = argv[0]; + OMNITRACE_ADD_LOG_ENTRY(argv[0]); + address_space_t* addr_space = nullptr; string_t mutname = {}; string_t outfile = {}; + string_t logfile = {}; std::vector inputlib = { "libomnitrace-dl" }; std::vector libname = {}; std::vector sharedlibname = {}; @@ -298,6 +351,21 @@ main(int argc, char** argv) debug_print = p.get("debug"); if(debug_print && !p.exists("verbose")) verbose_level = 256; }); + parser + .add_argument({ "--log" }, "Number of log entries to display after an error. Any " + "value < 0 will emit the entire log") + .count(1) + .action([](parser_t& p) { num_log_entries = p.get("log"); }); + parser + .add_argument({ "--log-file" }, + "Write the log out the specified file during the run") + .count(1) + .action([&logfile](parser_t& p) { + auto _oname = p.get("log-file"); + auto _cfg = tim::settings::compose_filename_config{}; + _cfg.subdirectory = "instrumentation"; + logfile = tim::settings::compose_output_filename(_oname, "log", _cfg); + }); parser .add_argument({ "--simulate" }, "Exit after outputting diagnostic " @@ -971,6 +1039,17 @@ main(int argc, char** argv) tim::timemory_init(_cmdc, _cmdv, "omnitrace-"); } + if(!logfile.empty()) + { + log_ofs = std::make_unique(); + verbprintf_bare(0, "%s", ::tim::log::color::source()); + verbprintf(0, "Opening '%s' for log output... ", logfile.c_str()); + if(!tim::filepath::open(*log_ofs, logfile)) + throw std::runtime_error(JOIN(" ", "Error opening log output file", logfile)); + verbprintf_bare(0, "Done\n%s", ::tim::log::color::end()); + print_log_entries(*log_ofs, -1, {}, "", false); + } + //----------------------------------------------------------------------------------// // // REGEX OPTIONS @@ -980,6 +1059,9 @@ main(int argc, char** argv) { // Helper function for adding regex expressions auto add_regex = [](auto& regex_array, const string_t& regex_expr) { + OMNITRACE_ADD_DETAILED_LOG_ENTRY("", "Adding regular expression \"", + regex_expr, "\" to regex_array@", + ®ex_array); if(!regex_expr.empty()) regex_array.emplace_back(std::regex(regex_expr, regex_opts)); }; @@ -1099,26 +1181,6 @@ main(int argc, char** argv) // //----------------------------------------------------------------------------------// - // for runtime instrumentation, we need to set this before the process gets created - if(!binary_rewrite) - { - auto _hsa_val = tim::get_env("HSA_ENABLE_INTERRUPT", 1, false); - tim::set_env("HSA_ENABLE_INTERRUPT", "0", 0); - if(_pid >= 0 && _hsa_val != 0) - { - verbprintf(0, "#-------------------------------------------------------" - "-------------------------------------------#\n"); - verbprintf(0, "\n"); - verbprintf(0, "WARNING! Sampling may result in ioctl() deadlock within " - "the ROCR runtime.\n"); - verbprintf(0, "To avoid this, set HSA_ENABLE_INTERRUPT=0 in the environment " - "before starting your ROCm/HIP application\n"); - verbprintf(0, "\n"); - verbprintf(0, "#-------------------------------------------------------" - "-------------------------------------------#\n"); - } - } - addr_space = omnitrace_get_address_space(bpatch, _cmdc, _cmdv, binary_rewrite, _pid, mutname); @@ -1166,10 +1228,14 @@ main(int argc, char** argv) }; auto _add_overlapping = [](module_t* mitr, procedure_t* pitr) { + OMNITRACE_ADD_LOG_ENTRY("Checking if procedure", get_name(pitr), "in module", + get_name(mitr), "is overlapping"); if(!pitr->isInstrumentable()) return; std::vector _overlapping{}; if(pitr->findOverlapping(_overlapping)) { + OMNITRACE_ADD_LOG_ENTRY("Adding overlapping procedure", get_name(pitr), + "and module", get_name(mitr)); _insert_module_function(overlapping_module_functions, module_function{ mitr, pitr }); for(auto* oitr : _overlapping) @@ -1289,6 +1355,8 @@ main(int argc, char** argv) is_static_exe = addr_space->isStaticExecutable(); + OMNITRACE_ADD_LOG_ENTRY("address space is", (is_static_exe) ? "" : "not", + "a static executable"); if(binary_rewrite) app_binary = static_cast(addr_space); else @@ -1296,6 +1364,8 @@ main(int argc, char** argv) is_attached = (_pid >= 0 && app_thread != nullptr); + OMNITRACE_ADD_LOG_ENTRY("address space is attached:", is_attached); + if(!app_binary && !app_thread) { errprintf(-1, "No application thread or binary! nullptr to BPatch_binaryEdit* " @@ -1314,21 +1384,26 @@ main(int argc, char** argv) string_t _tried_libs; for(auto _libname : _libnames) { + OMNITRACE_ADD_LOG_ENTRY("Getting the absolute lib filepath to", _libname); _libname = get_absolute_lib_filepath(_libname); _tried_libs += string_t("|") + _libname; verbprintf(1, "loading library: '%s'...\n", _libname.c_str()); result = (addr_space->loadLibrary(_libname.c_str()) != nullptr); verbprintf(2, "loadLibrary(%s) result = %s\n", _libname.c_str(), (result) ? "success" : "failure"); - if(result) break; + if(result) + { + OMNITRACE_ADD_LOG_ENTRY("Using library:", _libname); + break; + } } if(!result) { - fprintf(stderr, - "Error: 'loadLibrary(%s)' failed.\nPlease ensure that the " - "library directory is in LD_LIBRARY_PATH environment variable " - "or absolute path is provided\n", - _tried_libs.substr(1).c_str()); + errprintf(-127, + "Error: 'loadLibrary(%s)' failed.\nPlease ensure that the " + "library directory is in LD_LIBRARY_PATH environment variable " + "or absolute path is provided\n", + _tried_libs.substr(1).c_str()); exit(EXIT_FAILURE); } }; @@ -1347,6 +1422,7 @@ main(int argc, char** argv) }; for(auto& lname : lnames) lname = _get_library_ext(lname); + OMNITRACE_ADD_LOG_ENTRY("Using library:", lnames); return lnames; }; @@ -1623,10 +1699,6 @@ main(int argc, char** argv) for(auto&& itr : env_config_variables) env_vars.emplace_back(itr); env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MODE", instr_mode)); - env_vars.emplace_back(TIMEMORY_JOIN('=', "HSA_ENABLE_INTERRUPT", "0")); -#if defined(OMNITRACE_USE_ROCTRACER) && OMNITRACE_USE_ROCTRACER > 0 - env_vars.emplace_back(TIMEMORY_JOIN('=', "HSA_TOOLS_LIB", _libname)); -#endif env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MPI_INIT", "OFF")); env_vars.emplace_back(TIMEMORY_JOIN('=', "OMNITRACE_MPI_FINALIZE", "OFF")); env_vars.emplace_back( @@ -1756,26 +1828,43 @@ main(int argc, char** argv) if(main_entr_points) { verbprintf(1, "Adding main entry snippets...\n"); - addr_space->insertSnippet(_init_sequence, *main_entr_points, - BPatch_callBefore, BPatch_firstSnippet); + addr_space->insertSnippet(_init_sequence, *main_entr_points); + // insert_instr(addr_space, *main_entr_points, _init_sequence, BPatch_entry); } else { for(auto* itr : _objs) - itr->insertInitCallback(_init_sequence); + { + try + { + itr->insertInitCallback(_init_sequence); + } catch(std::runtime_error& _e) + { + errprintf(0, "Dyninst error inserting init callback: %s\n", + _e.what()); + } + } } } if(main_exit_points) { verbprintf(1, "Adding main exit snippets...\n"); - addr_space->insertSnippet(_fini_sequence, *main_exit_points, BPatch_callAfter, - BPatch_firstSnippet); + // insert_instr(addr_space, *main_exit_points, _fini_sequence, BPatch_exit); + addr_space->insertSnippet(_fini_sequence, *main_exit_points); } else { for(auto* itr : _objs) - itr->insertFiniCallback(_fini_sequence); + { + try + { + itr->insertFiniCallback(_fini_sequence); + } catch(std::runtime_error& _e) + { + errprintf(0, "Dyninst error inserting fini callback: %s\n", _e.what()); + } + } } //----------------------------------------------------------------------------------// @@ -2173,6 +2262,12 @@ main(int argc, char** argv) verbprintf(0, "End of omnitrace\n"); verbprintf(1, "Exit code: %i\n", code); + + if(log_ofs) + { + verbprintf(2, "Closing log file...\n"); + log_ofs->close(); + } return code; } diff --git a/source/bin/omnitrace/omnitrace.hpp b/source/bin/omnitrace/omnitrace.hpp index 1f475f2ab9..9b24f608d5 100644 --- a/source/bin/omnitrace/omnitrace.hpp +++ b/source/bin/omnitrace/omnitrace.hpp @@ -25,6 +25,7 @@ #include "function_signature.hpp" #include "fwd.hpp" #include "info.hpp" +#include "log.hpp" #include "module_function.hpp" #include @@ -199,8 +200,7 @@ omnitrace_get_address_space(patch_pointer_t& _bpatch, int _cmdc, char** _cmdv, if(!_name.empty()) mutatee = _bpatch->openBinary(_name.c_str(), false); if(!mutatee) { - fprintf(stderr, "[omnitrace][exe] Failed to open binary '%s'\n", - _name.c_str()); + verbprintf(-1, "Failed to open binary '%s'\n", _name.c_str()); throw std::runtime_error("Failed to open binary"); } verbprintf_bare(1, "Done\n"); @@ -213,27 +213,28 @@ omnitrace_get_address_space(patch_pointer_t& _bpatch, int _cmdc, char** _cmdv, mutatee = _bpatch->processAttach(_cmdv0, _pid); if(!mutatee) { - fprintf(stderr, "[omnitrace][exe] Failed to connect to process %i\n", - (int) _pid); + verbprintf(-1, "Failed to connect to process %i\n", (int) _pid); throw std::runtime_error("Failed to attach to process"); } verbprintf_bare(1, "Done\n"); } else { - verbprintf(1, "Creating process '%s'... ", _cmdv[0]); + std::stringstream ss; + for(int i = 0; i < _cmdc; ++i) + { + if(!_cmdv || !_cmdv[i]) continue; + ss << " " << _cmdv[i]; + } + auto _cmd_msg = ss.str(); + if(_cmd_msg.length() > 1) _cmd_msg = _cmd_msg.substr(1); + + verbprintf(1, "Creating process '%s'... ", _cmd_msg.c_str()); fflush(stderr); mutatee = _bpatch->processCreate(_cmdv[0], (const char**) _cmdv, nullptr); if(!mutatee) { - std::stringstream ss; - for(int i = 0; i < _cmdc; ++i) - { - if(!_cmdv[i]) continue; - ss << _cmdv[i] << " "; - } - fprintf(stderr, "[omnitrace][exe] Failed to create process: '%s'\n", - ss.str().c_str()); + verbprintf(-1, "Failed to create process: '%s'\n", _cmd_msg.c_str()); throw std::runtime_error("Failed to create process"); } verbprintf_bare(1, "Done\n"); @@ -249,6 +250,8 @@ omnitrace_thread_exit(thread_t* thread, BPatch_exitType exit_type) { if(!thread) return; + OMNITRACE_ADD_LOG_ENTRY("Executing the thread callback"); + BPatch_process* app = thread->getProcess(); if(!terminate_expr) @@ -290,6 +293,8 @@ omnitrace_thread_exit(thread_t* thread, BPatch_exitType exit_type) TIMEMORY_NOINLINE inline void omnitrace_fork_callback(thread_t* parent, thread_t* child) { + OMNITRACE_ADD_LOG_ENTRY("Executing the fork callback"); + if(child) { auto* app = child->getProcess(); @@ -320,10 +325,20 @@ omnitrace_fork_callback(thread_t* parent, thread_t* child) template bool insert_instr(address_space_t* mutatee, const bpvector_t& _points, Tp traceFunc, - procedure_loc_t traceLoc, bool allow_traps) + procedure_loc_t, bool allow_traps) { if(!traceFunc || _points.empty()) return false; + auto _names = [&_points]() { + std::set _v{}; + for(const auto& itr : _points) + if(itr && itr->getFunction()) _v.emplace(get_name(itr->getFunction())); + return _v; + }(); + + OMNITRACE_ADD_LOG_ENTRY("Inserting", _points.size(), + "instrumentation points into function(s)", _names); + auto _trace = traceFunc.get(); auto _traps = std::set{}; if(!allow_traps) @@ -334,18 +349,20 @@ insert_instr(address_space_t* mutatee, const bpvector_t& _points, Tp t } } + OMNITRACE_ADD_LOG_ENTRY("Found", _traps.size(), + "instrumentation points using traps in function(s)", _names); + size_t _n = 0; for(const auto& itr : _points) { - if(!itr || _traps.count(itr) > 0) - continue; - else if(traceLoc == BPatch_entry) - mutatee->insertSnippet(*_trace, *itr, BPatch_callBefore, BPatch_firstSnippet); - else - mutatee->insertSnippet(*_trace, *itr); + if(!itr || _traps.count(itr) > 0) continue; + mutatee->insertSnippet(*_trace, *itr); ++_n; } + OMNITRACE_ADD_LOG_ENTRY("Inserted", _n, "instrumentation points in function(s)", + _names); + return (_n > 0); } // @@ -364,6 +381,9 @@ insert_instr(address_space_t* mutatee, procedure_t* funcToInstr, Tp traceFunc, bpvector_t* _points = nullptr; auto _trace = traceFunc.get(); + OMNITRACE_ADD_LOG_ENTRY("Searching for loop instrumentation points in function", + get_name(funcToInstr)); + if(!cfGraph) funcToInstr->getCFG(); if(cfGraph && loopToInstrument) { @@ -380,6 +400,10 @@ insert_instr(address_space_t* mutatee, procedure_t* funcToInstr, Tp traceFunc, if(_points == nullptr) return false; if(_points->empty()) return false; + OMNITRACE_ADD_LOG_ENTRY("Inserting max of", _points->size(), + "loop instrumentation points in function", + get_name(funcToInstr)); + std::set _traps{}; if(!allow_traps) { @@ -389,18 +413,21 @@ insert_instr(address_space_t* mutatee, procedure_t* funcToInstr, Tp traceFunc, } } + OMNITRACE_ADD_LOG_ENTRY("Found", _traps.size(), + "loop instrumentation points using traps in function", + get_name(funcToInstr)); + size_t _n = 0; for(auto& itr : *_points) { - if(!itr || _traps.count(itr) > 0) - continue; - else if(traceLoc == BPatch_entry) - mutatee->insertSnippet(*_trace, *itr, BPatch_callBefore, BPatch_firstSnippet); - else - mutatee->insertSnippet(*_trace, *itr); + if(!itr || _traps.count(itr) > 0) continue; + mutatee->insertSnippet(*_trace, *itr); ++_n; } + OMNITRACE_ADD_LOG_ENTRY("Inserted", _n, "loop instrumentation points in function", + get_name(funcToInstr)); + return (_n > 0); } // @@ -412,9 +439,15 @@ bool insert_instr(address_space_t* mutatee, Tp traceFunc, procedure_loc_t traceLoc, basic_block_t* basicBlock, bool allow_traps) { + if(!basicBlock) return false; + point_t* _point = nullptr; auto _trace = traceFunc.get(); + OMNITRACE_ADD_LOG_ENTRY( + "Searching for basic-block entry and exit instrumentation points ::", + *basicBlock); + basic_block_t* _bb = basicBlock; switch(traceLoc) { @@ -426,15 +459,23 @@ insert_instr(address_space_t* mutatee, Tp traceFunc, procedure_loc_t traceLoc, return false; } - if(_point == nullptr) return false; + if(_point == nullptr) + { + OMNITRACE_ADD_LOG_ENTRY("No instrumentation points were found in basic-block ", + *basicBlock); + return false; + } - if(!allow_traps && _point->usesTrap_NP()) return false; + if(!allow_traps && _point->usesTrap_NP()) + { + OMNITRACE_ADD_LOG_ENTRY("Basic-block", *basicBlock, + "uses traps and traps are disallowed"); + return false; + } switch(traceLoc) { case BPatch_entry: - return (mutatee->insertSnippet(*_trace, *_point, BPatch_callBefore, - BPatch_firstSnippet) != nullptr); case BPatch_exit: return (mutatee->insertSnippet(*_trace, *_point) != nullptr); default: { diff --git a/source/bin/tests/CMakeLists.txt b/source/bin/tests/CMakeLists.txt index 27d55781d2..02d63aa5dd 100644 --- a/source/bin/tests/CMakeLists.txt +++ b/source/bin/tests/CMakeLists.txt @@ -33,7 +33,7 @@ function(OMNITRACE_ADD_BIN_TEST) endif() # common - list(APPEND TEST_ENVIRONMENT "OMNITRACE_CI=ON" + list(APPEND TEST_ENVIRONMENT "OMNITRACE_CI=ON" "OMNITRACE_CONFIG_FILE=" "OMNITRACE_OUTPUT_PATH=omnitrace-tests-output") # copy for inverse set(TEST_ENVIRONMENT_INV "${TEST_ENVIRONMENT}") @@ -178,7 +178,7 @@ omnitrace_add_bin_test( TIMEOUT 60 PASS_REGEX ".*available.json.*available.txt.*available.xml.*excluded.json.*excluded.txt.*excluded.xml.*instrumented.json.*instrumented.txt.*instrumented.xml.*overlapping.json.*overlapping.txt.*overlapping.xml.*" - ) + FAIL_REGEX "No such file or directory|not found") omnitrace_add_bin_test( ADD_INVERSE @@ -191,6 +191,23 @@ omnitrace_add_bin_test( "\\\[omnitrace\\\]\\\[exe\\\] Runtime instrumentation is not possible!(.*)\n(.*)\\\[omnitrace\\\]\\\[exe\\\] Switching to binary rewrite mode and assuming '--simulate --all-functions'" ) +omnitrace_add_bin_test( + NAME omnitrace-exe-write-log + TARGET omnitrace-exe + ARGS --print-instrumented functions -v 1 --log-file user.log -- ls + LABELS "log" + TIMEOUT 120 + PASS_REGEX "Opening .*/instrumentation/user.log") + +omnitrace_add_bin_test( + NAME omnitrace-exe-write-log-check + DEPENDS omnitrace-exe-write-log + COMMAND ls omnitrace-tests-output/omnitrace-exe-write-log/instrumentation/user.log + LABELS "log" + TIMEOUT 60 + PASS_REGEX "user.log" + FAIL_REGEX "No such file or directory|not found") + omnitrace_add_bin_test( NAME omnitrace-avail-help TARGET omnitrace-avail diff --git a/source/lib/common/setup.hpp b/source/lib/common/setup.hpp index cd29b9d6f2..8b7dd7860d 100644 --- a/source/lib/common/setup.hpp +++ b/source/lib/common/setup.hpp @@ -142,14 +142,6 @@ setup_environ(int _verbose, const std::string& _search_paths = {}, _omnilib = common::path::find_path(_omnilib, _verbose, _search_paths); _omnilib_dl = common::path::find_path(_omnilib_dl, _verbose, _search_paths); - // This environment variable forces the ROCR-Runtime to use polling to wait - // for signals rather than interrupts. We set this variable to avoid issues with - // rocm/roctracer hanging when interrupted by the sampler - // - // see: - // https://github.com/ROCm-Developer-Tools/roctracer/issues/22#issuecomment-572814465 - setenv("HSA_ENABLE_INTERRUPT", "0", 0); - #if defined(OMNITRACE_USE_ROCTRACER) && OMNITRACE_USE_ROCTRACER > 0 setenv("HSA_TOOLS_LIB", _omnilib.c_str(), 0); #endif diff --git a/source/lib/omnitrace-user/CMakeLists.txt b/source/lib/omnitrace-user/CMakeLists.txt index 601f6e839c..9c9ef6eec9 100644 --- a/source/lib/omnitrace-user/CMakeLists.txt +++ b/source/lib/omnitrace-user/CMakeLists.txt @@ -14,8 +14,10 @@ add_library(omnitrace-user-library SHARED) add_library(omnitrace::omnitrace-user-library ALIAS omnitrace-user-library) target_sources( - omnitrace-user-library PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/user.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/user.h) + omnitrace-user-library + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/user.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/user.h + ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/types.h) target_include_directories( omnitrace-user-library PUBLIC $ $) @@ -34,6 +36,7 @@ set_target_properties( omnitrace_strip_target(omnitrace-user-library) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/user.h + ${CMAKE_CURRENT_SOURCE_DIR}/omnitrace/types.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/omnitrace) install( diff --git a/source/lib/omnitrace-user/omnitrace/types.h b/source/lib/omnitrace-user/omnitrace/types.h new file mode 100644 index 0000000000..4432134f92 --- /dev/null +++ b/source/lib/omnitrace-user/omnitrace/types.h @@ -0,0 +1,88 @@ +// MIT License +// +// Copyright (c) 2022 Advanced Micro Devices, Inc. All Rights Reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +#ifndef OMNITRACE_TYPES_H_ +#define OMNITRACE_TYPES_H_ 1 + +#include + +#if defined(__cplusplus) +extern "C" +{ +#endif + + /// @enum OMNITRACE_USER_ERROR + /// @brief Identifier for errors + /// + typedef enum OMNITRACE_USER_ERROR + { + OMNITRACE_USER_SUCCESS = 0, ///< No error + OMNITRACE_USER_ERROR_NO_BINDING, ///< Function pointer was not assigned + OMNITRACE_USER_ERROR_BAD_VALUE, ///< Provided value was invalid + OMNITRACE_USER_ERROR_INVALID_CATEGORY, ///< Invalid user binding category + OMNITRACE_USER_ERROR_INTERNAL, ///< Internal error occurred within libomnitrace + OMNITRACE_USER_ERROR_LAST + } omnitrace_user_error_t; + + /// @enum OMNITRACE_USER_BINDINGS + /// @brief Identifier for function pointer categories + /// @code{.cpp} + /// int (*omnitrace_push_region_f)(const char*) = nullptr; + /// + /// int custom_push_region(const char* name) + /// { + /// // custom push region prints message before calling internal callback + /// printf("Pushing region %s\n", name); + /// return (*omnitrace_push_region_f)(name); + /// } + /// + /// int main(int argc, char** argv) + /// { + /// // get the internal callback to start a user-defined region + /// omnitrace_user_get_callbacks(OMNITRACE_USER_REGION, + /// (void**) &omnitrace_push_region_f, + /// nullptr); + /// // assign the custom callback to start a user-defined region + /// if(omnitrace_push_region_f) + /// omnitrace_user_configure(OMNITRACE_USER_REGION, + /// (void*) &custom_push_region, + /// nullptr); + /// // ... + /// } + /// + /// @endcode + typedef enum OMNITRACE_USER_BINDINGS + { + OMNITRACE_USER_START_STOP = + 0, ///< Function pointers which control global start/stop + OMNITRACE_USER_START_STOP_THREAD, ///< Function pointers which control per-thread + ///< start/stop + OMNITRACE_USER_REGION, ///< Function pointers which generate user-defined regions + OMNITRACE_USER_SAMPLE, ///< Function pointer which generate samples + OMNITRACE_USER_BINDINGS_LAST + } omnitrace_user_bindings_t; + +#if defined(__cplusplus) +} +#endif + +#endif // OMNITRACE_TYPES_H_ diff --git a/source/lib/omnitrace-user/omnitrace/user.h b/source/lib/omnitrace-user/omnitrace/user.h index 39ae0d0460..8fb263a0cc 100644 --- a/source/lib/omnitrace-user/omnitrace/user.h +++ b/source/lib/omnitrace-user/omnitrace/user.h @@ -24,83 +24,22 @@ #define OMNITRACE_USER_H_ 1 #if defined(OMNITRACE_USER_SOURCE) && (OMNITRACE_USER_SOURCE > 0) -# if !defined(OMNITRACE_ATTRIBUTE) -# define OMNITRACE_ATTRIBUTE(...) __attribute__((__VA_ARGS__)) -# endif -# if !defined(OMNITRACE_VISIBILITY) -# define OMNITRACE_VISIBILITY(MODE) OMNITRACE_ATTRIBUTE(visibility(MODE)) -# endif # if !defined(OMNITRACE_PUBLIC_API) -# define OMNITRACE_PUBLIC_API OMNITRACE_VISIBILITY("default") -# endif -# if !defined(OMNITRACE_HIDDEN_API) -# define OMNITRACE_HIDDEN_API OMNITRACE_VISIBILITY("hidden") +# define OMNITRACE_PUBLIC_API __attribute__((visibility("default"))) # endif #else # if !defined(OMNITRACE_PUBLIC_API) # define OMNITRACE_PUBLIC_API # endif -# if !defined(OMNITRACE_HIDDEN_API) -# define OMNITRACE_HIDDEN_API -# endif #endif +#include "omnitrace/types.h" + #if defined(__cplusplus) extern "C" { #endif - /// @enum OMNITRACE_USER_ERROR - /// @brief Identifier for errors - /// - enum OMNITRACE_USER_ERROR - { - OMNITRACE_USER_SUCCESS = 0, ///< No error - OMNITRACE_USER_ERROR_NO_BINDING, ///< Function pointer was not assigned - OMNITRACE_USER_ERROR_BAD_FUNCTION_POINTER, ///< Provided function pointer was - ///< invalid - OMNITRACE_USER_ERROR_INVALID_CATEGORY, ///< Invalid user binding category - OMNITRACE_USER_ERROR_INTERNAL, ///< Internal error occurred within libomnitrace - OMNITRACE_USER_ERROR_LAST - }; - - /// @enum OMNITRACE_USER_BINDINGS - /// @brief Identifier for function pointer categories - /// @code{.cpp} - /// int (*omnitrace_push_region_f)(const char*) = nullptr; - /// - /// int custom_push_region(const char* name) - /// { - /// // custom push region prints message before calling internal callback - /// printf("Pushing region %s\n", name); - /// return (*omnitrace_push_region_f)(name); - /// } - /// - /// int main(int argc, char** argv) - /// { - /// // get the internal callback to start a user-defined region - /// omnitrace_user_get_callbacks(OMNITRACE_USER_REGION, - /// (void**) &omnitrace_push_region_f, - /// nullptr); - /// // assign the custom callback to start a user-defined region - /// if(omnitrace_push_region_f) - /// omnitrace_user_configure(OMNITRACE_USER_REGION, - /// (void*) &custom_push_region, - /// nullptr); - /// // ... - /// } - /// - /// @endcode - enum OMNITRACE_USER_BINDINGS - { - OMNITRACE_USER_START_STOP = - 0, ///< Function pointers which control global start/stop - OMNITRACE_USER_START_STOP_THREAD, ///< Function pointers which control per-thread - ///< start/stop - OMNITRACE_USER_REGION, ///< Function pointers which generate user-defined regions - OMNITRACE_USER_BINDINGS_LAST - }; - /// @fn int omnitrace_user_start_trace(void) /// @return @ref OMNITRACE_USER_ERROR value /// @brief Enable tracing on this thread and all subsequently created threads diff --git a/source/lib/omnitrace-user/user.cpp b/source/lib/omnitrace-user/user.cpp index bd70353786..f1ec0391dc 100644 --- a/source/lib/omnitrace-user/user.cpp +++ b/source/lib/omnitrace-user/user.cpp @@ -25,6 +25,7 @@ #endif #include "omnitrace/user.h" +#include "omnitrace/types.h" #include #include @@ -149,6 +150,7 @@ extern "C" { case OMNITRACE_USER_SUCCESS: return "Success"; case OMNITRACE_USER_ERROR_NO_BINDING: return "Function pointer not assigned"; + case OMNITRACE_USER_ERROR_BAD_VALUE: return "Invalid value was provided"; case OMNITRACE_USER_ERROR_INVALID_CATEGORY: return "Invalid user binding category"; case OMNITRACE_USER_ERROR_INTERNAL: diff --git a/source/lib/omnitrace/library.cpp b/source/lib/omnitrace/library.cpp index 882244e2f6..c3f682fe20 100644 --- a/source/lib/omnitrace/library.cpp +++ b/source/lib/omnitrace/library.cpp @@ -69,9 +69,9 @@ ensure_finalization(bool _static_init = false) { const auto& _info = thread_info::init(); auto _tid = _info->index_data; - OMNITRACE_CI_THROW(_tid->internal_value != threading::get_id(), + OMNITRACE_CI_THROW(_tid->sequent_value != threading::get_id(), "Error! internal tid != %li :: %li", threading::get_id(), - _tid->internal_value); + _tid->sequent_value); OMNITRACE_CI_THROW(_tid->system_value != threading::get_sys_tid(), "Error! system tid != %li :: %li", threading::get_sys_tid(), _tid->system_value); @@ -670,7 +670,7 @@ omnitrace_finalize_hidden(void) for(size_t i = 0; i < max_supported_threads; ++i) { auto& itr = instrumentation_bundles::instances().at(i); - const auto& _info = thread_info::get(i, InternalTID); + const auto& _info = thread_info::get(i, SequentTID); while(!itr.bundles.empty()) { int _lvl = 1; diff --git a/source/lib/omnitrace/library/components/backtrace.cpp b/source/lib/omnitrace/library/components/backtrace.cpp index 72fe934220..c9e67833dd 100644 --- a/source/lib/omnitrace/library/components/backtrace.cpp +++ b/source/lib/omnitrace/library/components/backtrace.cpp @@ -203,7 +203,7 @@ backtrace::sample(int) // 4a. funlockfile [common but not explicitly in call-stack] // 4b. __resume_rt [common but not explicitly in call-stack] // 4c. killpg [common but not explicitly in call-stack] - m_data = get_unw_backtrace_raw(); + m_data = get_unw_stack(); } } // namespace component } // namespace omnitrace diff --git a/source/lib/omnitrace/library/components/backtrace_metrics.cpp b/source/lib/omnitrace/library/components/backtrace_metrics.cpp index 952ff8ad75..4d917293aa 100644 --- a/source/lib/omnitrace/library/components/backtrace_metrics.cpp +++ b/source/lib/omnitrace/library/components/backtrace_metrics.cpp @@ -243,7 +243,7 @@ void backtrace_metrics::fini_perfetto(int64_t _tid) { auto _hw_cnt_labels = *get_papi_labels(_tid); - const auto& _thread_info = thread_info::get(_tid, InternalTID); + const auto& _thread_info = thread_info::get(_tid, SequentTID); OMNITRACE_CI_THROW(!_thread_info, "Error! missing thread info for tid=%li\n", _tid); if(!_thread_info) return; diff --git a/source/lib/omnitrace/library/components/backtrace_timestamp.cpp b/source/lib/omnitrace/library/components/backtrace_timestamp.cpp index b9a62dff2b..691298bea7 100644 --- a/source/lib/omnitrace/library/components/backtrace_timestamp.cpp +++ b/source/lib/omnitrace/library/components/backtrace_timestamp.cpp @@ -38,7 +38,7 @@ backtrace_timestamp::operator<(const backtrace_timestamp& rhs) const bool backtrace_timestamp::is_valid() const { - const auto& _info = thread_info::get(m_tid, InternalTID); + const auto& _info = thread_info::get(m_tid, SequentTID); return (_info) ? _info->is_valid_time(m_real) : false; } diff --git a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp index a7b800ab78..24bdc3cdf8 100644 --- a/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_create_gotcha.cpp @@ -161,7 +161,7 @@ pthread_create_gotcha::wrapper::operator()() const auto _bundle = std::shared_ptr{}; auto _signals = std::set{}; auto _coverage = (get_mode() == Mode::Coverage); - // const auto& _parent_info = thread_info::get(m_parent_tid, LookupTID); + // const auto& _parent_info = thread_info::get(m_parent_tid, InternalTID); auto _dtor = [&]() { set_thread_state(ThreadState::Internal); if(_is_sampling) @@ -191,7 +191,7 @@ pthread_create_gotcha::wrapper::operator()() const if(_active && !_coverage) { const auto& _tid_index = thread_info::init(); - _tid = _tid_index->index_data->internal_value; + _tid = _tid_index->index_data->sequent_value; threading::set_thread_name(TIMEMORY_JOIN(" ", "Thread", _tid).c_str()); if(!thread_bundle_data_t::instances().at(_tid)) { diff --git a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp index 271b491af4..828fcc97af 100644 --- a/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp +++ b/source/lib/omnitrace/library/components/pthread_mutex_gotcha.cpp @@ -283,7 +283,7 @@ static_data::operator()( using thread_data_t = omnitrace::thread_data>; static thread_local auto& _v = - thread_data_t::instance(omnitrace::construct_on_init{}, _data); + thread_data_t::instance(omnitrace::construct_on_thread{}, _data); return *_v; } } // namespace policy diff --git a/source/lib/omnitrace/library/cpu_freq.cpp b/source/lib/omnitrace/library/cpu_freq.cpp index 00d741aa79..0f322413e9 100644 --- a/source/lib/omnitrace/library/cpu_freq.cpp +++ b/source/lib/omnitrace/library/cpu_freq.cpp @@ -165,7 +165,7 @@ post_process() auto _process_frequencies = [](size_t _idx, size_t _offset) { using freq_track = perfetto_counter_track; - const auto& _thread_info = thread_info::get(0, LookupTID); + const auto& _thread_info = thread_info::get(0, InternalTID); OMNITRACE_CI_THROW(!_thread_info, "Missing thread info for thread 0"); if(!_thread_info) return; @@ -199,7 +199,7 @@ post_process() "Page Faults", "User Time", "Kernel Time" }, { "MB", "MB", "MB", "", "", "sec", "sec" }); - const auto& _thread_info = thread_info::get(0, LookupTID); + const auto& _thread_info = thread_info::get(0, InternalTID); OMNITRACE_CI_THROW(!_thread_info, "Missing thread info for thread 0"); if(!_thread_info) return; diff --git a/source/lib/omnitrace/library/debug.hpp b/source/lib/omnitrace/library/debug.hpp index 8ad3b1925d..27c9186b20 100644 --- a/source/lib/omnitrace/library/debug.hpp +++ b/source/lib/omnitrace/library/debug.hpp @@ -118,20 +118,21 @@ get_chars(T&& _c, std::index_sequence) # define OMNITRACE_DEBUG_BUFFER_LEN 1024 #endif -#if !defined(OMNITRACE_PROCESS_IDENTIFIER) +#if !defined(OMNITRACE_DEBUG_PROCESS_IDENTIFIER) # if defined(TIMEMORY_USE_MPI) -# define OMNITRACE_PROCESS_IDENTIFIER static_cast(::tim::dmp::rank()) +# define OMNITRACE_DEBUG_PROCESS_IDENTIFIER static_cast(::tim::dmp::rank()) # elif defined(TIMEMORY_USE_MPI_HEADERS) -# define OMNITRACE_PROCESS_IDENTIFIER \ +# define OMNITRACE_DEBUG_PROCESS_IDENTIFIER \ (::tim::dmp::is_initialized()) ? static_cast(::tim::dmp::rank()) \ : static_cast(::tim::process::get_id()) # else -# define OMNITRACE_PROCESS_IDENTIFIER static_cast(::tim::process::get_id()) +# define OMNITRACE_DEBUG_PROCESS_IDENTIFIER \ + static_cast(::tim::process::get_id()) # endif #endif -#if !defined(OMNITRACE_THREAD_IDENTIFIER) -# define OMNITRACE_THREAD_IDENTIFIER ::tim::threading::get_id() +#if !defined(OMNITRACE_DEBUG_THREAD_IDENTIFIER) +# define OMNITRACE_DEBUG_THREAD_IDENTIFIER ::tim::threading::get_id() #endif #if defined(__clang__) || (__GNUC__ < 9) @@ -174,8 +175,8 @@ get_chars(T&& _c, std::index_sequence) ::omnitrace::debug::flush(); \ ::omnitrace::debug::lock _lk{}; \ OMNITRACE_FPRINTF_STDERR_COLOR(info); \ - fprintf(stderr, "[omnitrace][%i][%li]%s", OMNITRACE_PROCESS_IDENTIFIER, \ - OMNITRACE_THREAD_IDENTIFIER, \ + fprintf(stderr, "[omnitrace][%i][%li]%s", OMNITRACE_DEBUG_PROCESS_IDENTIFIER, \ + OMNITRACE_DEBUG_THREAD_IDENTIFIER, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ fprintf(stderr, __VA_ARGS__); \ ::omnitrace::debug::flush(); \ @@ -201,8 +202,9 @@ get_chars(T&& _c, std::index_sequence) ::omnitrace::debug::flush(); \ ::omnitrace::debug::lock _lk{}; \ OMNITRACE_FPRINTF_STDERR_COLOR(info); \ - fprintf(stderr, "[omnitrace][%i][%li][%s]%s", OMNITRACE_PROCESS_IDENTIFIER, \ - OMNITRACE_THREAD_IDENTIFIER, OMNITRACE_FUNCTION, \ + fprintf(stderr, "[omnitrace][%i][%li][%s]%s", \ + OMNITRACE_DEBUG_PROCESS_IDENTIFIER, OMNITRACE_DEBUG_THREAD_IDENTIFIER, \ + OMNITRACE_FUNCTION, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ fprintf(stderr, __VA_ARGS__); \ ::omnitrace::debug::flush(); \ @@ -228,7 +230,7 @@ get_chars(T&& _c, std::index_sequence) { \ char _msg_buffer[OMNITRACE_DEBUG_BUFFER_LEN]; \ snprintf(_msg_buffer, OMNITRACE_DEBUG_BUFFER_LEN, "[omnitrace][%i][%li][%s]%s", \ - OMNITRACE_PROCESS_IDENTIFIER, OMNITRACE_THREAD_IDENTIFIER, \ + OMNITRACE_DEBUG_PROCESS_IDENTIFIER, OMNITRACE_DEBUG_THREAD_IDENTIFIER, \ OMNITRACE_FUNCTION, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ auto len = strlen(_msg_buffer); \ @@ -265,8 +267,8 @@ get_chars(T&& _c, std::index_sequence) { \ ::omnitrace::debug::flush(); \ OMNITRACE_FPRINTF_STDERR_COLOR(fatal); \ - fprintf(stderr, "[omnitrace][%i][%li]%s", OMNITRACE_PROCESS_IDENTIFIER, \ - OMNITRACE_THREAD_IDENTIFIER, \ + fprintf(stderr, "[omnitrace][%i][%li]%s", OMNITRACE_DEBUG_PROCESS_IDENTIFIER, \ + OMNITRACE_DEBUG_THREAD_IDENTIFIER, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ fprintf(stderr, __VA_ARGS__); \ ::omnitrace::debug::flush(); \ @@ -296,8 +298,9 @@ get_chars(T&& _c, std::index_sequence) { \ ::omnitrace::debug::flush(); \ OMNITRACE_FPRINTF_STDERR_COLOR(fatal); \ - fprintf(stderr, "[omnitrace][%i][%li][%s]%s", OMNITRACE_PROCESS_IDENTIFIER, \ - OMNITRACE_THREAD_IDENTIFIER, OMNITRACE_FUNCTION, \ + fprintf(stderr, "[omnitrace][%i][%li][%s]%s", \ + OMNITRACE_DEBUG_PROCESS_IDENTIFIER, OMNITRACE_DEBUG_THREAD_IDENTIFIER, \ + OMNITRACE_FUNCTION, \ ::omnitrace::debug::is_bracket(__VA_ARGS__) ? "" : " "); \ fprintf(stderr, __VA_ARGS__); \ ::omnitrace::debug::flush(); \ diff --git a/source/lib/omnitrace/library/rocm.cpp b/source/lib/omnitrace/library/rocm.cpp index 990d546e39..c960ef96fc 100644 --- a/source/lib/omnitrace/library/rocm.cpp +++ b/source/lib/omnitrace/library/rocm.cpp @@ -142,7 +142,7 @@ extern "C" settings->timestamp_on = 1; settings->intercept_mode = 1; settings->hsa_intercepting = 1; - settings->k_concurrent = 1; + settings->k_concurrent = 0; settings->obj_dumping = 0; // settings->code_obj_tracking = 0; // settings->memcopy_tracking = 0; diff --git a/source/lib/omnitrace/library/rocm_smi.cpp b/source/lib/omnitrace/library/rocm_smi.cpp index 33a348fda0..733e0c3bbf 100644 --- a/source/lib/omnitrace/library/rocm_smi.cpp +++ b/source/lib/omnitrace/library/rocm_smi.cpp @@ -243,7 +243,7 @@ data::post_process(uint32_t _dev_id) auto& _rocm_smi_v = sampler_instances::instances().at(_dev_id); auto _rocm_smi = (_rocm_smi_v) ? *_rocm_smi_v : std::deque{}; - const auto& _thread_info = thread_info::get(0, LookupTID); + const auto& _thread_info = thread_info::get(0, InternalTID); OMNITRACE_CI_THROW(!_thread_info, "Missing thread info for thread 0"); if(!_thread_info) return; diff --git a/source/lib/omnitrace/library/sampling.cpp b/source/lib/omnitrace/library/sampling.cpp index 2ccbd39076..0218cfda90 100644 --- a/source/lib/omnitrace/library/sampling.cpp +++ b/source/lib/omnitrace/library/sampling.cpp @@ -238,7 +238,7 @@ start_duration_thread() std::set configure(bool _setup, int64_t _tid = threading::get_id()) { - const auto& _info = thread_info::get(_tid, InternalTID); + const auto& _info = thread_info::get(_tid, SequentTID); auto& _sampler = sampling::get_sampler(_tid); auto& _running = get_sampler_running(_tid); bool _is_running = (!_running) ? false : *_running; @@ -270,7 +270,7 @@ configure(bool _setup, int64_t _tid = threading::get_id()) // thus we should not start a sampler for it if(_tid > 0 && _info && _info->is_offset) return std::set{}; // if the thread state is disabled or completed, return - if(_info && _info->index_data->internal_value == _tid && + if(_info && _info->index_data->sequent_value == _tid && get_thread_state() == ThreadState::Disabled) return std::set{}; @@ -287,7 +287,8 @@ configure(bool _setup, int64_t _tid = threading::get_id()) if(get_debug_sampling()) _verbose = 2; OMNITRACE_DEBUG("Configuring sampler for thread %lu...\n", _tid); - sampling::sampler_instances::construct("omnitrace", _tid, _verbose); + sampling::sampler_instances::construct(construct_on_thread{ _tid }, "omnitrace", + _tid, _verbose); _sampler->set_flags(SA_RESTART); _sampler->set_verbose(_verbose); @@ -399,7 +400,8 @@ unique_ptr_t>& get_signal_types(int64_t _tid) { static auto& _v = signal_type_instances::instances(); - signal_type_instances::construct(omnitrace::get_sampling_signals(_tid)); + signal_type_instances::construct(construct_on_thread{ _tid }, + omnitrace::get_sampling_signals(_tid)); return _v.at(_tid); } @@ -486,7 +488,7 @@ post_process() continue; } - const auto& _thread_info = thread_info::get(i, InternalTID); + const auto& _thread_info = thread_info::get(i, SequentTID); OMNITRACE_VERBOSE(3 || get_debug_sampling(), "Getting sampler data for thread %lu...\n", i); @@ -578,7 +580,7 @@ post_process_perfetto(int64_t _tid, const bundle_t* _init, OMNITRACE_VERBOSE(3 || get_debug_sampling(), "[%li] Post-processing backtraces for perfetto...\n", _tid); - const auto& _thread_info = thread_info::get(_tid, InternalTID); + const auto& _thread_info = thread_info::get(_tid, SequentTID); OMNITRACE_CI_THROW(!_thread_info, "No valid thread info for tid=%li\n", _tid); if(!_thread_info) return; diff --git a/source/lib/omnitrace/library/thread_data.hpp b/source/lib/omnitrace/library/thread_data.hpp index 7c36ccd34e..275bd9e809 100644 --- a/source/lib/omnitrace/library/thread_data.hpp +++ b/source/lib/omnitrace/library/thread_data.hpp @@ -115,6 +115,11 @@ private: using construct_on_init = std::true_type; +struct construct_on_thread +{ + int64_t index = threading::get_id(); +}; + template struct thread_data { @@ -123,14 +128,26 @@ struct thread_data using construct_on_init = std::true_type; template - static void construct(Args&&...); + static void construct(construct_on_thread&&, Args&&...); static value_type& instance(); static instance_array_t& instances(); template - static value_type& instance(construct_on_init, Args&&...); + static value_type& instance(construct_on_thread&&, Args&&...); template static instance_array_t& instances(construct_on_init, Args&&...); + template + static void construct(Args&&... args) + { + construct(construct_on_thread{}, std::forward(args)...); + } + + template + static value_type& instance(Args&&... args) + { + return instance(construct_on_thread{}, std::forward(args)...); + } + static constexpr size_t size() { return MaxThreads; } decltype(auto) begin() { return instances().begin(); } @@ -143,16 +160,12 @@ struct thread_data template template void -thread_data::construct(Args&&... _args) +thread_data::construct(construct_on_thread&& _t, Args&&... _args) { // construct outside of lambda to prevent data-race - static auto& _instances = instances(); - static thread_local bool _v = [&]() { - _instances.at(threading::get_id()) = - generate{}(std::forward(_args)...); - return true; - }(); - (void) _v; + static auto& _instances = instances(); + if(!_instances.at(_t.index)) + _instances.at(_t.index) = generate{}(std::forward(_args)...); } template @@ -173,10 +186,10 @@ thread_data::instances() template template unique_ptr_t& -thread_data::instance(construct_on_init, Args&&... _args) +thread_data::instance(construct_on_thread&& _t, Args&&... _args) { - construct(std::forward(_args)...); - return instances().at(threading::get_id()); + construct(construct_on_thread{ _t }, std::forward(_args)...); + return instances().at(_t.index); } template @@ -202,16 +215,15 @@ thread_data::instances(construct_on_init, Args&&... _args) template struct thread_data, Tag, MaxThreads> { - using value_type = std::optional; - using instance_array_t = std::array; - using construct_on_init = std::true_type; + using value_type = std::optional; + using instance_array_t = std::array; template - static void construct(Args&&...); + static void construct(construct_on_thread&&, Args&&...); static value_type& instance(); static instance_array_t& instances(); template - static value_type& instance(construct_on_init, Args&&...); + static value_type& instance(construct_on_thread&&, Args&&...); template static instance_array_t& instances(construct_on_init, Args&&...); @@ -227,16 +239,13 @@ struct thread_data, Tag, MaxThreads> template template void -thread_data, Tag, MaxThreads>::construct(Args&&... _args) +thread_data, Tag, MaxThreads>::construct(construct_on_thread&& _t, + Args&&... _args) { // construct outside of lambda to prevent data-race - static auto& _instances = instances(); - static thread_local bool _v = [&]() { - _instances.at(threading::get_id()) = - generate{}(std::forward(_args)...); - return true; - }(); - (void) _v; + static auto& _instances = instances(); + if(!_instances.at(_t.index)) + _instances.at(_t.index) = generate{}(std::forward(_args)...); } template @@ -257,11 +266,11 @@ thread_data, Tag, MaxThreads>::instances() template template std::optional& -thread_data, Tag, MaxThreads>::instance(construct_on_init, +thread_data, Tag, MaxThreads>::instance(construct_on_thread&& _t, Args&&... _args) { - construct(std::forward(_args)...); - return instances().at(threading::get_id()); + construct(construct_on_thread{ _t }, std::forward(_args)...); + return instances().at(_t.index); } template @@ -291,19 +300,30 @@ using tim::identity_t; template struct thread_data, Tag, MaxThreads> { - using value_type = Tp; - using instance_array_t = std::array; - using construct_on_init = std::true_type; + using value_type = Tp; + using instance_array_t = std::array; template - static void construct(Args&&...); + static void construct(construct_on_thread&&, Args&&...); static value_type& instance(); static instance_array_t& instances(); template - static value_type& instance(construct_on_init, Args&&...); + static value_type& instance(construct_on_thread&&, Args&&...); template static instance_array_t& instances(construct_on_init, Args&&...); + template + static void construct(Args&&... args) + { + construct(construct_on_thread{}, std::forward(args)...); + } + + template + static value_type& instance(Args&&... args) + { + return instance(construct_on_thread{}, std::forward(args)...); + } + static constexpr size_t size() { return MaxThreads; } decltype(auto) begin() { return instances().begin(); } @@ -316,16 +336,13 @@ struct thread_data, Tag, MaxThreads> template template void -thread_data, Tag, MaxThreads>::construct(Args&&... _args) +thread_data, Tag, MaxThreads>::construct(construct_on_thread&& _t, + Args&&... _args) { // construct outside of lambda to prevent data-race - static auto& _instances = instances(); - static thread_local bool _v = [&]() { - _instances.at(threading::get_id()) = - generate{}(std::forward(_args)...); - return true; - }(); - (void) _v; + static auto& _instances = instances(); + if(!_instances.at(_t.index)) + _instances.at(_t.index) = generate{}(std::forward(_args)...); } template @@ -346,10 +363,11 @@ thread_data, Tag, MaxThreads>::instances() template template Tp& -thread_data, Tag, MaxThreads>::instance(construct_on_init, Args&&... _args) +thread_data, Tag, MaxThreads>::instance(construct_on_thread&& _t, + Args&&... _args) { - construct(std::forward(_args)...); - return instances().at(threading::get_id()); + construct(construct_on_thread{ _t }, std::forward(_args)...); + return instances().at(_t.index); } template diff --git a/source/lib/omnitrace/library/thread_info.cpp b/source/lib/omnitrace/library/thread_info.cpp index 0eeece6914..e53cffc5e9 100644 --- a/source/lib/omnitrace/library/thread_info.cpp +++ b/source/lib/omnitrace/library/thread_info.cpp @@ -58,13 +58,13 @@ init_index_data(bool _offset = false) { OMNITRACE_BASIC_VERBOSE_F( 2, "Thread %li on PID %i (rank: %i) assigned omnitrace TID %li\n", - itr->system_value, process::get_id(), dmp::rank(), itr->internal_value); + itr->system_value, process::get_id(), dmp::rank(), itr->sequent_value); } else { OMNITRACE_VERBOSE_F( 2, "Thread %li on PID %i (rank: %i) assigned omnitrace TID %li\n", - itr->system_value, process::get_id(), dmp::rank(), itr->internal_value); + itr->system_value, process::get_id(), dmp::rank(), itr->sequent_value); } } return itr; @@ -103,7 +103,7 @@ thread_info::get() const std::optional& thread_info::get(int64_t _tid, ThreadIdType _type) { - if(_type == ThreadIdType::LookupTID) + if(_type == ThreadIdType::InternalTID) return thread_info_data_t::instances().at(_tid); else if(_type == ThreadIdType::SystemTID) { @@ -113,12 +113,12 @@ thread_info::get(int64_t _tid, ThreadIdType _type) if(itr && itr->index_data->system_value == _tid) return itr; } } - else if(_type == ThreadIdType::InternalTID) + else if(_type == ThreadIdType::SequentTID) { const auto& _v = thread_info_data_t::instances(); for(const auto& itr : _v) { - if(itr && itr->index_data->internal_value == _tid) return itr; + if(itr && itr->index_data->sequent_value == _tid) return itr; } } @@ -149,7 +149,7 @@ thread_info::set_stop(uint64_t _ts) { for(auto& itr : thread_info_data_t::instances()) { - if(itr && itr->index_data && itr->index_data->lookup_value > _tid) + if(itr && itr->index_data && itr->index_data->internal_value > _tid) { if(itr->lifetime.second > _v->lifetime.second) itr->lifetime.second = _v->lifetime.second; @@ -203,8 +203,8 @@ thread_info::as_string() const std::stringstream _ss{}; _ss << std::boolalpha << "is_offset=" << is_offset; if(index_data) - _ss << ", index_data=(" << index_data->lookup_value << ", " - << index_data->system_value << ", " << index_data->internal_value << ")"; + _ss << ", index_data=(" << index_data->internal_value << ", " + << index_data->system_value << ", " << index_data->sequent_value << ")"; _ss << ", lifetime=(" << lifetime.first << ":" << lifetime.second << ")"; return _ss.str(); } diff --git a/source/lib/omnitrace/library/thread_info.hpp b/source/lib/omnitrace/library/thread_info.hpp index cc6275d885..b55c44af61 100644 --- a/source/lib/omnitrace/library/thread_info.hpp +++ b/source/lib/omnitrace/library/thread_info.hpp @@ -34,11 +34,26 @@ namespace omnitrace { +// InternalTID: zero-based, process-local thread-ID from atomic increment +// from user-created threads and omnitrace-created threads. +// This value may vary based on threads created by different +// backends, e.g., roctracer will create threads +// +// SystemTID: system thread-ID. Should be same value as what is seen +// in debugger, etc. +// +// SequentTID: zero-based, process-local thread-ID based on the sequence of +// user-created threads which are created in-between the +// initialization and finalization of omnitrace. +// In theory, omnitrace will never increment this value +// because of a thread explicitly by omnitrace or +// by other of the dependent libraries. Most commonly +// used for indexing into omnitrace's thread-local data. enum ThreadIdType : int { - LookupTID = 0, - SystemTID = 1, - InternalTID = 2, + InternalTID = 0, + SystemTID = 1, // system thread id + SequentTID = 2, }; struct thread_index_data @@ -46,9 +61,9 @@ struct thread_index_data // the lookup value is always incremented for each thread // the system value is the tid provided by the operating system // the internal value is the value which the user expects - int64_t lookup_value = utility::get_thread_index(); + int64_t internal_value = utility::get_thread_index(); int64_t system_value = tim::threading::get_sys_tid(); - int64_t internal_value = tim::threading::get_id(); + int64_t sequent_value = tim::threading::get_id(); }; struct thread_info